import os
import sys
import datetime
import argparse
import logging
import boto3
from botocore.client import Config as BotoConfig
from dotenv import load_dotenv

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

def cleanup_old_files(dry_run=False, days=7, directory_prefix="niconico"):
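    """Delete objects under `directory_prefix/` in the S3 bucket named by
    NICONICOGAY_S3_BUCKET_NAME whose LastModified is more than `days` days old.

    With dry_run=True, only report what would be deleted.
    """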
    required_env_vars = [
        'NICONICOGAY_S3_ACCESS_KEY',
        'NICONICOGAY_S3_SECRET_KEY',
        'NICONICOGAY_S3_BUCKET_NAME',
        'NICONICOGAY_S3_REGION'
    ]
    missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
    if missing_vars:
        logger.error(f"Missing required environment variables: {', '.join(missing_vars)}")
        sys.exit(1)
    
    try:
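        # DigitalOcean Spaces exposes an S3-compatible API; 'virtual' addressing
        # puts the bucket name in the hostname rather than the path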
        s3_client = boto3.client(
            's3',
            aws_access_key_id=os.environ['NICONICOGAY_S3_ACCESS_KEY'],
            aws_secret_access_key=os.environ['NICONICOGAY_S3_SECRET_KEY'],
            region_name=os.environ['NICONICOGAY_S3_REGION'],
            endpoint_url=f"https://{os.environ['NICONICOGAY_S3_REGION']}.digitaloceanspaces.com",
            config=BotoConfig(s3={'addressing_style': 'virtual'}),
        )
        
        bucket_name = os.environ['NICONICOGAY_S3_BUCKET_NAME']
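        # boto3 returns LastModified as a timezone-aware UTC datetime,
        # so the cutoff must be timezone-aware as well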
        cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)
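        # list_objects_v2 returns at most 1,000 keys per page; the paginator
        # transparently walks every page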
        paginator = s3_client.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=f"{directory_prefix}/")
        
        total_files = 0
        objects_to_delete = []
        
        for page in page_iterator:
            if 'Contents' not in page:
                continue
            
            for obj in page['Contents']:
                total_files += 1
                if obj['LastModified'] < cutoff_date:  # type: ignore
                    objects_to_delete.append({'Key': obj['Key']})  # type: ignore

        if not objects_to_delete:
            logger.info("No files to delete")
            return
        
        if dry_run:
            logger.info(f"DRY RUN: Would delete {len(objects_to_delete)} out of {total_files} files")
        else:
            # The S3 DeleteObjects API accepts at most 1,000 keys per request,
            # so delete in batches of 1,000
            deleted_count = 0
            for i in range(0, len(objects_to_delete), 1000):
                batch = objects_to_delete[i:i+1000]
                response = s3_client.delete_objects(
                    Bucket=bucket_name,
                    Delete={'Objects': batch}
                )
                # delete_objects reports per-key failures in 'Errors' instead of raising
                for error in response.get('Errors', []):
                    logger.error(f"Failed to delete {error.get('Key')}: {error.get('Message')}")
                deleted_count += len(response.get('Deleted', []))
            logger.info(f"Deleted {deleted_count} out of {total_files} files")
            
    except Exception:
        # logger.exception includes the traceback, which logger.error would drop
        logger.exception("Cleanup failed")
        sys.exit(1)

if __name__ == "__main__":
    load_dotenv()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dry-run", 
        action="store_true", 
        help="Show what would be deleted without actually deleting anything"
    )
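    # Expose cleanup_old_files' remaining parameters on the CLI
    # (defaults mirror the function's own defaults)
    parser.add_argument(
        "--days",
        type=int,
        default=7,
        help="Delete files older than this many days (default: 7)"
    )
    parser.add_argument(
        "--prefix",
        default="niconico",
        help="Only consider objects under this directory prefix (default: niconico)"
    )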
    args = parser.parse_args()
    
    cleanup_old_files(dry_run=args.dry_run, days=args.days, directory_prefix=args.prefix)