Add cleanup script
This commit is contained in:
parent
71b7dac492
commit
1802eeffe3
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,4 +1,5 @@
|
||||
venv
|
||||
.venv
|
||||
__pycache__
|
||||
cookies.txt
|
||||
cookies.txt
|
||||
.env
|
87
clean.py
Normal file
87
clean.py
Normal file
@ -0,0 +1,87 @@
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import argparse
|
||||
import logging
|
||||
import boto3
|
||||
from botocore.client import Config as BotoConfig
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Configure root logging once for this script: timestamped, level-tagged lines.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, datefmt=_DATE_FORMAT)

# Module-level logger; using __name__ keeps records attributable to this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
def cleanup_old_files(dry_run=False, days=7, directory_prefix="niconico"):
    """Delete objects older than `days` under `directory_prefix/` in the S3 bucket.

    Connection details are read from the NICONICOGAY_S3_* environment
    variables. Exits the process with status 1 if any variable is missing,
    if any S3 call raises, or if any individual deletion fails.

    Args:
        dry_run: When True, only log what would be deleted.
        days: Age threshold; objects last modified more than `days` days ago
            (UTC) are deleted.
        directory_prefix: Top-level key prefix to scan (no trailing slash).
    """
    required_env_vars = [
        'NICONICOGAY_S3_ACCESS_KEY',
        'NICONICOGAY_S3_SECRET_KEY',
        'NICONICOGAY_S3_BUCKET_NAME',
        'NICONICOGAY_S3_REGION',
    ]
    missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
    if missing_vars:
        logger.error(f"Missing required environment variables: {', '.join(missing_vars)}")
        sys.exit(1)

    try:
        s3_session = boto3.Session()
        s3_client = s3_session.client(
            's3',
            aws_access_key_id=os.environ['NICONICOGAY_S3_ACCESS_KEY'],
            aws_secret_access_key=os.environ['NICONICOGAY_S3_SECRET_KEY'],
            region_name=os.environ['NICONICOGAY_S3_REGION'],
            # DigitalOcean Spaces is S3-compatible; virtual-hosted addressing required.
            endpoint_url=f"https://{os.environ['NICONICOGAY_S3_REGION']}.digitaloceanspaces.com",
            config=BotoConfig(s3={'addressing_style': 'virtual'}),
        )

        bucket_name = os.environ['NICONICOGAY_S3_BUCKET_NAME']
        # S3 LastModified timestamps are timezone-aware (UTC); the cutoff must
        # be aware too or the comparison below would raise TypeError.
        cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)
        paginator = s3_client.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=f"{directory_prefix}/")

        total_files = 0
        objects_to_delete = []

        for page in page_iterator:
            # Pages with no matching keys omit the 'Contents' field entirely.
            if 'Contents' not in page:
                continue
            for obj in page['Contents']:
                total_files += 1
                if obj['LastModified'] < cutoff_date:  # type: ignore
                    objects_to_delete.append({'Key': obj['Key']})  # type: ignore

        if not objects_to_delete:
            logger.info("No files to delete")
            return

        if dry_run:
            logger.info(f"DRY RUN: Would delete {len(objects_to_delete)} out of {total_files} files")
            return

        # delete_objects accepts at most 1000 keys per request (S3 API limit).
        failed = 0
        for i in range(0, len(objects_to_delete), 1000):
            batch = objects_to_delete[i:i+1000]
            response = s3_client.delete_objects(
                Bucket=bucket_name,
                Delete={'Objects': batch}
            )
            # Per-key failures are reported in the response's 'Errors' list,
            # not raised as exceptions -- surface them instead of claiming success.
            for error in response.get('Errors', []):
                failed += 1
                logger.error(f"Failed to delete {error.get('Key')}: {error.get('Message')}")

        deleted = len(objects_to_delete) - failed
        if failed:
            logger.error(f"Deleted {deleted} out of {total_files} files; {failed} deletions failed")
            sys.exit(1)
        logger.info(f"Successfully deleted {deleted} out of {total_files} files")

    except Exception as e:
        # Broad catch is deliberate: this is a CLI entry point, so log and
        # fail the whole process rather than leak a traceback mid-cleanup.
        logger.error(f"Error: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Pull NICONICOGAY_S3_* settings from a local .env file, if present.
    load_dotenv()
    parser = argparse.ArgumentParser(
        description="Delete old files from the configured S3 bucket"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be deleted without actually deleting anything"
    )
    # Expose the function's tuning knobs on the CLI; defaults mirror
    # cleanup_old_files' own defaults, so behavior is unchanged when omitted.
    parser.add_argument(
        "--days",
        type=int,
        default=7,
        help="Delete files last modified more than this many days ago (default: 7)"
    )
    parser.add_argument(
        "--prefix",
        default="niconico",
        help="Top-level key prefix to clean (default: niconico)"
    )
    args = parser.parse_args()

    cleanup_old_files(dry_run=args.dry_run, days=args.days, directory_prefix=args.prefix)
|
Loading…
x
Reference in New Issue
Block a user