diff --git a/.gitignore b/.gitignore
index 8f5e87a..8bd1691 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 venv
 .venv
 __pycache__
-cookies.txt
\ No newline at end of file
+cookies.txt
+.env
+access_times.json
\ No newline at end of file
diff --git a/access_tracker.py b/access_tracker.py
new file mode 100644
index 0000000..22661ab
--- /dev/null
+++ b/access_tracker.py
@@ -0,0 +1,65 @@
+import json
+import os
+import threading
+import time
+from typing import Dict, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+class AccessTracker:
+    """Tracks when video URLs are accessed, storing the data in a JSON file and keeping it in memory"""
+
+    def __init__(self, json_file_path: str = "access_times.json"):
+        self.json_file_path = json_file_path
+        self.access_times: Dict[str, float] = {}
+        self.lock = threading.Lock()
+        self._load_from_file()
+
+    def _load_from_file(self) -> None:
+        """Load access times from the JSON file into memory"""
+        try:
+            if os.path.exists(self.json_file_path):
+                with open(self.json_file_path, 'r') as f:
+                    self.access_times = json.load(f)
+                logger.info(f"Loaded {len(self.access_times)} access times from {self.json_file_path}")
+            else:
+                logger.info(f"Access times file {self.json_file_path} does not exist, starting fresh")
+        except Exception as e:
+            logger.error(f"Error loading access times from {self.json_file_path}: {e}")
+            self.access_times = {}
+
+    def _save_to_file(self) -> None:
+        """Save current access times from memory to the JSON file"""
+        try:
+            with open(self.json_file_path, 'w') as f:
+                json.dump(self.access_times, f, indent=2)
+            logger.debug(f"Saved {len(self.access_times)} access times to {self.json_file_path}")
+        except Exception as e:
+            logger.error(f"Error saving access times to {self.json_file_path}: {e}")
+
+    def record_access(self, video_id: str) -> None:
+        """Record that a video was accessed at the current time"""
+        current_time = time.time()
+        with self.lock:
+            self.access_times[video_id] = current_time
+            self._save_to_file()
+        logger.debug(f"Recorded access for {video_id} at {current_time}")
+
+    def get_last_access(self, video_id: str) -> Optional[float]:
+        """Get the last access time for a video (returns None if never accessed)"""
+        with self.lock:
+            return self.access_times.get(video_id)
+
+    def get_all_access_times(self) -> Dict[str, float]:
+        """Get a copy of all access times"""
+        with self.lock:
+            return self.access_times.copy()
+
+    def remove_access_record(self, video_id: str) -> None:
+        """Remove the access record for a video (e.g., when the video is deleted)"""
+        with self.lock:
+            if video_id in self.access_times:
+                del self.access_times[video_id]
+                self._save_to_file()
+                logger.debug(f"Removed access record for {video_id}")
diff --git a/app.py b/app.py
index 4161969..a5de1eb 100644
--- a/app.py
+++ b/app.py
@@ -1,90 +1,460 @@
 import os
 import http.cookiejar
 import json
+import re
 import requests
 from bs4 import BeautifulSoup
-from flask import Flask, Response
+from flask import Flask, Response, request, jsonify
 from diskcache import Cache
 import logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
+import threading
+import time
+import tempfile
+import nndownload
+import boto3
+from botocore.client import Config as BotoConfig
+import urllib.parse
+from dotenv import load_dotenv
+from access_tracker import AccessTracker
+load_dotenv()
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s.%(msecs)03d - %(levelname)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR) +logger = logging.getLogger(__name__) +logger.setLevel(os.environ.get('NICONICOGAY_LOG', 'INFO').upper()) app = Flask(__name__) -CACHE_EXPIRATION_SECONDS = 3600 # 1 hour +HOST = os.environ.get('NICONICOGAY_HOST', 'https://nicovideo.gay') +S3_BUCKET_NAME = os.environ.get('NICONICOGAY_S3_BUCKET_NAME') +S3_REGION = os.environ.get('NICONICOGAY_S3_REGION') +CDN_BASE_URL = os.environ.get('NICONICOGAY_CDN_BASE_URL') +MAX_CONCURRENT_DOWNLOADS = 3 +CACHE_EXPIRATION_HTML = 60 * 60 # 1 hour +CACHE_EXPIRATION_CDN = 60 * 60 * 24 * 7 # 1 week CACHE_SIZE_LIMIT = 100 * 1024 * 1024 # 100 MB -cache = None if os.environ.get('NICONICOGAY_DISABLE_CACHE', '') != '' else Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT) +cache = None +if os.environ.get('NICONICOGAY_DISABLE_CACHE', '') != '1': + cache = Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT) + logger.debug("Using disk cache") +else: + logger.info("Disk cache disabled") + +user_session = None cookie_jar = http.cookiejar.MozillaCookieJar('cookies.txt') try: cookie_jar.load(ignore_discard=True, ignore_expires=True) + user_session = next((cookie.value for cookie in cookie_jar if cookie.name == 'user_session'), None) except FileNotFoundError: - logger.warning("cookies.txt not found, starting with empty cookie jar") + logger.info("cookies.txt not found, starting with empty cookie jar") s = requests.Session() s.headers.update({ - "User-Agent": "Twitterbot/1.0" + "User-Agent": os.environ.get('NICONICOGAY_USER_AGENT', 'Twitterbot/1.0') }) s.cookies = cookie_jar # type: ignore -@app.route("/watch/") -def proxy(video_id): - logger.info(f"Received request for video ID: {video_id}") +s3_client = None +if all(key in os.environ for key in [ + 'NICONICOGAY_S3_ACCESS_KEY', + 'NICONICOGAY_S3_SECRET_KEY', +]): + s3_session = boto3.Session() + s3_client = s3_session.client( + 's3', + aws_access_key_id=os.environ['NICONICOGAY_S3_ACCESS_KEY'], + aws_secret_access_key=os.environ['NICONICOGAY_S3_SECRET_KEY'], + region_name=S3_REGION, + endpoint_url=f"https://{S3_REGION}.digitaloceanspaces.com", + config=BotoConfig(s3={'addressing_style': 'virtual'}), + ) +else: + logger.info("S3 credentials not provided. 
Videos will not be downloaded.") - if cache: - cached_html = cache.get(video_id) - if cached_html is not None: - logger.info(f"Using cached response for video ID: {video_id}") - return Response(cached_html, mimetype="text/html") # type: ignore +download_tracker = { + 'active_downloads': 0, + 'in_progress': set(), +} +download_lock = threading.Lock() +download_queue = [] - # Not in cache or cache expired; fetch from nicovideo.jp - real_url = f"https://www.nicovideo.jp/watch/{video_id}" +access_tracker = AccessTracker() + +def download_and_upload_video(video_id, url, video_quality): try: - logger.info(f"Fetching content from URL: {real_url}") + with download_lock: + download_tracker['active_downloads'] += 1 + download_tracker['in_progress'].add(video_id) + + with tempfile.NamedTemporaryFile(suffix='.mp4', delete=True) as temp_file: + temp_path = temp_file.name + + try: + logger.info(f"{video_id}: Starting download") + nndownload_args = [ + "--no-login", + "--user-agent", "Googlebot/2.1", + "--video-quality", video_quality, + "--output-path", temp_path, + url + ] + if user_session: + nndownload_args += ["--session-cookie", user_session] + nndownload_args = nndownload_args[1:] + nndownload.execute(*nndownload_args) + + if os.path.exists(temp_path) and s3_client and S3_BUCKET_NAME: + logger.info(f"{video_id}: Downloaded, uploading to CDN") + try: + s3_key = f"niconico/{video_id}.mp4" + s3_client.upload_file( + temp_path, + S3_BUCKET_NAME, + s3_key, + ExtraArgs={'ContentType': 'video/mp4', 'ACL': 'public-read'} + ) + + logger.info(f"{video_id}: Upload successful to CDN") + + if cache is not None: + cache.set(f"{video_id}_cdn", True, expire=CACHE_EXPIRATION_CDN) + # Clear HTML cache for this video to ensure next view gets updated HTML + cache.delete(f"{video_id}_html") + logger.debug(f"{video_id}: Cleared HTML cache") + + return True + except Exception as e: + logger.error(f"{video_id}: Error uploading to CDN: {e}") + return False + else: + logger.error(f"{video_id}: Failed to download or S3 client not configured") + return False + finally: + if os.path.exists(temp_path): + os.unlink(temp_path) + logger.debug(f"Removed temporary file: {temp_path}") + except Exception as e: + logger.error(f"{video_id}: Error in download process: {e}") + return False + finally: + with download_lock: + download_tracker['active_downloads'] -= 1 + download_tracker['in_progress'].discard(video_id) + +def download_worker(): + while True: + try: + with download_lock: + can_download = download_tracker['active_downloads'] < MAX_CONCURRENT_DOWNLOADS + queue_has_items = len(download_queue) > 0 + + if queue_has_items and can_download: + with download_lock: + # Get next video that is not already being downloaded + for i, (video_id, _, _) in enumerate(download_queue): + if video_id not in download_tracker['in_progress']: + video_info = download_queue.pop(i) + threading.Thread(target=download_and_upload_video, + args=(video_info[0], video_info[1], video_info[2])).start() + break + time.sleep(1) + except Exception as e: + logger.error(f"Error in download worker: {e}") + time.sleep(5) # Back off in case of error + +worker_thread = threading.Thread(target=download_worker, daemon=True) +worker_thread.start() + +def is_video_in_cdn(video_id): + """Check if video exists in CDN""" + if cache is not None and cache.get(f"{video_id}_cdn"): + logger.debug(f"{video_id}: Already uploaded to CDN (cached)") + return True + + if not s3_client or not S3_BUCKET_NAME: + logger.warning("S3 client not configured. 
Cannot check if video exists in CDN.") + return False + + try: + s3_client.head_object(Bucket=S3_BUCKET_NAME, Key=f"niconico/{video_id}.mp4") + return True + except Exception: + return False + +def is_video_being_downloaded(video_id): + """Check if video is currently being downloaded""" + with download_lock: + return video_id in download_tracker['in_progress'] + +def get_cdn_url(video_id): + """Get the CDN URL for a video""" + return f"{CDN_BASE_URL}/niconico/{video_id}.mp4" + +def get_video_resolution(params): + if not params: + return None, None + video = params['media']['domand']['videos'][0] + return video['width'], video['height'] + +def get_video_quality(params, quality_level_threshold=3): + """Get the code of the best video quality available (optionally below a certain threshold)""" + videos = params['media']['domand']['videos'] + eligible_videos = [v for v in videos if v['qualityLevel'] < quality_level_threshold] + if not eligible_videos: + return None + return str(max(eligible_videos, key=lambda x: int(x['qualityLevel']))['id']) + +def get_data(video_id, real_url): + """Get the server response for a given video ID""" + try: + logger.debug(f"{video_id}: Fetching content from URL: {real_url}") r = s.get(real_url, timeout=10) + # r.raise_for_status() except requests.RequestException as e: - logger.error(f"Error fetching the page for video ID '{video_id}': {e}") - return Response(status=500) + logger.error(f"{video_id}: Error fetching the page ('{real_url}'): {e}") + return None, None soup = BeautifulSoup(r.text, "html.parser") - thumbnail_url = None try: server_response = soup.find("meta", {"name": "server-response"}) if server_response: params = json.loads(server_response["content"])["data"]["response"] # type: ignore - thumbnail_url = ( - params["video"]["thumbnail"].get("ogp") or - params["video"]["thumbnail"].get("player") or - params["video"]["thumbnail"].get("largeUrl") or - params["video"]["thumbnail"].get("middleUrl") or - params["video"]["thumbnail"].get("url") - ) + return params, soup except (KeyError, json.JSONDecodeError) as e: - logger.warning(f"Failed to extract thumbnail info for video ID '{video_id}': {e}") + logger.warning(f"{video_id}: Failed to extract thumbnail info: {e}") pass - og_tags = soup.find_all("meta", property=lambda x: x) # type: ignore + return None, soup + +def human_format(num): + """Format a number in a human-readable way (e.g., 1K, 2M, etc.)""" + if num is None: + return None + num = float('{:.3g}'.format(num)) + magnitude = 0 + while abs(num) >= 1000: + magnitude += 1 + num /= 1000.0 + return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude]) + +def get_oembed_url(params): + """Get the oEmbed (/owoembed) URL based on the given params (server response)""" + if not params: + return None + + author_id = None + author_name = None + if params.get('owner'): + author_id = params['owner'].get('id') + author_name = params['owner'].get('nickname') + video_id = params.get('video', {}).get('id') + + if not video_id: + return None + + view_count = human_format(params.get('video', {}).get('count', {}).get('view')) or "n/a" + comment_count = human_format(params.get('video', {}).get('count', {}).get('comment')) or "n/a" + like_count = human_format(params.get('video', {}).get('count', {}).get('like')) or "n/a" + mylist_count = human_format(params.get('video', {}).get('count', {}).get('mylist')) or "n/a" + provder_stats = f"👁️ {view_count} 💬 {comment_count} ❤️ {like_count} 📝 {mylist_count}" + + author_name_encoded = 
urllib.parse.quote(author_name) if author_name else "" + provider_stats_encoded = urllib.parse.quote(provder_stats) + + oembed_url = ( + f"{HOST}/owoembed?" + f"author_id={author_id if author_id else ''}&" + f"author_name={author_name_encoded}&" + f"video_id={video_id}&" + f"provider={provider_stats_encoded}" + ) + + return oembed_url + + + +@app.route("/watch/") +def proxy(video_id): + logger.info(f"{video_id}: Received request") + + cache_html_suffix = "_html" + request_user_agent = request.headers.get('User-Agent', '').lower() + if 'twitterbot' in request_user_agent: + cache_html_suffix = "_html_twitterbot" + elif 'discordbot' in request_user_agent: + cache_html_suffix = "_html_discordbot" + + if cache is not None: + logger.debug(f"{video_id}: Checking cache") + cached_html = cache.get(f"{video_id}{cache_html_suffix}") + if cached_html is not None: + logger.info(f"{video_id}: Returning cached response") + return Response(cached_html, mimetype="text/html") # type: ignore + + logger.debug(f"{video_id}: Cache miss - fetching") + + # Not in cache or cache expired; fetch from nicovideo.jp + real_url = f"https://www.nicovideo.jp/watch/{video_id}" + params, soup = get_data(video_id, real_url) + + if not params or not soup: + logger.error(f"{video_id}: Failed to fetch data") + return Response("Video not found", status=404) + + reason_code = params.get('reasonCode', '').upper() + if reason_code in ['HIDDEN_VIDEO', 'ADMINISTRATOR_DELETE_VIDEO', 'RIGHT_HOLDER_DELETE_VIDEO', 'DELETED_VIDEO']: + logger.warning(f"{video_id}: Video is hidden or deleted ({reason_code}) - returning 404") + return Response("Video not found", status=404) + + thumbnail_url = ( + params["video"]["thumbnail"].get("ogp") or + params["video"]["thumbnail"].get("player") or + params["video"]["thumbnail"].get("largeUrl") or + params["video"]["thumbnail"].get("middleUrl") or + params["video"]["thumbnail"].get("url") + ) if params else None + video_width, video_height = get_video_resolution(params) if params else (None, None) + + download_allowed = True + if download_allowed and 'discordbot' not in request_user_agent: + logger.info(f"{video_id}: Video download ignored due to user agent ({request_user_agent})") + download_allowed = False + if params['video']['duration'] > 60 * 20: # 20 minutes + logger.info(f"{video_id}: Video download ignored due to duration ({params['video']['duration']} seconds)") + download_allowed = False + video_quality = get_video_quality(params) if params else None + if download_allowed and video_quality is not None: + video_in_cdn = is_video_in_cdn(video_id) + video_in_progress = is_video_being_downloaded(video_id) + if not video_in_cdn and not video_in_progress and s3_client: + with download_lock: + # Add to queue if not already in it + queue_video_ids = [item[0] for item in download_queue] + if video_id not in queue_video_ids: + download_queue.append((video_id, real_url, video_quality)) + logger.info(f"{video_id}: Queued for download") + + cdn_video_url = get_cdn_url(video_id) + og_tags = soup.find_all("meta", attrs={"property": True}) + if len(og_tags) == 0: + logger.warning(f"{video_id}: No Open Graph tags found") + og_title = None + og_description = None + og_category = None for tag in og_tags: + # Remove attribute(s) added by niconico + if 'data-server' in tag.attrs: + del tag.attrs['data-server'] + # Set title + if tag.get("property") == "og:title": + og_title = tag["content"] + # Set description + if tag.get("property") == "og:description": + og_description = tag["content"] + if og_description and 
og_title: + # The description is formatted like "Title [Category] Description" + # Extract category (just incase this is useful later), and keep only the description part. + match = re.search(rf"^{re.escape(og_title)}(\s+\[(.*?)\])?\s+(.*)", og_description) + if match: + og_category = match.group(2) if match.group(2) else None + og_description = match.group(3) + tag["content"] = og_description # Fix thumbnail if tag.get("property") == "og:image" and thumbnail_url: tag["content"] = thumbnail_url + # Fix video URL + if tag.get("property") == "og:video:url" or tag.get("property") == "og:video:secure_url": + tag["content"] = cdn_video_url - og_tags_str = "\n".join(str(tag) for tag in og_tags) - html_response = f""" - - - - - {og_tags_str} - - - - - """ + og_tags_str = "\n".join(str(tag) for tag in og_tags if tag.get("property") not in ["og:site_name"]) + og_tags_str += '\n' + og_tags_str += f'\n' - if cache: - logging.info(f"Caching response for video ID: {video_id}") - cache.set(video_id, html_response, expire=CACHE_EXPIRATION_SECONDS) + # Discord seems to ignore video URLs when Twitter meta tags are present, + # so in addition to including these when the User Agent is a Twitterbot, + # we also include them when the video is too long to download in order to remove the play button. + if 'twitterbot' in request_user_agent or not download_allowed: + if 'twitterbot' in request_user_agent: + logger.info(f"{video_id}: Twitterbot detected - adding Twitter tags") + elif not download_allowed: + logger.info(f"{video_id}: Video too long to download - will not show play button") + og_tags_str += f'\n' + og_tags_str += '\n' + og_tags_str += '\n' + og_tags_str += f'\n' + if og_title: + og_tags_str += f'\n' + if og_description: + og_tags_str += f'\n' + # og_tags_str += '\n' + # og_tags_str += f'\n' + # if video_width: + # og_tags_str += f'\n' + # if video_height: + # og_tags_str += f'\n' + html_response = f""" + + +{og_tags_str} +""" + + if cache is not None: + logger.info(f"{video_id}: Caching HTML response") + cache.set(f"{video_id}{cache_html_suffix}", html_response, expire=CACHE_EXPIRATION_HTML) + + # Record access time for CDN cleanup purposes + if is_video_in_cdn(video_id): + access_tracker.record_access(video_id) + logger.debug(f"{video_id}: Recorded access time for CDN tracking") + + logger.info(f"{video_id}: Returning response") + logger.debug(f"{video_id}: HTML response:\n----------\n{html_response}\n----------") return Response(html_response, mimetype="text/html") + +@app.route("/owoembed") +def owoembed(): + """ + Handles oEmbed requests with parameters in the URL + Returns JSON payload in oEmbed format + """ + logger.info("Received request for oEmbed endpoint") + + # Get parameters from query string + author_id = request.args.get('author_id', '') + author_name = request.args.get('author_name', '') + video_id = request.args.get('video_id', '') + provider = request.args.get('provider', '') + + author_name_decoded = urllib.parse.unquote(author_name) + provider_decoded = urllib.parse.unquote(provider) + + # Create the author_url and provider_url + author_url = f"https://www.nicovideo.jp/user/{author_id}" + video_url = f"https://www.nicovideo.jp/watch/{video_id}" + + # Create oEmbed response + oembed_response = { + "author_name": author_name_decoded, + "author_url": author_url, + "provider_name": provider_decoded, + "provider_url": video_url, + "title": "Embed", + "type": "link", + "version": "1.0" + } + + logger.info(f"{video_id}: Returning oEmbed response") + logger.debug(f"{video_id}: oEmbed 
response:\n----------\n{json.dumps(oembed_response, indent=2)}\n----------") + return jsonify(oembed_response) diff --git a/clean.py b/clean.py new file mode 100644 index 0000000..cfbd7c4 --- /dev/null +++ b/clean.py @@ -0,0 +1,113 @@ +import os +import sys +import datetime +import argparse +import logging +import boto3 +from botocore.client import Config as BotoConfig +from dotenv import load_dotenv +from access_tracker import AccessTracker + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) +logger = logging.getLogger(__name__) + +def cleanup_old_files(dry_run=False, days=7, directory_prefix="niconico"): + access_tracker = AccessTracker() + required_env_vars = [ + 'NICONICOGAY_S3_ACCESS_KEY', + 'NICONICOGAY_S3_SECRET_KEY', + 'NICONICOGAY_S3_BUCKET_NAME', + 'NICONICOGAY_S3_REGION' + ] + missing_vars = [var for var in required_env_vars if not os.environ.get(var)] + if missing_vars: + logger.error(f"Missing required environment variables: {', '.join(missing_vars)}") + sys.exit(1) + + try: + s3_session = boto3.Session() + s3_client = s3_session.client( + 's3', + aws_access_key_id=os.environ['NICONICOGAY_S3_ACCESS_KEY'], + aws_secret_access_key=os.environ['NICONICOGAY_S3_SECRET_KEY'], + region_name=os.environ['NICONICOGAY_S3_REGION'], + endpoint_url=f"https://{os.environ['NICONICOGAY_S3_REGION']}.digitaloceanspaces.com", + config=BotoConfig(s3={'addressing_style': 'virtual'}), + ) + + bucket_name = os.environ['NICONICOGAY_S3_BUCKET_NAME'] + cutoff_timestamp = datetime.datetime.now(datetime.timezone.utc).timestamp() - (days * 24 * 60 * 60) + paginator = s3_client.get_paginator('list_objects_v2') + page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=f"{directory_prefix}/") + + total_files = 0 + objects_to_delete = [] + access_times = access_tracker.get_all_access_times() + + for page in page_iterator: + if 'Contents' not in page: + continue + + for obj in page['Contents']: + total_files += 1 + key = obj['Key'] # type: ignore + + # Extract video_id from S3 key (e.g., "niconico/sm12345.mp4" -> "sm12345") + if key.startswith(f"{directory_prefix}/") and key.endswith('.mp4'): + video_id = key[len(f"{directory_prefix}/"):-4] # Remove prefix and .mp4 extension + + last_access = access_times.get(video_id) + should_delete = False + + if last_access is None: + # No access record - delete files that haven't been accessed since tracking started + # For safety, only delete files older than the cutoff date + if obj['LastModified'].timestamp() < cutoff_timestamp: # type: ignore + should_delete = True + logger.debug(f"Will delete {video_id}: no access record and file is old") + elif last_access < cutoff_timestamp: + # Has access record but last access was too long ago + should_delete = True + logger.debug(f"Will delete {video_id}: last accessed {(datetime.datetime.now().timestamp() - last_access) / (24*60*60):.1f} days ago") + + if should_delete: + objects_to_delete.append({'Key': key}) + # Remove the access record since we're deleting the file + if not dry_run: + access_tracker.remove_access_record(video_id) + + if len(objects_to_delete) == 0: + logger.info("No files to delete") + return + + if dry_run: + logger.info(f"DRY RUN: Would delete {len(objects_to_delete)} out of {total_files} files") + else: + # Delete files in batches of 1000 (S3 limit?) 
+            for i in range(0, len(objects_to_delete), 1000):
+                batch = objects_to_delete[i:i+1000]
+                s3_client.delete_objects(
+                    Bucket=bucket_name,
+                    Delete={'Objects': batch}
+                )
+            logger.info(f"Successfully deleted {len(objects_to_delete)} out of {total_files} files")
+
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    load_dotenv()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Show what would be deleted without actually deleting anything"
+    )
+    args = parser.parse_args()
+
+    cleanup_old_files(dry_run=args.dry_run)
diff --git a/requirements.txt b/requirements.txt
index 01f2ec1..7bc5696 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,7 @@
 beautifulsoup4==4.12.3
 Flask==3.1.0
 Requests==2.32.3
+diskcache==5.6.3
+nndownload==1.19
+boto3
+python-dotenv
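
Reviewer note: the new code reads all of its configuration from environment variables, loaded from .env via python-dotenv. A minimal sketch of the expected .env, using only variable names that appear in this diff — every value below is a placeholder, and the region value merely illustrates the DigitalOcean Spaces endpoint the code constructs:

    NICONICOGAY_S3_ACCESS_KEY=<access key>
    NICONICOGAY_S3_SECRET_KEY=<secret key>
    NICONICOGAY_S3_BUCKET_NAME=<bucket name>
    NICONICOGAY_S3_REGION=<region, e.g. nyc3>
    NICONICOGAY_CDN_BASE_URL=<CDN base URL>
    NICONICOGAY_HOST=https://nicovideo.gay
    NICONICOGAY_USER_AGENT=Twitterbot/1.0
    NICONICOGAY_LOG=INFO

The cleanup script can be exercised safely with "python clean.py --dry-run" to see what would be removed before scheduling it for real deletions.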