Compare commits

...

63 Commits
master ... s3

Author SHA1 Message Date
e43a67b0d5
Add another reason code 2025-03-30 00:46:24 -04:00
71fa5ad6b6
Separate HTML caches 2025-03-04 10:34:30 -05:00
abbe5c3bd9
Re-order download allowed logic 2025-03-04 10:24:26 -05:00
8089130b7e
Only log "too long" if not a Twitterbot 2025-03-04 10:23:38 -05:00
d8da55520f
Handle another reason code 2025-03-04 10:22:54 -05:00
a31d7d5a90
Add extra logging 2025-03-03 22:06:15 -05:00
df8537e811
Remove play button for videos not downloaded 2025-03-03 22:03:32 -05:00
070eed8f41
Print oEmbed response in debug 2025-03-03 13:38:29 -05:00
36c34bd4f7
Better description modification 2025-03-03 13:28:44 -05:00
3456e74afb
Extra newline in log 2025-03-03 13:17:32 -05:00
d770c7df41
Set log level via env var 2025-03-03 13:16:34 -05:00
b0b552ee82
Log HTML response 2025-03-03 13:15:19 -05:00
6a78b81084
Remove title and category from OG description 2025-03-03 13:14:12 -05:00
aa755dc186
Try to fix oEmbed 2025-03-03 13:06:22 -05:00
a075a5a7a2
Warning if no OG tags found 2025-03-03 13:01:26 -05:00
255a12fcc6
Attempt to fix Twitter card display v2 2025-03-03 12:49:17 -05:00
a07da68e4f
Revert "Attempt to fix Twitter card display"
This reverts commit 6e41e842fd0f1ebacf6001ef8125465708f080a2.
2025-03-03 12:46:25 -05:00
6e41e842fd
Attempt to fix Twitter card display 2025-03-03 12:44:40 -05:00
eccea59070
Revert experimental placeholder video
Discord seems to cache the video regardless of
what it's told.
2025-02-27 16:07:14 -05:00
753d4c691a
Try to prevent caching of placeholder 2025-02-27 16:05:39 -05:00
bca73594f5
Fix again 2025-02-27 15:56:20 -05:00
7cbc5f84c9
Fix placeholder video return 2025-02-27 15:53:05 -05:00
9905d91479
Fix for Discord user agent for files 2025-02-27 15:40:34 -05:00
2ade81b3be
oops 2025-02-27 15:35:38 -05:00
86b490bab1
fix 2025-02-27 15:33:07 -05:00
5d5588f4f5
Experimental placeholder video functionality 2025-02-27 15:15:11 -05:00
d8ffe43857
Clearer log 2025-02-27 13:18:30 -05:00
dd95661352
Handle deleted videos 2025-02-27 13:14:09 -05:00
19befc9eb5
Refactor out allow check 2025-02-27 12:52:37 -05:00
96326f543f
Better log phrasing 2025-02-27 12:39:38 -05:00
c6d53e0c1c
Opposite 2025-02-27 12:08:35 -05:00
0b8f0dc1b9
Try to ignore connection pool warnings 2025-02-27 12:03:17 -05:00
e6d7278624
Ignore hidden videos 2025-02-27 11:53:51 -05:00
2ca6d6aa73
Better cache control, linter cleanup 2025-02-27 11:38:48 -05:00
cc21a2322e
Granular time logging 2025-02-27 11:32:18 -05:00
c7a2ae2b6e
nit 2025-02-27 11:31:33 -05:00
c120d9ba92
Tweak response logging level 2025-02-27 11:30:58 -05:00
419dd19faa
Clean up logging levels 2025-02-27 11:30:12 -05:00
4ac1fba240
Found the real cache issue :^) 2025-02-27 11:24:39 -05:00
c3ceb007f3
Found the issue :^) 2025-02-27 11:19:38 -05:00
e2d6cabed5
More logging 2025-02-27 11:18:24 -05:00
246de3e29d
Add diskcache logging 2025-02-27 11:16:14 -05:00
6e95c1dd52
Add some logs 2025-02-27 11:13:50 -05:00
1963ba53d9
Cache video CDN status 2025-02-27 11:12:42 -05:00
8f222ff957
Cleaner log format 2025-02-27 11:01:48 -05:00
ae803c0fe0
Make S3 optional 2025-02-27 10:54:51 -05:00
aa836a4f55
oops 2025-02-27 10:48:46 -05:00
ac86c5f5ee
Only download video if request from Discord 2025-02-27 10:29:36 -05:00
fe5c547055
Pass in user session for nndownload 2025-02-27 07:43:10 -05:00
1ce10dfae4
Remove raise for status check temporarily 2025-02-27 06:58:15 -05:00
1802eeffe3
Add cleanup script 2025-02-26 13:36:36 -05:00
71b7dac492
Remove tags that might be breaking Twitter 2025-02-26 08:45:20 -05:00
5a194507a0
Get correct video dimensions 2025-02-26 00:31:51 -05:00
77f9545db6
Fixes 2025-02-25 18:21:29 -05:00
c456200ae0
Handle unknown videos 2025-02-25 18:12:13 -05:00
8ebacc84b0
Try to fix site name again 2025-02-25 18:10:12 -05:00
b3539d7a47
Format numbers 2025-02-25 18:08:15 -05:00
3201aea856
Change mylist emoji 2025-02-25 18:06:46 -05:00
be23df3591
Meta tag cleanup
Maybe fixes site title not displaying?
2025-02-25 18:04:59 -05:00
e48159ce14
owoembed
(oEmbed)
2025-02-25 17:56:06 -05:00
e9eb88c13f
Add more meta tags 2025-02-25 17:23:50 -05:00
add8f1bbde
Load .env 2025-02-25 16:28:12 -05:00
e532c45a92
Add video proxying support
Requires S3 setup
2025-02-25 16:23:31 -05:00
4 changed files with 500 additions and 46 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@ venv
.venv
__pycache__
cookies.txt
.env

452
app.py
View File

@@ -1,90 +1,452 @@
# NOTE(review): this span is a unified-diff rendering that lost its +/- markers,
# so removed and replacement lines appear side by side (duplicate flask import,
# two logging.basicConfig calls, two cache initialisations, two cookie-warning
# lines, two "User-Agent" entries). Reconcile against the actual repository
# before treating this as runnable code.
import os
import http.cookiejar
import json
import re
import requests
from bs4 import BeautifulSoup
from flask import Flask, Response
from flask import Flask, Response, request, jsonify
from diskcache import Cache
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
import threading
import time
import tempfile
import nndownload
import boto3
from botocore.client import Config as BotoConfig
import urllib.parse
from dotenv import load_dotenv
# Load .env before any NICONICOGAY_* configuration is read below
load_dotenv()
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s.%(msecs)03d - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
# Silence noisy urllib3 connection-pool warnings
logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)
logger = logging.getLogger(__name__)
# App log level configurable via env var (e.g. NICONICOGAY_LOG=DEBUG)
logger.setLevel(os.environ.get('NICONICOGAY_LOG', 'INFO').upper())
app = Flask(__name__)
CACHE_EXPIRATION_SECONDS = 3600 # 1 hour
HOST = os.environ.get('NICONICOGAY_HOST', 'https://nicovideo.gay')
S3_BUCKET_NAME = os.environ.get('NICONICOGAY_S3_BUCKET_NAME')
S3_REGION = os.environ.get('NICONICOGAY_S3_REGION')
CDN_BASE_URL = os.environ.get('NICONICOGAY_CDN_BASE_URL')
MAX_CONCURRENT_DOWNLOADS = 3
CACHE_EXPIRATION_HTML = 60 * 60 # 1 hour
CACHE_EXPIRATION_CDN = 60 * 60 * 24 * 7 # 1 week
CACHE_SIZE_LIMIT = 100 * 1024 * 1024 # 100 MB
# NOTE(review): diff residue — the one-line cache init below was replaced by
# the if/else that follows it.
cache = None if os.environ.get('NICONICOGAY_DISABLE_CACHE', '') != '' else Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT)
cache = None
if os.environ.get('NICONICOGAY_DISABLE_CACHE', '') != '1':
    cache = Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT)
    logger.debug("Using disk cache")
else:
    logger.info("Disk cache disabled")
user_session = None
cookie_jar = http.cookiejar.MozillaCookieJar('cookies.txt')
try:
    cookie_jar.load(ignore_discard=True, ignore_expires=True)
    # Pull the niconico login token out of the cookie jar, if present
    user_session = next((cookie.value for cookie in cookie_jar if cookie.name == 'user_session'), None)
except FileNotFoundError:
    # NOTE(review): diff residue — old warning() line kept beside new info() line
    logger.warning("cookies.txt not found, starting with empty cookie jar")
    logger.info("cookies.txt not found, starting with empty cookie jar")
s = requests.Session()
s.headers.update({
    # NOTE(review): diff residue — hard-coded UA replaced by env-configurable one
    "User-Agent": "Twitterbot/1.0"
    "User-Agent": os.environ.get('NICONICOGAY_USER_AGENT', 'Twitterbot/1.0')
})
s.cookies = cookie_jar # type: ignore
# NOTE(review): diff residue — the @app.route/def proxy header below is the
# REMOVED handler (its replacement appears later in the file); the S3 client
# setup and download-queue state that follow are module-level in the new code.
@app.route("/watch/<video_id>")
def proxy(video_id):
    logger.info(f"Received request for video ID: {video_id}")
# --- module-level S3 client setup (new revision) ---
s3_client = None
# Only build an S3 client when both credentials are present in the environment
if all(key in os.environ for key in [
    'NICONICOGAY_S3_ACCESS_KEY',
    'NICONICOGAY_S3_SECRET_KEY',
]):
    s3_session = boto3.Session()
    s3_client = s3_session.client(
        's3',
        aws_access_key_id=os.environ['NICONICOGAY_S3_ACCESS_KEY'],
        aws_secret_access_key=os.environ['NICONICOGAY_S3_SECRET_KEY'],
        region_name=S3_REGION,
        # DigitalOcean Spaces S3-compatible endpoint; virtual-host addressing
        endpoint_url=f"https://{S3_REGION}.digitaloceanspaces.com",
        config=BotoConfig(s3={'addressing_style': 'virtual'}),
    )
else:
    logger.info("S3 credentials not provided. Videos will not be downloaded.")
# NOTE(review): diff residue — this cache lookup is the removed handler's body
# (`video_id` is not defined at module scope).
if cache:
    cached_html = cache.get(video_id)
    if cached_html is not None:
        logger.info(f"Using cached response for video ID: {video_id}")
        return Response(cached_html, mimetype="text/html") # type: ignore
# Shared state for the background download worker
download_tracker = {
    'active_downloads': 0,  # downloads currently running
    'in_progress': set(),   # video IDs currently being downloaded
}
download_lock = threading.Lock()
download_queue = []
# NOTE(review): diff residue — removed lines from the old handler body.
# Not in cache or cache expired; fetch from nicovideo.jp
real_url = f"https://www.nicovideo.jp/watch/{video_id}"
def download_and_upload_video(video_id, url, video_quality):
    """Download a niconico video with nndownload and upload it to the S3 CDN.

    Runs on a worker thread spawned by download_worker. Tracks itself in
    download_tracker for the duration of the call.

    Returns True on a successful upload, False otherwise.
    """
    try:
        # NOTE(review): diff residue — `real_url` is not defined in this scope;
        # this log line belongs to the removed version of the code.
        logger.info(f"Fetching content from URL: {real_url}")
        with download_lock:
            download_tracker['active_downloads'] += 1
            download_tracker['in_progress'].add(video_id)
        # delete=True: the OS-level handle cleans up the file if we crash hard
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=True) as temp_file:
            temp_path = temp_file.name
            try:
                logger.info(f"{video_id}: Starting download")
                nndownload_args = [
                    "--no-login",
                    "--user-agent", "Googlebot/2.1",
                    "--video-quality", video_quality,
                    "--output-path", temp_path,
                    url
                ]
                if user_session:
                    nndownload_args += ["--session-cookie", user_session]
                    # Drop "--no-login" (first element) when a session cookie
                    # is available, so the download runs authenticated.
                    nndownload_args = nndownload_args[1:]
                nndownload.execute(*nndownload_args)
                if os.path.exists(temp_path) and s3_client and S3_BUCKET_NAME:
                    logger.info(f"{video_id}: Downloaded, uploading to CDN")
                    try:
                        s3_key = f"niconico/{video_id}.mp4"
                        s3_client.upload_file(
                            temp_path,
                            S3_BUCKET_NAME,
                            s3_key,
                            # public-read: the CDN serves this object directly
                            ExtraArgs={'ContentType': 'video/mp4', 'ACL': 'public-read'}
                        )
                        logger.info(f"{video_id}: Upload successful to CDN")
                        if cache is not None:
                            cache.set(f"{video_id}_cdn", True, expire=CACHE_EXPIRATION_CDN)
                            # Clear HTML cache for this video to ensure next view gets updated HTML
                            cache.delete(f"{video_id}_html")
                            logger.debug(f"{video_id}: Cleared HTML cache")
                        return True
                    except Exception as e:
                        logger.error(f"{video_id}: Error uploading to CDN: {e}")
                        return False
                else:
                    logger.error(f"{video_id}: Failed to download or S3 client not configured")
                    return False
            finally:
                # Belt-and-braces removal; NamedTemporaryFile also deletes on close
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
                    logger.debug(f"Removed temporary file: {temp_path}")
    except Exception as e:
        logger.error(f"{video_id}: Error in download process: {e}")
        return False
    finally:
        # Always release this video's slot, success or failure
        with download_lock:
            download_tracker['active_downloads'] -= 1
            download_tracker['in_progress'].discard(video_id)
def download_worker():
    """Background loop that launches queued video downloads.

    Polls `download_queue` once a second; whenever fewer than
    MAX_CONCURRENT_DOWNLOADS downloads are active, starts a thread for the
    first queued video not already in progress. Runs forever; unexpected
    errors are logged and retried after a back-off.
    """
    while True:
        try:
            # Bug fix: the original released download_lock between the
            # capacity check and the queue pop, so two iterations (or a
            # concurrent enqueue) could race. Hold the lock across the whole
            # check-then-pop sequence instead.
            with download_lock:
                if (download_tracker['active_downloads'] < MAX_CONCURRENT_DOWNLOADS
                        and download_queue):
                    # Get next video that is not already being downloaded
                    for i, (queued_id, _, _) in enumerate(download_queue):
                        if queued_id not in download_tracker['in_progress']:
                            video_info = download_queue.pop(i)
                            threading.Thread(target=download_and_upload_video,
                                             args=(video_info[0], video_info[1], video_info[2])).start()
                            break
            time.sleep(1)
        except Exception as e:
            logger.error(f"Error in download worker: {e}")
            time.sleep(5)  # Back off in case of error
worker_thread = threading.Thread(target=download_worker, daemon=True)
worker_thread.start()
def is_video_in_cdn(video_id):
    """Return True when the video is already available on the CDN.

    Checks the disk cache first; on a miss, issues an S3 HEAD request.
    """
    cached_flag = cache.get(f"{video_id}_cdn") if cache is not None else None
    if cached_flag:
        logger.debug(f"{video_id}: Already uploaded to CDN (cached)")
        return True
    if s3_client and S3_BUCKET_NAME:
        try:
            s3_client.head_object(Bucket=S3_BUCKET_NAME, Key=f"niconico/{video_id}.mp4")
        except Exception:
            return False
        return True
    logger.warning("S3 client not configured. Cannot check if video exists in CDN.")
    return False
def is_video_being_downloaded(video_id):
    """Return True while a worker thread is actively downloading this video."""
    with download_lock:
        active_ids = download_tracker['in_progress']
        return video_id in active_ids
def get_cdn_url(video_id):
    """Build the public CDN URL for a downloaded video."""
    return "{}/niconico/{}.mp4".format(CDN_BASE_URL, video_id)
def get_video_resolution(params):
    """Return (width, height) of the first domand video, or (None, None)
    when no server-response params are available."""
    if not params:
        return None, None
    first_video = params['media']['domand']['videos'][0]
    return first_video['width'], first_video['height']
def get_video_quality(params, quality_level_threshold=3):
    """Get the code of the best video quality available (optionally below a certain threshold)"""
    all_videos = params['media']['domand']['videos']
    candidates = [v for v in all_videos if v['qualityLevel'] < quality_level_threshold]
    if not candidates:
        return None
    # Pick the first candidate with the strictly highest quality level
    # (same tie-breaking as max()).
    best = None
    for candidate in candidates:
        if best is None or int(candidate['qualityLevel']) > int(best['qualityLevel']):
            best = candidate
    return str(best['id'])
def get_data(video_id, real_url):
    """Get the server response for a given video ID"""
    # Returns a (params, soup) pair: `params` is the parsed "server-response"
    # JSON payload (or None when missing/unparseable) and `soup` is the parsed
    # HTML (or None on request failure).
    try:
        logger.debug(f"{video_id}: Fetching content from URL: {real_url}")
        r = s.get(real_url, timeout=10)
        # r.raise_for_status()
    except requests.RequestException as e:
        # NOTE(review): diff residue — the first two lines below are the
        # REMOVED error path (returning a Flask Response breaks the (params,
        # soup) contract); the next two are its replacement.
        logger.error(f"Error fetching the page for video ID '{video_id}': {e}")
        return Response(status=500)
        logger.error(f"{video_id}: Error fetching the page ('{real_url}'): {e}")
        return None, None
    soup = BeautifulSoup(r.text, "html.parser")
    thumbnail_url = None
    try:
        # niconico embeds its API payload in a <meta name="server-response"> tag
        server_response = soup.find("meta", {"name": "server-response"})
        if server_response:
            params = json.loads(server_response["content"])["data"]["response"] # type: ignore
            # Prefer the largest thumbnail variant available
            thumbnail_url = (
                params["video"]["thumbnail"].get("ogp") or
                params["video"]["thumbnail"].get("player") or
                params["video"]["thumbnail"].get("largeUrl") or
                params["video"]["thumbnail"].get("middleUrl") or
                params["video"]["thumbnail"].get("url")
            )
            return params, soup
    except (KeyError, json.JSONDecodeError) as e:
        # NOTE(review): diff residue — old and new warning lines both present.
        logger.warning(f"Failed to extract thumbnail info for video ID '{video_id}': {e}")
        logger.warning(f"{video_id}: Failed to extract thumbnail info: {e}")
        pass
    # NOTE(review): diff residue — this og_tags lookup belongs to the removed
    # version; the current caller extracts OG tags itself.
    og_tags = soup.find_all("meta", property=lambda x: x) # type: ignore
    return None, soup
def human_format(num):
    """Format a number in a human-readable way (e.g., 1K, 2M, etc.).

    Returns None when `num` is None. The value is rounded to three
    significant figures and suffixed with K/M/B/T.

    Bug fix: values of 1000T (1e15) and above are clamped to the 'T'
    suffix instead of raising IndexError when the magnitude ran past the
    suffix list.
    """
    if num is None:
        return None
    suffixes = ['', 'K', 'M', 'B', 'T']
    num = float('{:.3g}'.format(num))
    magnitude = 0
    # Stop at the largest suffix so huge values cannot index past the list
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), suffixes[magnitude])
def get_oembed_url(params):
    """Get the oEmbed (/owoembed) URL based on the given params (server response)"""
    if not params:
        return None
    owner = params.get('owner') or {}
    author_id = owner.get('id')
    author_name = owner.get('nickname')
    video = params.get('video', {})
    video_id = video.get('id')
    if not video_id:
        return None
    counts = video.get('count', {})
    # Each counter falls back to "n/a" when absent
    view_count = human_format(counts.get('view')) or "n/a"
    comment_count = human_format(counts.get('comment')) or "n/a"
    like_count = human_format(counts.get('like')) or "n/a"
    mylist_count = human_format(counts.get('mylist')) or "n/a"
    stats_line = f"👁️ {view_count} 💬 {comment_count} ❤️ {like_count} 📝 {mylist_count}"
    encoded_author = urllib.parse.quote(author_name) if author_name else ""
    encoded_stats = urllib.parse.quote(stats_line)
    return (
        f"{HOST}/owoembed?"
        f"author_id={author_id if author_id else ''}&"
        f"author_name={encoded_author}&"
        f"video_id={video_id}&"
        f"provider={encoded_stats}"
    )
@app.route("/watch/<video_id>")
def proxy(video_id):
    """Serve a minimal HTML page carrying Open Graph / Twitter meta tags for
    a niconico video, tailored to the requesting bot and cached per
    user-agent class (generic / Twitterbot / Discordbot)."""
    logger.info(f"{video_id}: Received request")
    # Each bot class gets its own cached HTML variant
    cache_html_suffix = "_html"
    request_user_agent = request.headers.get('User-Agent', '').lower()
    if 'twitterbot' in request_user_agent:
        cache_html_suffix = "_html_twitterbot"
    elif 'discordbot' in request_user_agent:
        cache_html_suffix = "_html_discordbot"
    if cache is not None:
        logger.debug(f"{video_id}: Checking cache")
        cached_html = cache.get(f"{video_id}{cache_html_suffix}")
        if cached_html is not None:
            logger.info(f"{video_id}: Returning cached response")
            return Response(cached_html, mimetype="text/html") # type: ignore
        logger.debug(f"{video_id}: Cache miss - fetching")
    # Not in cache or cache expired; fetch from nicovideo.jp
    real_url = f"https://www.nicovideo.jp/watch/{video_id}"
    params, soup = get_data(video_id, real_url)
    if not params or not soup:
        logger.error(f"{video_id}: Failed to fetch data")
        return Response("Video not found", status=404)
    # niconico reports hidden/deleted videos via reasonCode
    reason_code = params.get('reasonCode', '').upper()
    if reason_code in ['HIDDEN_VIDEO', 'ADMINISTRATOR_DELETE_VIDEO', 'RIGHT_HOLDER_DELETE_VIDEO', 'DELETED_VIDEO']:
        logger.warning(f"{video_id}: Video is hidden or deleted ({reason_code}) - returning 404")
        return Response("Video not found", status=404)
    # Prefer the largest thumbnail variant available
    thumbnail_url = (
        params["video"]["thumbnail"].get("ogp") or
        params["video"]["thumbnail"].get("player") or
        params["video"]["thumbnail"].get("largeUrl") or
        params["video"]["thumbnail"].get("middleUrl") or
        params["video"]["thumbnail"].get("url")
    ) if params else None
    video_width, video_height = get_video_resolution(params) if params else (None, None)
    download_allowed = True
    # NOTE(review): download_allowed is always True at this point, so the
    # first conjunct is redundant — only Discordbot requests allow a download.
    if download_allowed and 'discordbot' not in request_user_agent:
        logger.info(f"{video_id}: Video download ignored due to user agent ({request_user_agent})")
        download_allowed = False
    if params['video']['duration'] > 60 * 20: # 20 minutes
        logger.info(f"{video_id}: Video download ignored due to duration ({params['video']['duration']} seconds)")
        download_allowed = False
    video_quality = get_video_quality(params) if params else None
    if download_allowed and video_quality is not None:
        video_in_cdn = is_video_in_cdn(video_id)
        video_in_progress = is_video_being_downloaded(video_id)
        if not video_in_cdn and not video_in_progress and s3_client:
            with download_lock:
                # Add to queue if not already in it
                queue_video_ids = [item[0] for item in download_queue]
                if video_id not in queue_video_ids:
                    download_queue.append((video_id, real_url, video_quality))
                    logger.info(f"{video_id}: Queued for download")
    cdn_video_url = get_cdn_url(video_id)
    og_tags = soup.find_all("meta", attrs={"property": True})
    if len(og_tags) == 0:
        logger.warning(f"{video_id}: No Open Graph tags found")
    og_title = None
    og_description = None
    og_category = None
    for tag in og_tags:
        # Remove attribute(s) added by niconico
        if 'data-server' in tag.attrs:
            del tag.attrs['data-server']
        # Set title
        if tag.get("property") == "og:title":
            og_title = tag["content"]
        # Set description
        if tag.get("property") == "og:description":
            og_description = tag["content"]
            if og_description and og_title:
                # The description is formatted like "Title [Category] Description"
                # Extract category (just in case this is useful later), and keep only the description part.
                match = re.search(rf"^{re.escape(og_title)}(\s+\[(.*?)\])?\s+(.*)", og_description)
                if match:
                    og_category = match.group(2) if match.group(2) else None
                    og_description = match.group(3)
                    tag["content"] = og_description
        # Fix thumbnail
        if tag.get("property") == "og:image" and thumbnail_url:
            tag["content"] = thumbnail_url
        # Fix video URL
        if tag.get("property") == "og:video:url" or tag.get("property") == "og:video:secure_url":
            tag["content"] = cdn_video_url
    # NOTE(review): diff residue — the next assignment and the html_response
    # f-string below are the REMOVED versions; the current ones follow.
    og_tags_str = "\n".join(str(tag) for tag in og_tags)
    html_response = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
{og_tags_str}
</head>
<body>
</body>
</html>
"""
    # Current tag assembly: drop the original og:site_name, re-add a fixed one,
    # and advertise the oEmbed endpoint.
    og_tags_str = "\n".join(str(tag) for tag in og_tags if tag.get("property") not in ["og:site_name"])
    og_tags_str += '\n<meta content="ニコニコ動画" property="og:site_name"/>'
    og_tags_str += f'\n<link rel="alternate" href="{get_oembed_url(params)}" type="application/json+oembed" title="{video_id}"/>'
    # NOTE(review): diff residue — removed cache write (old key and old TTL).
    if cache:
        logging.info(f"Caching response for video ID: {video_id}")
        cache.set(video_id, html_response, expire=CACHE_EXPIRATION_SECONDS)
    # Discord seems to ignore video URLs when Twitter meta tags are present,
    # so in addition to including these when the User Agent is a Twitterbot,
    # we also include them when the video is too long to download in order to remove the play button.
    if 'twitterbot' in request_user_agent or not download_allowed:
        if 'twitterbot' in request_user_agent:
            logger.info(f"{video_id}: Twitterbot detected - adding Twitter tags")
        elif not download_allowed:
            logger.info(f"{video_id}: Video too long to download - will not show play button")
        og_tags_str += f'\n<meta content="{thumbnail_url}" property="twitter:image"/>'
        og_tags_str += '\n<meta content="summary_large_image" property="twitter:card"/>'
        og_tags_str += '\n<meta content="www.nicovideo.gay" name="twitter:domain"/>'
        og_tags_str += f'\n<meta content="{request.url}" name="twitter:url"/>'
        if og_title:
            og_tags_str += f'\n<meta content="{og_title}" name="twitter:title"/>'
        if og_description:
            og_tags_str += f'\n<meta content="{og_description}" name="twitter:description"/>'
        # og_tags_str += '\n<meta content="video/mp4" property="twitter:player:stream:content_type"/>'
        # og_tags_str += f'\n<meta content="{cdn_video_url}" property="twitter:player:stream"/>'
        # if video_width:
        #     og_tags_str += f'\n<meta content="{video_width}" property="twitter:player:width"/>'
        # if video_height:
        #     og_tags_str += f'\n<meta content="{video_height}" property="twitter:player:height"/>'
    html_response = f"""<!DOCTYPE html>
<!--
niconico proxy - brought to you by https://mmaker.moe
this service is intended to be used by social media open graph embed generators and discordbot.
please do not abuse! the videos returned by the CDN are lower quality and intended to only be proxied by discord, not hotlinked.
if you want to download videos, please consider using a tool like nndownload: https://github.com/AlexAplin/nndownload
-->
<html lang="en"><head><meta charset="UTF-8">
{og_tags_str}
</head><body></body></html>"""
    if cache is not None:
        logger.info(f"{video_id}: Caching HTML response")
        cache.set(f"{video_id}{cache_html_suffix}", html_response, expire=CACHE_EXPIRATION_HTML)
    logger.info(f"{video_id}: Returning response")
    logger.debug(f"{video_id}: HTML response:\n----------\n{html_response}\n----------")
    return Response(html_response, mimetype="text/html")
@app.route("/owoembed")
def owoembed():
    """
    Handles oEmbed requests with parameters in the URL
    Returns JSON payload in oEmbed format
    """
    logger.info("Received request for oEmbed endpoint")
    # Pull parameters off the query string, decoding the percent-encoded ones
    query = request.args
    author_id = query.get('author_id', '')
    video_id = query.get('video_id', '')
    author_name_decoded = urllib.parse.unquote(query.get('author_name', ''))
    provider_decoded = urllib.parse.unquote(query.get('provider', ''))
    # Assemble the oEmbed payload; author links to the uploader, provider
    # links back to the original video page.
    oembed_response = {
        "author_name": author_name_decoded,
        "author_url": f"https://www.nicovideo.jp/user/{author_id}",
        "provider_name": provider_decoded,
        "provider_url": f"https://www.nicovideo.jp/watch/{video_id}",
        "title": "Embed",
        "type": "link",
        "version": "1.0"
    }
    logger.info(f"{video_id}: Returning oEmbed response")
    logger.debug(f"{video_id}: oEmbed response:\n----------\n{json.dumps(oembed_response, indent=2)}\n----------")
    return jsonify(oembed_response)

87
clean.py Normal file
View File

@@ -0,0 +1,87 @@
import os
import sys
import datetime
import argparse
import logging
import boto3
from botocore.client import Config as BotoConfig
from dotenv import load_dotenv
# Root logger configuration for the cleanup script: timestamped INFO output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
def cleanup_old_files(dry_run=False, days=7, directory_prefix="niconico"):
    """Delete S3 objects under `directory_prefix/` older than `days` days.

    Args:
        dry_run: When True, only report what would be deleted.
        days: Age threshold in days, compared against S3 LastModified.
        directory_prefix: Key prefix (without trailing slash) to scan.

    Exits the process with status 1 on missing configuration or S3 errors.
    """
    required_env_vars = [
        'NICONICOGAY_S3_ACCESS_KEY',
        'NICONICOGAY_S3_SECRET_KEY',
        'NICONICOGAY_S3_BUCKET_NAME',
        'NICONICOGAY_S3_REGION'
    ]
    missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
    if missing_vars:
        logger.error(f"Missing required environment variables: {', '.join(missing_vars)}")
        sys.exit(1)
    try:
        s3_session = boto3.Session()
        s3_client = s3_session.client(
            's3',
            aws_access_key_id=os.environ['NICONICOGAY_S3_ACCESS_KEY'],
            aws_secret_access_key=os.environ['NICONICOGAY_S3_SECRET_KEY'],
            region_name=os.environ['NICONICOGAY_S3_REGION'],
            # DigitalOcean Spaces S3-compatible endpoint
            endpoint_url=f"https://{os.environ['NICONICOGAY_S3_REGION']}.digitaloceanspaces.com",
            config=BotoConfig(s3={'addressing_style': 'virtual'}),
        )
        bucket_name = os.environ['NICONICOGAY_S3_BUCKET_NAME']
        cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)
        paginator = s3_client.get_paginator('list_objects_v2')
        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=f"{directory_prefix}/")
        total_files = 0
        objects_to_delete = []
        for page in page_iterator:
            # Empty pages carry no 'Contents' key
            for obj in page.get('Contents', []):
                total_files += 1
                if obj['LastModified'] < cutoff_date:  # type: ignore
                    objects_to_delete.append({'Key': obj['Key']})  # type: ignore
        if len(objects_to_delete) == 0:
            logger.info("No files to delete")
            return
        if dry_run:
            logger.info(f"DRY RUN: Would delete {len(objects_to_delete)} out of {total_files} files")
        else:
            deleted = 0
            # Delete files in batches of 1000 (the DeleteObjects per-request limit)
            for i in range(0, len(objects_to_delete), 1000):
                batch = objects_to_delete[i:i+1000]
                response = s3_client.delete_objects(
                    Bucket=bucket_name,
                    Delete={'Objects': batch}
                )
                deleted += len(batch)
                # Bug fix: DeleteObjects can partially fail; the original
                # ignored the response's Errors list and always reported
                # full success.
                for err in response.get('Errors', []):
                    deleted -= 1
                    logger.error(f"Failed to delete {err.get('Key')}: {err.get('Message')}")
            logger.info(f"Successfully deleted {deleted} out of {total_files} files")
    except Exception as e:
        logger.error(f"Error: {e}")
        sys.exit(1)
if __name__ == "__main__":
    load_dotenv()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be deleted without actually deleting anything"
    )
    # Generalization: expose cleanup_old_files' existing knobs on the CLI.
    # Defaults match the function's own defaults, so behavior is unchanged
    # for existing invocations.
    parser.add_argument(
        "--days",
        type=int,
        default=7,
        help="Delete files older than this many days (default: 7)"
    )
    parser.add_argument(
        "--prefix",
        default="niconico",
        help="S3 key prefix to clean up (default: niconico)"
    )
    args = parser.parse_args()
    cleanup_old_files(dry_run=args.dry_run, days=args.days, directory_prefix=args.prefix)

View File

@@ -1,3 +1,7 @@
beautifulsoup4==4.12.3
Flask==3.1.0
Requests==2.32.3
diskcache==5.6.3
nndownload==1.19
boto3
python-dotenv