niconico-gay/app.py
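
"""Flask app that serves stripped-down nicovideo watch pages containing only their OpenGraph <meta> tags."""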

import http.cookiejar
import json
import logging
import os

import requests
from bs4 import BeautifulSoup
from diskcache import Cache
from flask import Flask, Response

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
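
# Rendered pages are cached on disk for an hour; set NICONICOGAY_DISABLE_CACHE
# to any non-empty value to turn caching off.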
CACHE_EXPIRATION_SECONDS = 3600 # 1 hour
CACHE_SIZE_LIMIT = 100 * 1024 * 1024 # 100 MB
cache = None if os.environ.get('NICONICOGAY_DISABLE_CACHE', '') != '' else Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT)
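
# Load cookies from a Netscape/Mozilla-format cookies.txt if one is present
# (presumably an existing nicovideo session).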
cookie_jar = http.cookiejar.MozillaCookieJar('cookies.txt')
try:
    cookie_jar.load(ignore_discard=True, ignore_expires=True)
except FileNotFoundError:
    logger.warning("cookies.txt not found, starting with empty cookie jar")
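
# All upstream requests identify as Twitter's link-preview crawler; the assumption
# is that nicovideo serves crawler-friendly pages with OGP metadata to this UA.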
s = requests.Session()
s.headers.update({
    "User-Agent": "Twitterbot/1.0"
})
s.cookies = cookie_jar  # type: ignore

@app.route("/watch/<video_id>")
def proxy(video_id):
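    """Serve a stripped-down copy of a nicovideo watch page.

    Only the page's OpenGraph-style <meta> tags are kept, with og:image
    rewritten to a higher-quality thumbnail when one can be extracted.
    """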
    logger.info(f"Received request for video ID: {video_id}")

    if cache:
        cached_html = cache.get(video_id)
        if cached_html is not None:
            logger.info(f"Using cached response for video ID: {video_id}")
            return Response(cached_html, mimetype="text/html")  # type: ignore

    # Not in cache or cache expired; fetch from nicovideo.jp
    real_url = f"https://www.nicovideo.jp/watch/{video_id}"
    try:
        logger.info(f"Fetching content from URL: {real_url}")
        r = s.get(real_url, timeout=10)
    except requests.RequestException as e:
        logger.error(f"Error fetching the page for video ID '{video_id}': {e}")
        return Response(status=500)

    soup = BeautifulSoup(r.text, "html.parser")
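
    # nicovideo embeds a JSON payload in <meta name="server-response">; pick the
    # best thumbnail variant it offers, from OGP-specific down to the default URL.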
    thumbnail_url = None
    try:
        server_response = soup.find("meta", {"name": "server-response"})
        if server_response:
            params = json.loads(server_response["content"])["data"]["response"]  # type: ignore
            thumbnail_url = (
                params["video"]["thumbnail"].get("ogp") or
                params["video"]["thumbnail"].get("player") or
                params["video"]["thumbnail"].get("largeUrl") or
                params["video"]["thumbnail"].get("middleUrl") or
                params["video"]["thumbnail"].get("url")
            )
    except (KeyError, json.JSONDecodeError) as e:
        logger.warning(f"Failed to extract thumbnail info for video ID '{video_id}': {e}")
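
    # Collect every <meta property="..."> (OpenGraph-style) tag from the original page.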
    og_tags = soup.find_all("meta", property=lambda x: x)  # type: ignore
    for tag in og_tags:
        # Fix thumbnail
        if tag.get("property") == "og:image" and thumbnail_url:
            tag["content"] = thumbnail_url

    og_tags_str = "\n".join(str(tag) for tag in og_tags)
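
    # Rebuild a bare page that carries only the collected metadata.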
    html_response = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        {og_tags_str}
    </head>
    <body>
    </body>
    </html>
    """
    if cache:
        logger.info(f"Caching response for video ID: {video_id}")
        cache.set(video_id, html_response, expire=CACHE_EXPIRATION_SECONDS)

    return Response(html_response, mimetype="text/html")
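
# Local usage sketch (assumptions: Flask dev server on port 5000, illustrative video ID):
#   flask --app app run --port 5000
#   curl http://localhost:5000/watch/sm12345678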