niconico-gay/app.py

import http.cookiejar
import json
import logging

import requests
from bs4 import BeautifulSoup
from diskcache import Cache
from flask import Flask, Response

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
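
# Cache rendered pages on disk so repeated requests for the same video
# don't have to hit nicovideo.jp each time.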
CACHE_EXPIRATION_SECONDS = 3600  # 1 hour
CACHE_SIZE_LIMIT = 100 * 1024 * 1024  # 100 MB
cache = Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT)
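
# Load saved cookies (Netscape/Mozilla cookies.txt format) so requests can
# reuse an existing nicovideo session if one is available.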
cookie_jar = http.cookiejar.MozillaCookieJar("cookies.txt")
try:
    cookie_jar.load(ignore_discard=True, ignore_expires=True)
except FileNotFoundError:
    logger.warning("cookies.txt not found, starting with empty cookie jar")

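# Present ourselves as Twitter's link-preview crawler so nicovideo returns
# its crawler-friendly markup rather than the JS-heavy player page.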
s = requests.Session()
s.headers.update({
"User-Agent": "Twitterbot/1.0"
})
s.cookies = cookie_jar  # type: ignore


@app.route("/watch/<video_id>")
def proxy(video_id):
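    """Serve a stripped-down page for the given video containing only its
    <meta> tags, which is all a link-preview embedder needs."""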
    logger.info(f"Received request for video ID: {video_id}")

    cached_html = cache.get(video_id)
    if cached_html is not None:
        logger.info(f"Using cached response for video ID: {video_id}")
        return Response(cached_html, mimetype="text/html")  # type: ignore

    # Not in cache or cache expired; fetch from nicovideo.jp
    real_url = f"https://www.nicovideo.jp/watch/{video_id}"
    try:
        logger.info(f"Fetching content from URL: {real_url}")
        r = s.get(real_url, timeout=10)
    except requests.RequestException as e:
        logger.error(f"Error fetching the page for video ID '{video_id}': {e}")
        return Response(status=500)

    soup = BeautifulSoup(r.text, "html.parser")
    thumbnail_url = None
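    # nicovideo embeds the player payload as JSON in a <meta
    # name="server-response"> tag; pick the best thumbnail variant it offers.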
    try:
        server_response = soup.find("meta", {"name": "server-response"})
        if server_response:
            params = json.loads(server_response["content"])["data"]["response"]  # type: ignore
            thumbnail = params["video"]["thumbnail"]
            thumbnail_url = (
                thumbnail.get("ogp") or
                thumbnail.get("player") or
                thumbnail.get("largeUrl") or
                thumbnail.get("middleUrl") or
                thumbnail.get("url")
            )
    except (KeyError, json.JSONDecodeError) as e:
        logger.warning(f"Failed to extract thumbnail info for video ID '{video_id}': {e}")
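
    # Re-emit every <meta property=...> tag from the original page, swapping
    # in the better thumbnail for og:image when one was found.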
    og_tags = soup.find_all("meta", property=True)
    for tag in og_tags:
        if tag.get("property") == "og:image" and thumbnail_url:
            tag["content"] = thumbnail_url

    og_tags_str = "\n".join(str(tag) for tag in og_tags)
    html_response = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        {og_tags_str}
    </head>
    <body>
    </body>
    </html>
    """
    cache.set(video_id, html_response, expire=CACHE_EXPIRATION_SECONDS)

    return Response(html_response, mimetype="text/html")
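

# A minimal entry point for local testing (an assumption, not part of the
# original listing); the Flask dev server suffices here, but a production
# deployment would sit behind a proper WSGI server instead.
if __name__ == "__main__":
    app.run(host="127.0.0.1", port=5000)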