"""Flask proxy that re-serves the meta tags of a nicovideo.jp watch page.

For ``/watch/<video_id>`` the app fetches the matching nicovideo.jp watch
page, collects every ``<meta>`` tag that carries a non-empty ``property``
attribute, swaps the ``og:image`` content for a thumbnail URL taken from the
page's ``server-response`` JSON (when one can be found), and returns just
those tags as an HTML response. Rendered responses are cached on disk.
"""

import http.cookiejar
import json
import logging
from typing import Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from diskcache import Cache
from flask import Flask, Response

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

CACHE_EXPIRATION_SECONDS = 3600  # 1 hour
CACHE_SIZE_LIMIT = 100 * 1024 * 1024  # 100 MB
cache = Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT)

# Thumbnail variants in order of preference; the first non-empty one wins.
_THUMBNAIL_KEYS = ("ogp", "player", "largeUrl", "middleUrl", "url")

cookie_jar = http.cookiejar.MozillaCookieJar('cookies.txt')
try:
    cookie_jar.load(ignore_discard=True, ignore_expires=True)
except FileNotFoundError:
    logger.warning("cookies.txt not found, starting with empty cookie jar")

s = requests.Session()
s.headers.update({
    "User-Agent": "Twitterbot/1.0"
})
s.cookies = cookie_jar  # type: ignore


def _extract_thumbnail_url(soup: BeautifulSoup) -> Optional[str]:
    """Return the preferred thumbnail URL from the page's embedded JSON.

    Reads the ``content`` attribute of ``<meta name="server-response">``,
    parses it as JSON and looks under ``data.response.video.thumbnail`` for
    the first non-empty entry among ``_THUMBNAIL_KEYS``.

    Returns:
        The thumbnail URL, or ``None`` when the meta tag is absent or none of
        the known keys holds a value.

    Raises:
        KeyError, TypeError, AttributeError, json.JSONDecodeError: when the
            embedded payload does not have the expected shape.
    """
    server_response = soup.find("meta", {"name": "server-response"})
    if not server_response:
        return None

    params = json.loads(server_response["content"])["data"]["response"]  # type: ignore
    thumbnails = params["video"]["thumbnail"]
    for key in _THUMBNAIL_KEYS:
        url = thumbnails.get(key)
        if url:
            return url
    return None


@app.route("/watch/<video_id>")
def proxy(video_id: str) -> Response:
    """Serve the (cached) meta tags for the given nicovideo.jp video.

    Args:
        video_id: Video identifier taken from the request path.

    Returns:
        A ``text/html`` response containing the page's meta tags, or an empty
        500 response when the upstream request itself fails.
    """
    logger.info("Received request for video ID: %s", video_id)

    cached_html = cache.get(video_id)
    if cached_html is not None:
        logger.info("Using cached response for video ID: %s", video_id)
        return Response(cached_html, mimetype="text/html")  # type: ignore

    # Not in cache or cache expired; fetch from nicovideo.jp.
    # The ID is percent-encoded so characters such as "?" or "#" arriving in
    # the (already URL-decoded) path cannot change the upstream request.
    real_url = f"https://www.nicovideo.jp/watch/{quote(video_id, safe='')}"
    try:
        logger.info("Fetching content from URL: %s", real_url)
        r = s.get(real_url, timeout=10)
    except requests.RequestException as e:
        logger.error("Error fetching the page for video ID '%s': %s", video_id, e)
        return Response(status=500)

    soup = BeautifulSoup(r.text, "html.parser")

    # Thumbnail extraction is best-effort: any unexpected payload shape
    # (missing keys, null objects, invalid JSON) just leaves og:image as-is.
    thumbnail_url = None
    try:
        thumbnail_url = _extract_thumbnail_url(soup)
    except (KeyError, TypeError, AttributeError, json.JSONDecodeError) as e:
        logger.warning(
            "Failed to extract thumbnail info for video ID '%s': %s", video_id, e
        )

    # Every <meta> tag with a non-empty "property" attribute is forwarded.
    og_tags = soup.find_all("meta", property=lambda x: x)  # type: ignore
    for tag in og_tags:
        if tag.get("property") == "og:image" and thumbnail_url:
            tag["content"] = thumbnail_url

    og_tags_str = "\n".join(str(tag) for tag in og_tags)
    html_response = f""" {og_tags_str} """

    # Upstream server errors are likely transient, so they are served but not
    # cached; otherwise a brief outage would be replayed for the whole TTL.
    if r.status_code < 500:
        cache.set(video_id, html_response, expire=CACHE_EXPIRATION_SECONDS)
    else:
        logger.warning(
            "Upstream returned status %s for video ID '%s'; response not cached",
            r.status_code,
            video_id,
        )

    return Response(html_response, mimetype="text/html")