Add video proxying support

Requires S3 setup
This commit is contained in:
MMaker 2025-02-25 16:23:31 -05:00
parent 91355d7ff1
commit e532c45a92
Signed by: mmaker
GPG Key ID: CCE79B8FEDA40FB2
2 changed files with 172 additions and 2 deletions

171
app.py
View File

@ -7,13 +7,25 @@ from flask import Flask, Response
from diskcache import Cache from diskcache import Cache
import logging import logging
import threading
import time
import tempfile
import nndownload
import boto3
from botocore.client import Config as BotoConfig
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
app = Flask(__name__) app = Flask(__name__)
S3_BUCKET_NAME = os.environ.get('NICONICOGAY_S3_BUCKET_NAME')
S3_REGION = os.environ.get('NICONICOGAY_S3_REGION')
CDN_BASE_URL = os.environ.get('NICONICOGAY_CDN_BASE_URL')
MAX_CONCURRENT_DOWNLOADS = 3
CACHE_EXPIRATION_SECONDS = 3600 # 1 hour CACHE_EXPIRATION_SECONDS = 3600 # 1 hour
CACHE_SIZE_LIMIT = 100 * 1024 * 1024 # 100 MB CACHE_SIZE_LIMIT = 100 * 1024 * 1024 # 100 MB
cache = None if os.environ.get('NICONICOGAY_DISABLE_CACHE', '') != '' else Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT) cache = None if os.environ.get('NICONICOGAY_DISABLE_CACHE', '') != '' else Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT)
cookie_jar = http.cookiejar.MozillaCookieJar('cookies.txt') cookie_jar = http.cookiejar.MozillaCookieJar('cookies.txt')
@ -24,10 +36,147 @@ except FileNotFoundError:
s = requests.Session() s = requests.Session()
s.headers.update({ s.headers.update({
"User-Agent": "Twitterbot/1.0" "User-Agent": os.environ.get('NICONICOGAY_USER_AGENT', 'Twitterbot/1.0')
}) })
s.cookies = cookie_jar # type: ignore s.cookies = cookie_jar # type: ignore
# S3 is mandatory: without credentials the proxy cannot upload videos, so
# bail out at startup rather than failing on every request later.
if all(key in os.environ for key in [
    'NICONICOGAY_S3_ACCESS_KEY',
    'NICONICOGAY_S3_SECRET_KEY',
]):
    s3_session = boto3.Session()
    # DigitalOcean Spaces is S3-compatible; virtual addressing style puts the
    # bucket name in the hostname, which Spaces requires.
    s3_client = s3_session.client(
        's3',
        aws_access_key_id=os.environ['NICONICOGAY_S3_ACCESS_KEY'],
        aws_secret_access_key=os.environ['NICONICOGAY_S3_SECRET_KEY'],
        region_name=S3_REGION,
        endpoint_url=f"https://{S3_REGION}.digitaloceanspaces.com",
        config=BotoConfig(s3={'addressing_style': 'virtual'}),
    )
else:
    logger.warning("S3 credentials not provided, exiting")
    exit(1)
# Shared state for the background download machinery.
# 'active_downloads' counts running download threads; 'in_progress' holds the
# video IDs currently being downloaded so the same video is never fetched twice.
download_tracker = {
    'active_downloads': 0,
    'in_progress': set(),
}
# Guards download_tracker and download_queue (accessed from request handlers,
# the worker loop, and download threads).
download_lock = threading.Lock()
# Pending (video_id, url, video_quality) tuples awaiting a free download slot.
download_queue = []
def download_and_upload_video(video_id, url, video_quality):
    """Download a video with nndownload and upload it to the S3-backed CDN.

    Runs in its own thread; bookkeeping in ``download_tracker`` is guarded by
    ``download_lock`` so the worker loop never starts a duplicate download.

    Args:
        video_id: Niconico video ID (e.g. "sm12345").
        url: Watch-page URL passed through to nndownload.
        video_quality: Quality code string accepted by nndownload.

    Returns:
        True if the video was uploaded to the CDN, False on any failure.
    """
    try:
        with download_lock:
            download_tracker['active_downloads'] += 1
            download_tracker['in_progress'].add(video_id)
        # delete=False is deliberate: nndownload writes to the *path* (not the
        # handle), and cleanup is done manually in the finally below. With
        # delete=True the context manager would unlink the file a second time
        # on exit, raising FileNotFoundError *after* a successful upload and
        # turning the True return into False via the outer except. It also
        # keeps an open handle on the path, which breaks on Windows.
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
            temp_path = temp_file.name
        try:
            logger.info(f"Starting download for video ID: {video_id}")
            nndownload.execute(
                "--no-login",
                "--user-agent", "Googlebot/2.1",
                "--video-quality", video_quality,
                "--output-path", temp_path,
                url
            )
            if os.path.exists(temp_path) and s3_client:
                logger.info(f"Downloaded video {video_id}, uploading to CDN")
                try:
                    s3_key = f"niconico/{video_id}.mp4"
                    s3_client.upload_file(
                        temp_path,
                        S3_BUCKET_NAME,
                        s3_key,
                        ExtraArgs={'ContentType': 'video/mp4', 'ACL': 'public-read'}
                    )
                    logger.info(f"Successfully uploaded video {video_id} to CDN")
                    # Clear cache for this video to ensure next view gets updated HTML
                    if cache:
                        cache.delete(video_id)
                        logger.info(f"Cleared cache for video ID: {video_id}")
                    return True
                except Exception as e:
                    logger.error(f"Error uploading video {video_id} to CDN: {e}")
                    return False
            else:
                logger.error(f"Failed to download video {video_id} or S3 client not configured")
                return False
        finally:
            # Manual cleanup of the temp file (see delete=False note above).
            if os.path.exists(temp_path):
                os.unlink(temp_path)
                logger.info(f"Removed temporary file: {temp_path}")
    except Exception as e:
        logger.error(f"Error in download process for video {video_id}: {e}")
        return False
    finally:
        # Always release the slot, even on unexpected errors, so the worker
        # loop's concurrency accounting cannot leak.
        with download_lock:
            download_tracker['active_downloads'] -= 1
            download_tracker['in_progress'].discard(video_id)
def download_worker():
    """Background loop dispatching queued downloads to worker threads.

    Polls ``download_queue`` once per second and starts a thread per item,
    honoring ``MAX_CONCURRENT_DOWNLOADS``. Runs forever; intended to be run
    as a daemon thread.
    """
    while True:
        try:
            next_item = None
            # Do the slot check and the queue pop under ONE lock acquisition.
            # (Checking the limit, releasing the lock, and re-acquiring it to
            # pop — as the original did — lets two iterations both observe a
            # free slot and exceed MAX_CONCURRENT_DOWNLOADS.)
            with download_lock:
                if (download_queue and
                        download_tracker['active_downloads'] < MAX_CONCURRENT_DOWNLOADS):
                    # Get next video that is not already being downloaded
                    for i, (video_id, _, _) in enumerate(download_queue):
                        if video_id not in download_tracker['in_progress']:
                            next_item = download_queue.pop(i)
                            break
            # Spawn outside the lock so thread startup never blocks other
            # threads touching the queue/tracker.
            if next_item is not None:
                threading.Thread(target=download_and_upload_video,
                                 args=next_item).start()
            time.sleep(1)
        except Exception as e:
            logger.error(f"Error in download worker: {e}")
            time.sleep(5)  # Back off in case of error
# Single dispatcher thread; daemon=True so it never blocks interpreter shutdown.
worker_thread = threading.Thread(target=download_worker, daemon=True)
worker_thread.start()
def is_video_in_cdn(video_id):
    """Return True if the video object already exists in the CDN bucket.

    Best-effort: any S3 error (404, auth, network) is treated as "not there".
    """
    if not s3_client:
        return False
    object_key = f"niconico/{video_id}.mp4"
    try:
        s3_client.head_object(Bucket=S3_BUCKET_NAME, Key=object_key)
    except Exception:
        return False
    return True
def is_video_being_downloaded(video_id):
    """Return True while a download thread currently owns this video ID."""
    with download_lock:
        active_ids = download_tracker['in_progress']
        return video_id in active_ids
def get_cdn_url(video_id):
    """Build the public CDN URL for a video (whether or not it exists yet)."""
    return "{}/niconico/{}.mp4".format(CDN_BASE_URL, video_id)
def allow_download(params):
    """Decide whether a video qualifies for CDN mirroring.

    Videos longer than 15 minutes are rejected to bound storage and
    download time.
    """
    max_duration_seconds = 60 * 15
    return params['video']['duration'] <= max_duration_seconds
def get_video_quality(params, quality_level_threshold=3):
    """Get the code of the best video quality available (optionally below a certain threshold)"""
    candidates = [
        video for video in params['media']['domand']['videos']
        if video['qualityLevel'] < quality_level_threshold
    ]
    if not candidates:
        return None
    best = max(candidates, key=lambda video: int(video['qualityLevel']))
    return str(best['id'])
@app.route("/watch/<video_id>") @app.route("/watch/<video_id>")
def proxy(video_id): def proxy(video_id):
logger.info(f"Received request for video ID: {video_id}") logger.info(f"Received request for video ID: {video_id}")
@ -43,6 +192,7 @@ def proxy(video_id):
try: try:
logger.info(f"Fetching content from URL: {real_url}") logger.info(f"Fetching content from URL: {real_url}")
r = s.get(real_url, timeout=10) r = s.get(real_url, timeout=10)
r.raise_for_status()
except requests.RequestException as e: except requests.RequestException as e:
logger.error(f"Error fetching the page for video ID '{video_id}': {e}") logger.error(f"Error fetching the page for video ID '{video_id}': {e}")
return Response(status=500) return Response(status=500)
@ -64,11 +214,28 @@ def proxy(video_id):
logger.warning(f"Failed to extract thumbnail info for video ID '{video_id}': {e}") logger.warning(f"Failed to extract thumbnail info for video ID '{video_id}': {e}")
pass pass
download_allowed = allow_download(params) if params else False
video_quality = get_video_quality(params) if params else None
if download_allowed and video_quality is not None:
video_in_cdn = is_video_in_cdn(video_id)
video_in_progress = is_video_being_downloaded(video_id)
if not video_in_cdn and not video_in_progress and s3_client:
with download_lock:
# Add to queue if not already in it
queue_video_ids = [item[0] for item in download_queue]
if video_id not in queue_video_ids:
download_queue.append((video_id, real_url, video_quality))
logger.info(f"Queued video ID {video_id} for download")
cdn_video_url = get_cdn_url(video_id)
og_tags = soup.find_all("meta", property=lambda x: x) # type: ignore og_tags = soup.find_all("meta", property=lambda x: x) # type: ignore
for tag in og_tags: for tag in og_tags:
# Fix thumbnail # Fix thumbnail
if tag.get("property") == "og:image" and thumbnail_url: if tag.get("property") == "og:image" and thumbnail_url:
tag["content"] = thumbnail_url tag["content"] = thumbnail_url
# Fix video URL
if tag.get("property") == "og:video:url" or tag.get("property") == "og:video:secure_url":
tag["content"] = cdn_video_url
og_tags_str = "\n".join(str(tag) for tag in og_tags) og_tags_str = "\n".join(str(tag) for tag in og_tags)
html_response = f""" html_response = f"""

View File

@ -1,3 +1,6 @@
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
Flask==3.1.0 Flask==3.1.0
Requests==2.32.3 Requests==2.32.3
diskcache==5.6.3
nndownload==1.19
boto3