Add video proxying support
Requires S3 setup
This commit is contained in:
parent
91355d7ff1
commit
e532c45a92
171
app.py
171
app.py
@ -7,13 +7,25 @@ from flask import Flask, Response
|
|||||||
from diskcache import Cache
|
from diskcache import Cache
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import tempfile
|
||||||
|
import nndownload
|
||||||
|
import boto3
|
||||||
|
from botocore.client import Config as BotoConfig
|
||||||
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Deployment settings, all supplied through the environment.
S3_BUCKET_NAME = os.environ.get('NICONICOGAY_S3_BUCKET_NAME')
S3_REGION = os.environ.get('NICONICOGAY_S3_REGION')
CDN_BASE_URL = os.environ.get('NICONICOGAY_CDN_BASE_URL')

MAX_CONCURRENT_DOWNLOADS = 3
CACHE_EXPIRATION_SECONDS = 3600  # 1 hour
CACHE_SIZE_LIMIT = 100 * 1024 * 1024  # 100 MB

# Disk-backed response cache; setting NICONICOGAY_DISABLE_CACHE to any
# non-empty value turns caching off entirely.
if os.environ.get('NICONICOGAY_DISABLE_CACHE', '') != '':
    cache = None
else:
    cache = Cache("disk_cache", size_limit=CACHE_SIZE_LIMIT)

# Netscape-format cookie jar used for requests to niconico.
cookie_jar = http.cookiejar.MozillaCookieJar('cookies.txt')
|
||||||
@ -24,10 +36,147 @@ except FileNotFoundError:
|
|||||||
|
|
||||||
# Shared HTTP session for fetching niconico watch pages. The user agent
# defaults to Twitterbot so niconico serves the OpenGraph-tagged page.
s = requests.Session()
s.headers.update({
    "User-Agent": os.environ.get('NICONICOGAY_USER_AGENT', 'Twitterbot/1.0')
})
s.cookies = cookie_jar  # type: ignore

if all(key in os.environ for key in [
    'NICONICOGAY_S3_ACCESS_KEY',
    'NICONICOGAY_S3_SECRET_KEY',
]):
    s3_session = boto3.Session()
    # DigitalOcean Spaces is S3-compatible; virtual-hosted addressing is
    # required for its CDN endpoints.
    s3_client = s3_session.client(
        's3',
        aws_access_key_id=os.environ['NICONICOGAY_S3_ACCESS_KEY'],
        aws_secret_access_key=os.environ['NICONICOGAY_S3_SECRET_KEY'],
        region_name=S3_REGION,
        endpoint_url=f"https://{S3_REGION}.digitaloceanspaces.com",
        config=BotoConfig(s3={'addressing_style': 'virtual'}),
    )
else:
    logger.warning("S3 credentials not provided, exiting")
    # exit() is a site-module convenience for the REPL and may not exist
    # when Python runs with -S; raise SystemExit directly instead.
    raise SystemExit(1)

# Shared state for the background download pipeline, guarded by download_lock:
#   active_downloads — number of download threads currently running
#   in_progress     — video IDs currently being downloaded
download_tracker = {
    'active_downloads': 0,
    'in_progress': set(),
}
download_lock = threading.Lock()
download_queue = []
||||||
|
def download_and_upload_video(video_id, url, video_quality):
    """Download a niconico video with nndownload and upload it to the CDN bucket.

    Runs on a worker thread. Registers itself in ``download_tracker`` so the
    same video is never downloaded twice concurrently, and always deregisters
    in the ``finally`` block.

    Args:
        video_id: niconico video ID (e.g. "sm12345").
        url: watch-page URL handed to nndownload.
        video_quality: quality code for nndownload's --video-quality flag.

    Returns:
        True on successful download + upload, False otherwise.
    """
    try:
        with download_lock:
            download_tracker['active_downloads'] += 1
            download_tracker['in_progress'].add(video_id)

        # mkstemp instead of NamedTemporaryFile(delete=True): the file is
        # written by an external tool, and NamedTemporaryFile would attempt a
        # second unlink on context exit after our own cleanup below.
        fd, temp_path = tempfile.mkstemp(suffix='.mp4')
        os.close(fd)
        try:
            logger.info(f"Starting download for video ID: {video_id}")
            nndownload.execute(
                "--no-login",
                "--user-agent", "Googlebot/2.1",
                "--video-quality", video_quality,
                "--output-path", temp_path,
                url
            )

            # mkstemp always creates the file, so check for actual content
            # rather than mere existence to detect a failed download.
            if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0 and s3_client:
                logger.info(f"Downloaded video {video_id}, uploading to CDN")
                try:
                    s3_key = f"niconico/{video_id}.mp4"
                    s3_client.upload_file(
                        temp_path,
                        S3_BUCKET_NAME,
                        s3_key,
                        ExtraArgs={'ContentType': 'video/mp4', 'ACL': 'public-read'}
                    )
                    logger.info(f"Successfully uploaded video {video_id} to CDN")

                    # Clear cache for this video to ensure next view gets updated HTML
                    if cache:
                        cache.delete(video_id)
                        logger.info(f"Cleared cache for video ID: {video_id}")

                    return True
                except Exception as e:
                    logger.error(f"Error uploading video {video_id} to CDN: {e}")
                    return False
            else:
                logger.error(f"Failed to download video {video_id} or S3 client not configured")
                return False
        finally:
            if os.path.exists(temp_path):
                os.unlink(temp_path)
                logger.info(f"Removed temporary file: {temp_path}")
    except Exception as e:
        logger.error(f"Error in download process for video {video_id}: {e}")
        return False
    finally:
        with download_lock:
            download_tracker['active_downloads'] -= 1
            download_tracker['in_progress'].discard(video_id)
|
||||||
|
|
||||||
|
def download_worker():
    """Background loop that drains ``download_queue``.

    Spawns at most MAX_CONCURRENT_DOWNLOADS simultaneous download threads,
    polling once per second. Runs forever as a daemon thread.
    """
    while True:
        try:
            # Do the capacity check, the dequeue, and the in-progress marking
            # under a SINGLE lock acquisition: the original two-step locking
            # let the decision go stale, and left a window where a request
            # handler could re-queue a video before its download thread
            # registered itself.
            with download_lock:
                if (download_tracker['active_downloads'] < MAX_CONCURRENT_DOWNLOADS
                        and download_queue):
                    # Get next video that is not already being downloaded
                    for i, (video_id, _, _) in enumerate(download_queue):
                        if video_id not in download_tracker['in_progress']:
                            video_info = download_queue.pop(i)
                            # Mark in-progress before the thread is scheduled
                            # so duplicates cannot be queued in the meantime
                            # (the download thread's add() is idempotent).
                            download_tracker['in_progress'].add(video_info[0])
                            threading.Thread(
                                target=download_and_upload_video,
                                args=(video_info[0], video_info[1], video_info[2]),
                            ).start()
                            break
            time.sleep(1)
        except Exception as e:
            logger.error(f"Error in download worker: {e}")
            time.sleep(5)  # Back off in case of error


worker_thread = threading.Thread(target=download_worker, daemon=True)
worker_thread.start()
|
||||||
|
|
||||||
|
def is_video_in_cdn(video_id):
    """Return True when the video object already exists in the CDN bucket."""
    if not s3_client:
        return False

    object_key = f"niconico/{video_id}.mp4"
    try:
        s3_client.head_object(Bucket=S3_BUCKET_NAME, Key=object_key)
    except Exception:
        # head_object raises when the object is missing (or on any S3 error);
        # treat every failure as "not in CDN".
        return False
    return True
|
||||||
|
|
||||||
|
def is_video_being_downloaded(video_id):
    """Return True while a download thread is active for this video ID."""
    with download_lock:
        in_progress = download_tracker['in_progress']
        return video_id in in_progress
|
||||||
|
|
||||||
|
def get_cdn_url(video_id):
    """Build the public CDN URL for a video (whether or not it is uploaded yet)."""
    return "{}/niconico/{}.mp4".format(CDN_BASE_URL, video_id)
|
||||||
|
|
||||||
|
def allow_download(params):
    """Return whether this video may be mirrored: at most 15 minutes long."""
    max_duration_seconds = 60 * 15
    return params['video']['duration'] <= max_duration_seconds
|
||||||
|
|
||||||
|
def get_video_quality(params, quality_level_threshold=3):
    """Get the code of the best video quality available (optionally below a certain threshold)"""
    all_videos = params['media']['domand']['videos']
    candidates = [
        video for video in all_videos
        if video['qualityLevel'] < quality_level_threshold
    ]
    if not candidates:
        return None
    best = max(candidates, key=lambda video: int(video['qualityLevel']))
    return str(best['id'])
|
||||||
|
|
||||||
@app.route("/watch/<video_id>")
|
@app.route("/watch/<video_id>")
|
||||||
def proxy(video_id):
|
def proxy(video_id):
|
||||||
logger.info(f"Received request for video ID: {video_id}")
|
logger.info(f"Received request for video ID: {video_id}")
|
||||||
@ -43,6 +192,7 @@ def proxy(video_id):
|
|||||||
try:
|
try:
|
||||||
logger.info(f"Fetching content from URL: {real_url}")
|
logger.info(f"Fetching content from URL: {real_url}")
|
||||||
r = s.get(real_url, timeout=10)
|
r = s.get(real_url, timeout=10)
|
||||||
|
r.raise_for_status()
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
logger.error(f"Error fetching the page for video ID '{video_id}': {e}")
|
logger.error(f"Error fetching the page for video ID '{video_id}': {e}")
|
||||||
return Response(status=500)
|
return Response(status=500)
|
||||||
@ -64,11 +214,28 @@ def proxy(video_id):
|
|||||||
logger.warning(f"Failed to extract thumbnail info for video ID '{video_id}': {e}")
|
logger.warning(f"Failed to extract thumbnail info for video ID '{video_id}': {e}")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
download_allowed = allow_download(params) if params else False
|
||||||
|
video_quality = get_video_quality(params) if params else None
|
||||||
|
if download_allowed and video_quality is not None:
|
||||||
|
video_in_cdn = is_video_in_cdn(video_id)
|
||||||
|
video_in_progress = is_video_being_downloaded(video_id)
|
||||||
|
if not video_in_cdn and not video_in_progress and s3_client:
|
||||||
|
with download_lock:
|
||||||
|
# Add to queue if not already in it
|
||||||
|
queue_video_ids = [item[0] for item in download_queue]
|
||||||
|
if video_id not in queue_video_ids:
|
||||||
|
download_queue.append((video_id, real_url, video_quality))
|
||||||
|
logger.info(f"Queued video ID {video_id} for download")
|
||||||
|
|
||||||
|
cdn_video_url = get_cdn_url(video_id)
|
||||||
og_tags = soup.find_all("meta", property=lambda x: x) # type: ignore
|
og_tags = soup.find_all("meta", property=lambda x: x) # type: ignore
|
||||||
for tag in og_tags:
|
for tag in og_tags:
|
||||||
# Fix thumbnail
|
# Fix thumbnail
|
||||||
if tag.get("property") == "og:image" and thumbnail_url:
|
if tag.get("property") == "og:image" and thumbnail_url:
|
||||||
tag["content"] = thumbnail_url
|
tag["content"] = thumbnail_url
|
||||||
|
# Fix video URL
|
||||||
|
if tag.get("property") == "og:video:url" or tag.get("property") == "og:video:secure_url":
|
||||||
|
tag["content"] = cdn_video_url
|
||||||
|
|
||||||
og_tags_str = "\n".join(str(tag) for tag in og_tags)
|
og_tags_str = "\n".join(str(tag) for tag in og_tags)
|
||||||
html_response = f"""
|
html_response = f"""
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
beautifulsoup4==4.12.3
|
beautifulsoup4==4.12.3
|
||||||
Flask==3.1.0
|
Flask==3.1.0
|
||||||
Requests==2.32.3
|
Requests==2.32.3
|
||||||
|
diskcache==5.6.3
|
||||||
|
nndownload==1.19
|
||||||
|
boto3  # TODO: pin a version like the other dependencies for reproducible builds
|
Loading…
x
Reference in New Issue
Block a user