Initial commit

This commit is contained in:
2026-06-12 15:29:30 -07:00
commit e6e0c8d77e
13 changed files with 1030 additions and 0 deletions

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,130 @@
import re
import shutil
import subprocess
import sys
import unicodedata
from pathlib import Path
class ConversionError(RuntimeError):
    """Error raised by this module's validation, download, and conversion helpers."""
# Output format names accepted by normalize_format().
SUPPORTED_FORMATS = {
    "flac",
    "m4a",
    "mp3",
    "ogg",
    "wav",
}

# MP3 bitrates (as strings; convert_audio appends "k" for ffmpeg's -b:a)
# accepted by normalize_quality().
MP3_QUALITIES = {
    "128",
    "192",
    "256",
    "320",
}
def normalize_format(format_name: str) -> str:
    """Return *format_name* trimmed and lower-cased if it is supported.

    Args:
        format_name: Requested output format; ``None`` or an empty string is
            treated as an empty (and therefore unsupported) name.

    Returns:
        The canonical lower-case format name, a member of ``SUPPORTED_FORMATS``.

    Raises:
        ConversionError: If the cleaned name is not in ``SUPPORTED_FORMATS``.
    """
    candidate = format_name.strip().lower() if format_name else ""
    if candidate in SUPPORTED_FORMATS:
        return candidate
    raise ConversionError("Choose a supported output format.")
def normalize_quality(format_name: str, quality: str | None) -> str:
    """Return the bitrate string to use for *format_name*.

    Only MP3 has a selectable quality. Every other format name gets the fixed
    value ``"192"`` and *quality* is ignored. *format_name* is compared
    verbatim against ``"mp3"``, so callers should pass an already-normalized
    name.

    Args:
        format_name: Output format name.
        quality: Requested MP3 bitrate; ``None`` or empty means ``"192"``.

    Returns:
        The bitrate as a string without a unit suffix.

    Raises:
        ConversionError: If the format is MP3 and the trimmed quality is not
            in ``MP3_QUALITIES``.
    """
    default_bitrate = "192"
    if format_name == "mp3":
        bitrate = (quality if quality else default_bitrate).strip()
        if bitrate in MP3_QUALITIES:
            return bitrate
        raise ConversionError("Choose a supported MP3 quality.")
    return default_bitrate
def sanitize_filename(name: str, fallback: str = "youtube-audio") -> str:
    """Turn an arbitrary title into a short, ASCII-only, filesystem-friendly stem.

    Steps:
      1. Decompose Unicode (NFKD) and drop everything that is not ASCII, so
         accented letters keep their base letter.
      2. Remove every character that is not a word character, whitespace,
         ``.`` or ``-``.
      3. Collapse each run of whitespace, underscores, and hyphens into a
         single ``-`` (so ``"Artist - Title"`` becomes ``"Artist-Title"``
         rather than ``"Artist---Title"``).
      4. Trim leading/trailing ``.`` and ``-``, cut to 120 characters, and
         trim again so the cut can never leave a dangling separator.

    Args:
        name: Raw title; ``None`` or empty is treated as an empty string.
        fallback: Value used when nothing usable is left after cleaning.

    Returns:
        The cleaned name, or *fallback* (cut to 120 characters) when the
        cleaned name is empty.
    """
    ascii_only = (
        unicodedata.normalize("NFKD", name or "")
        .encode("ascii", "ignore")
        .decode("ascii")
    )
    cleaned = re.sub(r"[^\w\s.-]", "", ascii_only)
    # Hyphens are part of the separator class so "a - b" collapses to "a-b".
    cleaned = re.sub(r"[\s_-]+", "-", cleaned).strip(".-")
    # Truncation may land right after a separator; strip it off again.
    shortened = cleaned[:120].rstrip(".-")
    return shortened or fallback[:120]
def download_audio(url: str, workdir: Path) -> Path:
    """Download the audio for *url* into *workdir* using yt-dlp.

    yt-dlp is run as ``python -m yt_dlp`` with the current interpreter,
    playlist expansion disabled, format selector ``bestaudio/best``, and an
    output template of ``<workdir>/source.<ext>``.

    Args:
        url: Video URL handed to yt-dlp.
        workdir: Directory to download into; created if missing.

    Returns:
        The first path (in sorted order) matching ``source.*`` in *workdir*
        that is a regular file and does not end in ``.part``.

    Raises:
        ConversionError: If yt-dlp exits non-zero or exceeds the 1800 second
            timeout (raised by ``_run_process``), or if no source file is
            found afterwards.
    """
    workdir.mkdir(parents=True, exist_ok=True)
    output_template = str(workdir / "source.%(ext)s")
    command = [
        sys.executable,
        "-m",
        "yt_dlp",
        "--no-playlist",
        "--no-warnings",
        "-f",
        "bestaudio/best",
        "-o",
        output_template,
        # End-of-options marker: without it, a URL value that begins with "-"
        # would be parsed by yt-dlp as a command-line flag.
        "--",
        url,
    ]
    _run_process(command, "Could not download audio from YouTube.", timeout=1800)

    # Ignore yt-dlp's in-progress ".part" files; only finished downloads count.
    source_files = sorted(
        path
        for path in workdir.glob("source.*")
        if path.is_file() and not path.name.endswith(".part")
    )
    if not source_files:
        raise ConversionError("The audio download finished, but no source file was created.")
    return source_files[0]
def convert_audio(source_path: Path, output_path: Path, format_name: str, quality: str) -> Path:
    """Transcode *source_path* to *output_path* with ffmpeg.

    The format and quality are validated through ``normalize_format`` and
    ``normalize_quality``. Video streams are dropped (``-vn``) and an existing
    output file is overwritten (``-y``).

    Args:
        source_path: Input media file.
        output_path: Destination file; its parent directory is created.
        format_name: Requested output format.
        quality: Requested MP3 bitrate; only used when the format is MP3.

    Returns:
        *output_path*, once ffmpeg has finished and the file exists.

    Raises:
        ConversionError: If ffmpeg is not on PATH, the format or quality is
            unsupported, ffmpeg fails or times out, or no output file exists
            after the run.
    """
    if shutil.which("ffmpeg") is None:
        raise ConversionError("ffmpeg is not installed or is not available on PATH.")

    target_format = normalize_format(format_name)
    bitrate = normalize_quality(target_format, quality)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Per-format audio encoder options appended after the common arguments.
    encoder_options = {
        "mp3": ["-codec:a", "libmp3lame", "-b:a", f"{bitrate}k"],
        "wav": ["-codec:a", "pcm_s16le"],
        "flac": ["-codec:a", "flac"],
        "m4a": ["-codec:a", "aac", "-b:a", "192k"],
        "ogg": ["-codec:a", "libvorbis", "-q:a", "5"],
    }
    ffmpeg_command = [
        "ffmpeg",
        "-y",
        "-i",
        str(source_path),
        "-vn",
        *encoder_options.get(target_format, []),
        str(output_path),
    ]
    _run_process(ffmpeg_command, "Could not convert the downloaded audio.", timeout=1800)

    if output_path.exists():
        return output_path
    raise ConversionError("Conversion finished, but no output file was created.")
def clean_source_files(workdir: Path) -> None:
    """Delete every regular file named ``source.*`` directly inside *workdir*.

    Directories matching the pattern are left alone, and a file that vanishes
    between listing and deletion is ignored.
    """
    leftovers = (entry for entry in workdir.glob("source.*") if entry.is_file())
    for entry in leftovers:
        entry.unlink(missing_ok=True)
def _run_process(command: list[str], fallback: str, timeout: int) -> None:
try:
result = subprocess.run(
command,
check=False,
capture_output=True,
text=True,
timeout=timeout,
)
except subprocess.TimeoutExpired as exc:
raise ConversionError(f"{fallback} The process timed out.") from exc
if result.returncode != 0:
raise ConversionError(_friendly_process_error(result.stderr, fallback))
def _friendly_process_error(stderr: str, fallback: str) -> str:
lines = [line.strip() for line in (stderr or "").splitlines() if line.strip()]
if not lines:
return fallback
useful = [line for line in lines if "error" in line.lower() or line.startswith(("ERROR:", "WARNING:"))]
message = useful[-1] if useful else lines[-1]
for prefix in ("ERROR:", "WARNING:"):
if message.startswith(prefix):
message = message.removeprefix(prefix).strip()
return message or fallback

116
backend/services/youtube.py Normal file
View File

@@ -0,0 +1,116 @@
import json
import subprocess
import sys
from typing import Any
from urllib.parse import parse_qs, urlparse
class YouTubeServiceError(RuntimeError):
    """Error raised by this module's URL validation and video-info lookup."""
# Lower-case host names accepted by validate_youtube_url(), grouped by domain family.
YOUTUBE_HOSTS = {
    # Main site and its sub-domains.
    "youtube.com", "www.youtube.com", "m.youtube.com", "music.youtube.com",
    # Short-link domain.
    "youtu.be", "www.youtu.be",
    # No-cookie domain.
    "youtube-nocookie.com", "www.youtube-nocookie.com",
}
def validate_youtube_url(url: str) -> str:
    """Check that *url* looks like a single-video YouTube URL and return it trimmed.

    Accepted shapes:
      * ``youtu.be/<anything>`` (any non-empty path),
      * ``/watch?v=<id>`` on a youtube.com / youtube-nocookie.com host,
      * ``/shorts/<id>``, ``/live/<id>``, ``/embed/<id>`` on those hosts.

    Args:
        url: User-supplied URL; ``None`` or empty is treated as an empty string.

    Returns:
        The input with surrounding whitespace removed.

    Raises:
        YouTubeServiceError: If the URL is malformed, is not http(s), points
            to a host outside ``YOUTUBE_HOSTS``, or has an unsupported path.
    """
    candidate = (url or "").strip()
    try:
        parsed = urlparse(candidate)
    except ValueError as exc:
        # urlparse raises on malformed netlocs (e.g. an unmatched "[");
        # report that as a validation failure rather than an unhandled crash.
        raise YouTubeServiceError("That does not look like a supported YouTube video URL.") from exc

    if parsed.scheme not in {"http", "https"}:
        raise YouTubeServiceError("Enter a full YouTube URL starting with http:// or https://.")

    # A trailing dot denotes the same fully-qualified host name.
    host = (parsed.netloc or "").lower().removesuffix(".")
    if host not in YOUTUBE_HOSTS:
        raise YouTubeServiceError("Only YouTube video URLs are supported.")

    path = parsed.path or ""
    query = parse_qs(parsed.query)

    if host in {"youtu.be", "www.youtu.be"} and path.strip("/"):
        return candidate

    if host.endswith("youtube.com") or host.endswith("youtube-nocookie.com"):
        if path == "/watch" and query.get("v", [""])[0].strip():
            return candidate
        parts = [part for part in path.split("/") if part]
        if len(parts) >= 2 and parts[0] in {"shorts", "live", "embed"} and parts[1].strip():
            return candidate

    raise YouTubeServiceError("That does not look like a supported YouTube video URL.")
def fetch_video_info(url: str) -> dict[str, Any]:
    """Look up display metadata for a YouTube video without downloading it.

    The URL is validated first, then yt-dlp is run as ``python -m yt_dlp``
    with ``--dump-single-json --skip-download`` and a 90 second timeout.

    Args:
        url: User-supplied video URL.

    Returns:
        A dict with the keys ``title``, ``channel``, ``duration`` (formatted
        string), ``duration_seconds`` (raw value from yt-dlp), ``thumbnail``,
        and ``webpage_url``. Missing title/channel/webpage_url values are
        replaced with placeholders or the validated URL.

    Raises:
        YouTubeServiceError: If the URL is rejected, yt-dlp times out or exits
            non-zero, or its stdout is not valid JSON.
    """
    valid_url = validate_youtube_url(url)
    yt_dlp_flags = ["--dump-single-json", "--no-playlist", "--skip-download", "--no-warnings"]

    try:
        completed = subprocess.run(
            [sys.executable, "-m", "yt_dlp", *yt_dlp_flags, valid_url],
            check=False,
            capture_output=True,
            text=True,
            timeout=90,
        )
    except subprocess.TimeoutExpired as exc:
        raise YouTubeServiceError("Fetching video info timed out.") from exc

    if completed.returncode != 0:
        reason = _friendly_process_error(completed.stderr, "Could not fetch video info.")
        raise YouTubeServiceError(reason)

    try:
        metadata = json.loads(completed.stdout)
    except json.JSONDecodeError as exc:
        raise YouTubeServiceError("yt-dlp returned video info in an unreadable format.") from exc

    title = metadata.get("title") or "Untitled YouTube video"
    # Prefer the channel name; fall back to the uploader field.
    channel = metadata.get("channel") or metadata.get("uploader") or "Unknown channel"
    raw_duration = metadata.get("duration")
    return {
        "title": title,
        "channel": channel,
        "duration": _format_duration(raw_duration),
        "duration_seconds": raw_duration,
        "thumbnail": metadata.get("thumbnail"),
        "webpage_url": metadata.get("webpage_url") or valid_url,
    }
def _format_duration(seconds: Any) -> str:
if not isinstance(seconds, (int, float)) or seconds < 0:
return "Unknown"
total = int(seconds)
hours, remainder = divmod(total, 3600)
minutes, secs = divmod(remainder, 60)
if hours:
return f"{hours}:{minutes:02d}:{secs:02d}"
return f"{minutes}:{secs:02d}"
def _friendly_process_error(stderr: str, fallback: str) -> str:
lines = [line.strip() for line in (stderr or "").splitlines() if line.strip()]
if not lines:
return fallback
message = lines[-1]
for prefix in ("ERROR:", "WARNING:"):
if message.startswith(prefix):
message = message.removeprefix(prefix).strip()
return message or fallback