feat: add video re-encoding with auto-detected encoder and bbox interpolation

2026-02-27 22:35:46 +07:00 · 2026-02-27 22:35:46 +07:00 · ebd41ac0a4
commit ebd41ac0a4
parent 435a0a0de7
1 changed files with 274 additions and 0 deletions
--- a/src/faceblur/encode.py
+++ b/src/faceblur/encode.py
@ -0,0 +1,274 @@
+"""Video re-encoding with face blur applied."""
+
+import json
+import subprocess
+from pathlib import Path
+from typing import Callable, Dict, List, Optional, Set, Tuple
+
+import cv2
+import numpy as np
+
+from .blur import BlurMethod, apply_blur, get_bboxes_for_frame
+from .cluster import Cluster
+from .detect import FaceData
+
+
+# Candidate FFmpeg H.264 encoder names. find_best_encoder() probes them in
+# list order and returns the first one that works, so earlier entries are
+# preferred. The GPU-backed encoders are listed ahead of libopenh264, which
+# acts as the last-resort fallback.
+ENCODER_PRIORITY = [
+    "h264_nvenc",
+    "h264_vaapi",
+    "h264_amf",
+    "h264_qsv",
+    "libopenh264",
+]
+
+
+def probe_video(video_path: Path) -> dict:
+    """Probe a video file with ffprobe and summarise its main streams.
+
+    Only the first real video stream and the first audio stream are
+    reported. Video streams flagged as attached pictures (cover art) are
+    skipped so they cannot overwrite the dimensions of the actual video.
+
+    Args:
+        video_path: Path to input video
+
+    Returns:
+        Dict with keys:
+            width, height: Frame size in pixels (0 if no video stream).
+            fps: Frames per second parsed from ``r_frame_rate`` (default 30.0).
+            bitrate: Video bitrate as reported by ffprobe (stream value,
+                else container value, else the default ``"4M"``).
+            codec: Video codec name (default ``"h264"``).
+            audio_codec, audio_bitrate: From the first audio stream, or
+                None when there is no audio.
+            total_frames: ``nb_frames`` when available, otherwise estimated
+                from container duration * fps.
+
+    Raises:
+        RuntimeError: If ffprobe exits with a non-zero status.
+    """
+    cmd = [
+        "ffprobe",
+        "-v",
+        # "error" rather than "quiet": keeps stdout pure JSON but still lets
+        # ffprobe explain failures on stderr for the message below.
+        "error",
+        "-print_format",
+        "json",
+        "-show_streams",
+        "-show_format",
+        str(video_path),
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        raise RuntimeError(f"ffprobe failed: {result.stderr}")
+
+    data = json.loads(result.stdout)
+    container = data.get("format", {})
+    info = {
+        "width": 0,
+        "height": 0,
+        "fps": 30.0,
+        "bitrate": "4M",
+        "codec": "h264",
+        "audio_codec": None,
+        "audio_bitrate": None,
+        "total_frames": 0,
+    }
+
+    video_seen = False
+    audio_seen = False
+    for stream in data.get("streams", []):
+        codec_type = stream.get("codec_type")
+
+        if codec_type == "video" and not video_seen:
+            # Cover art is exposed as a video stream; it is not the footage.
+            if stream.get("disposition", {}).get("attached_pic"):
+                continue
+            video_seen = True
+
+            info["width"] = int(stream.get("width", 0))
+            info["height"] = int(stream.get("height", 0))
+            info["codec"] = stream.get("codec_name", "h264")
+
+            # Parse fps from r_frame_rate (e.g., "60/1")
+            fps_str = stream.get("r_frame_rate", "30/1")
+            if "/" in fps_str:
+                num, den = fps_str.split("/")
+                # ffprobe may report "0/0" when the rate is unknown.
+                info["fps"] = float(num) / float(den) if float(den) > 0 else 30.0
+            else:
+                info["fps"] = float(fps_str)
+
+            # Bitrate from stream or format
+            if stream.get("bit_rate"):
+                info["bitrate"] = stream["bit_rate"]
+            elif container.get("bit_rate"):
+                info["bitrate"] = container["bit_rate"]
+
+            # Total frames: exact count if the container stores it,
+            # otherwise an estimate from the duration.
+            nb_frames = stream.get("nb_frames")
+            if nb_frames and nb_frames != "N/A":
+                info["total_frames"] = int(nb_frames)
+            else:
+                duration = float(container.get("duration", 0))
+                info["total_frames"] = int(duration * info["fps"])
+
+        elif codec_type == "audio" and not audio_seen:
+            # First audio stream only, matching the "1:a:0" mapping used
+            # when the video is re-encoded.
+            audio_seen = True
+            info["audio_codec"] = stream.get("codec_name")
+            info["audio_bitrate"] = stream.get("bit_rate")
+
+    return info
+
+
+def find_best_encoder() -> str:
+    """Find the best available H.264 encoder by testing each in priority order.
+
+    Each candidate in ENCODER_PRIORITY is exercised with a very short
+    synthetic encode to FFmpeg's null muxer; the first candidate whose
+    FFmpeg run exits with status 0 is returned.
+
+    Returns:
+        Name of the best available encoder
+
+    Raises:
+        RuntimeError: If none of the candidate encoders works.
+    """
+    for encoder in ENCODER_PRIORITY:
+        cmd = [
+            "ffmpeg",
+            "-v",
+            "quiet",
+            "-f",
+            "lavfi",
+            "-i",
+            # Hardware encoders can enforce a minimum frame size, so probe
+            # with a frame comfortably above a tiny 64x64 test pattern.
+            "nullsrc=s=256x256:d=0.1",
+            "-c:v",
+            encoder,
+            "-f",
+            "null",
+            "-",
+        ]
+        try:
+            result = subprocess.run(cmd, capture_output=True, timeout=10)
+        except subprocess.TimeoutExpired:
+            # A hung driver/encoder counts as unavailable; try the next one
+            # instead of aborting detection altogether.
+            continue
+        if result.returncode == 0:
+            return encoder
+
+    raise RuntimeError(
+        "No H.264 encoder found. Available encoders checked: "
+        + ", ".join(ENCODER_PRIORITY)
+    )
+
+
+def build_keyframe_bboxes(
+    clusters: List[Cluster],
+    selected_cluster_ids: Set[int],
+    frame_interval: int,
+) -> Tuple[Dict[int, List[Tuple[int, Tuple[int, int, int, int]]]], List[int]]:
+    """Group the face bboxes of the selected clusters by video frame index.
+
+    Args:
+        clusters: All clusters from detection
+        selected_cluster_ids: Set of cluster IDs to blur
+        frame_interval: The interval used for frame extraction
+
+    Returns:
+        A pair ``(bboxes_by_frame, frame_indices)``: the dict maps a 0-based
+        video frame index to a list of ``(cluster_id, bbox)`` entries, and
+        the list holds the dict's keys in ascending order.
+    """
+    bboxes_by_frame: Dict[int, List[Tuple[int, Tuple[int, int, int, int]]]] = {}
+
+    # Clusters the user did not pick contribute nothing.
+    chosen = (c for c in clusters if c.id in selected_cluster_ids)
+    for cluster in chosen:
+        for face in cluster.faces:
+            # face.frame_index counts extracted frame files from 1; scale it
+            # back to a 0-based position in the original video.
+            frame_no = (face.frame_index - 1) * frame_interval
+            bboxes_by_frame.setdefault(frame_no, []).append(
+                (cluster.id, face.bbox)
+            )
+
+    return bboxes_by_frame, sorted(bboxes_by_frame)
+
+
+def encode_video(
+    input_path: Path,
+    output_path: Path,
+    clusters: List[Cluster],
+    selected_cluster_ids: Set[int],
+    frame_interval: int,
+    blur_method: BlurMethod = "gaussian",
+    progress_callback: Optional[Callable[[int, int], None]] = None,
+) -> None:
+    """Re-encode video with face blur applied to selected clusters.
+
+    Frames are read with OpenCV, blurred in Python, and streamed as raw
+    BGR frames into an FFmpeg subprocess that encodes them with the encoder
+    chosen by find_best_encoder(). If the input has audio, its first audio
+    stream is copied into the output unchanged.
+
+    Args:
+        input_path: Path to input video
+        output_path: Path to output video
+        clusters: All detected clusters
+        selected_cluster_ids: Cluster IDs to blur
+        frame_interval: Frame interval used during detection
+        blur_method: Blur method to use
+        progress_callback: Called with (current_frame, total_frames) after
+            each frame, only when the total frame count is known
+
+    Raises:
+        RuntimeError: If the input has no usable video stream, cannot be
+            opened, or FFmpeg fails (the message carries FFmpeg's stderr).
+    """
+    # Function-scope import so this block does not depend on the module's
+    # top-level import list.
+    import tempfile
+
+    video_info = probe_video(input_path)
+    encoder = find_best_encoder()
+
+    keyframe_bboxes, keyframe_indices = build_keyframe_bboxes(
+        clusters,
+        selected_cluster_ids,
+        frame_interval,
+    )
+
+    width = video_info["width"]
+    height = video_info["height"]
+    fps = video_info["fps"]
+    bitrate = video_info["bitrate"]
+    total_frames = video_info["total_frames"]
+
+    # Without real dimensions FFmpeg cannot interpret the raw frame stream.
+    if width <= 0 or height <= 0:
+        raise RuntimeError(f"Could not open video: {input_path}")
+
+    # Build FFmpeg encode command: input 0 is the raw frame pipe, input 1 is
+    # the original file (used only as the audio source).
+    ffmpeg_cmd = [
+        "ffmpeg",
+        "-y",
+        "-f",
+        "rawvideo",
+        "-pix_fmt",
+        "bgr24",
+        "-s",
+        f"{width}x{height}",
+        "-r",
+        str(fps),
+        "-i",
+        "pipe:0",
+        "-i",
+        str(input_path),
+        "-map",
+        "0:v:0",
+    ]
+
+    # Map audio from original if present
+    if video_info["audio_codec"]:
+        ffmpeg_cmd.extend(["-map", "1:a:0", "-c:a", "copy"])
+
+    ffmpeg_cmd.extend(
+        [
+            "-c:v",
+            encoder,
+            "-b:v",
+            str(bitrate),
+            "-pix_fmt",
+            "yuv420p",
+            str(output_path),
+        ]
+    )
+
+    # Open input video
+    cap = cv2.VideoCapture(str(input_path))
+    if not cap.isOpened():
+        raise RuntimeError(f"Could not open video: {input_path}")
+
+    # FFmpeg logs to stderr for the whole encode. Nothing reads it while
+    # frames are being written, so a pipe would eventually fill up and block
+    # FFmpeg (and then this loop). A temp file has no such limit.
+    with tempfile.TemporaryFile() as stderr_log:
+        try:
+            proc = subprocess.Popen(
+                ffmpeg_cmd,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.DEVNULL,
+                stderr=stderr_log,
+            )
+        except OSError:
+            # e.g. ffmpeg binary missing: do not leak the capture handle.
+            cap.release()
+            raise
+
+        pipe_broken = False
+        try:
+            frame_idx = 0
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                # Get bboxes for this frame (exact or interpolated)
+                face_bboxes = get_bboxes_for_frame(
+                    frame_idx,
+                    keyframe_bboxes,
+                    keyframe_indices,
+                )
+
+                # Apply blur to each face
+                for _cluster_id, bbox in face_bboxes:
+                    frame = apply_blur(frame, bbox, method=blur_method)
+
+                # Write frame to FFmpeg
+                try:
+                    proc.stdin.write(frame.tobytes())
+                except BrokenPipeError:
+                    # FFmpeg exited early; stop feeding it and report its
+                    # own error output below instead of a bare pipe error.
+                    pipe_broken = True
+                    break
+
+                frame_idx += 1
+                if progress_callback and total_frames > 0:
+                    progress_callback(frame_idx, total_frames)
+
+        finally:
+            cap.release()
+            if proc.stdin:
+                try:
+                    proc.stdin.close()
+                except BrokenPipeError:
+                    # Flushing into a dead FFmpeg; the failure is reported
+                    # through the return code / pipe_broken check.
+                    pipe_broken = True
+            proc.wait()
+
+        if proc.returncode != 0 or pipe_broken:
+            stderr_log.seek(0)
+            stderr = stderr_log.read().decode(errors="replace")
+            raise RuntimeError(f"FFmpeg encoding failed: {stderr}")