# py-faceblur/src/faceblur/encode.py

"""Video re-encoding with face blur applied."""

import json
import subprocess
import tempfile
from pathlib import Path
from typing import Callable, Dict, List, Optional, Set, Tuple

import cv2
import numpy as np

from .blur import BlurMethod, apply_blur, get_bboxes_for_frame
from .cluster import Cluster
from .detect import FaceData


# Candidate H.264 encoder names, listed in the order find_best_encoder()
# tries them; the first one whose FFmpeg test run succeeds is used.
ENCODER_PRIORITY = [
    "h264_nvenc",
    "h264_vaapi",
    "h264_amf",
    "h264_qsv",
    "libopenh264",
]


def probe_video(video_path: Path) -> dict:
    """Probe video file to get codec, bitrate, fps, resolution, audio info.

    Exactly one video stream and one audio stream are reported, even when the
    container holds several: the first video stream that is not flagged as an
    attached picture (falling back to the first video stream if all of them
    are), and the first audio stream.

    Args:
        video_path: Path to input video

    Returns:
        Dict with keys: width, height, fps, bitrate, codec, audio_codec,
        audio_bitrate, total_frames. Keys that ffprobe does not report keep
        their defaults (0 for sizes and frame count, 30.0 fps, "4M" bitrate,
        "h264" codec, None for the audio fields). Bitrates are passed through
        as the strings ffprobe emits.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status.
    """
    cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_streams",
        "-show_format",
        str(video_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr}")

    data = json.loads(result.stdout)
    container = data.get("format", {})
    streams = data.get("streams", [])
    info = {
        "width": 0,
        "height": 0,
        "fps": 30.0,
        "bitrate": "4M",
        "codec": "h264",
        "audio_codec": None,
        "audio_bitrate": None,
        "total_frames": 0,
    }

    video_streams = [s for s in streams if s.get("codec_type") == "video"]
    # Embedded cover art shows up as an extra "video" stream flagged
    # attached_pic; letting it overwrite the real stream's values would report
    # the wrong resolution/codec, so prefer a stream without that flag.
    video = next(
        (
            s
            for s in video_streams
            if not (s.get("disposition") or {}).get("attached_pic")
        ),
        video_streams[0] if video_streams else None,
    )
    # Report the first audio stream only (later tracks must not overwrite it).
    audio = next((s for s in streams if s.get("codec_type") == "audio"), None)

    if video is not None:
        info["width"] = int(video.get("width", 0))
        info["height"] = int(video.get("height", 0))
        info["codec"] = video.get("codec_name", "h264")

        # Parse fps from r_frame_rate (e.g., "60/1")
        fps_str = video.get("r_frame_rate", "30/1")
        if "/" in fps_str:
            num, den = fps_str.split("/")
            info["fps"] = float(num) / float(den) if float(den) > 0 else 30.0
        else:
            info["fps"] = float(fps_str)

        # Bitrate from the stream, else from the container as a whole
        if video.get("bit_rate"):
            info["bitrate"] = video["bit_rate"]
        elif container.get("bit_rate"):
            info["bitrate"] = container["bit_rate"]

        # Total frames: exact count if reported, otherwise duration * fps
        nb_frames = video.get("nb_frames")
        if nb_frames and nb_frames != "N/A":
            info["total_frames"] = int(nb_frames)
        else:
            duration = float(container.get("duration", 0))
            info["total_frames"] = int(duration * info["fps"])

    if audio is not None:
        info["audio_codec"] = audio.get("codec_name")
        info["audio_bitrate"] = audio.get("bit_rate")

    return info


def find_best_encoder() -> str:
    """Find the best available H.264 encoder by testing each in priority order.

    Each candidate in ENCODER_PRIORITY is tried by encoding a tiny synthetic
    clip to FFmpeg's null muxer; the first candidate whose run exits with
    status 0 is returned.

    Returns:
        Name of the best available encoder

    Raises:
        RuntimeError: If none of the candidates could encode the test clip.
    """
    for encoder in ENCODER_PRIORITY:
        cmd = [
            "ffmpeg",
            "-v",
            "quiet",
            "-f",
            "lavfi",
            "-i",
            "nullsrc=s=64x64:d=0.1",
            "-c:v",
            encoder,
            "-f",
            "null",
            "-",
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, timeout=10)
        except subprocess.TimeoutExpired:
            # A test run that hangs past the timeout only rules out this
            # candidate; keep going so a later encoder can still be chosen.
            continue
        if result.returncode == 0:
            return encoder

    raise RuntimeError(
        "No H.264 encoder found. Available encoders checked: "
        + ", ".join(ENCODER_PRIORITY)
    )


def build_keyframe_bboxes(
    clusters: List[Cluster],
    selected_cluster_ids: Set[int],
    frame_interval: int,
) -> Tuple[Dict[int, List[Tuple[int, Tuple[int, int, int, int]]]], List[int]]:
    """Group the face bboxes of the selected clusters by video frame index.

    Args:
        clusters: All clusters from detection
        selected_cluster_ids: IDs of the clusters whose faces should be blurred
        frame_interval: The interval used for frame extraction

    Returns:
        A pair of (mapping from video frame index to a list of
        (cluster id, bbox) entries, ascending list of the mapped frame indices)
    """
    bboxes_by_frame: Dict[int, List[Tuple[int, Tuple[int, int, int, int]]]] = {}

    # Clusters that were not selected contribute nothing to the lookup.
    chosen_clusters = (c for c in clusters if c.id in selected_cluster_ids)

    for chosen in chosen_clusters:
        for detection in chosen.faces:
            # frame_index is the 1-based index of the extracted frame file;
            # scale it back to a 0-based index into the original video.
            frame_no = (detection.frame_index - 1) * frame_interval
            bboxes_by_frame.setdefault(frame_no, []).append(
                (chosen.id, detection.bbox)
            )

    return bboxes_by_frame, sorted(bboxes_by_frame)


def encode_video(
    input_path: Path,
    output_path: Path,
    clusters: List[Cluster],
    selected_cluster_ids: Set[int],
    frame_interval: int,
    blur_method: BlurMethod = "gaussian",
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> None:
    """Re-encode video with face blur applied to selected clusters.

    Frames are decoded with OpenCV, blurred, and streamed as raw BGR data to
    an FFmpeg subprocess. If the probe reports an audio stream, the first
    audio stream of the original file is copied into the output.

    Args:
        input_path: Path to input video
        output_path: Path to output video
        clusters: All detected clusters
        selected_cluster_ids: Cluster IDs to blur
        frame_interval: Frame interval used during detection
        blur_method: Blur method to use
        progress_callback: Called with (current_frame, total_frames)

    Raises:
        RuntimeError: If the input cannot be opened, or if FFmpeg exits with
            a non-zero status or stops accepting frames. FFmpeg's stderr
            output is included in the message.
    """
    video_info = probe_video(input_path)
    encoder = find_best_encoder()

    keyframe_bboxes, keyframe_indices = build_keyframe_bboxes(
        clusters,
        selected_cluster_ids,
        frame_interval,
    )

    fps = video_info["fps"]
    bitrate = video_info["bitrate"]
    total_frames = video_info["total_frames"]

    # Open input video
    cap = cv2.VideoCapture(str(input_path))
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {input_path}")

    pipe_broken = False
    try:
        # Read the first frame up front: the raw-video size given to FFmpeg
        # must match the bytes actually written, and decoded frames can differ
        # from the probed stream size (for example if the decoder applies
        # rotation metadata). Fall back to the probed size if nothing decodes.
        ret, frame = cap.read()
        if ret:
            height, width = frame.shape[:2]
        else:
            width = video_info["width"]
            height = video_info["height"]

        # Build FFmpeg encode command
        ffmpeg_cmd = [
            "ffmpeg",
            "-y",
            "-f",
            "rawvideo",
            "-pix_fmt",
            "bgr24",
            "-s",
            f"{width}x{height}",
            "-r",
            str(fps),
            "-i",
            "pipe:0",
            "-i",
            str(input_path),
            "-map",
            "0:v:0",
        ]

        # Map audio from original if present
        if video_info["audio_codec"]:
            ffmpeg_cmd.extend(["-map", "1:a:0", "-c:a", "copy"])

        ffmpeg_cmd.extend(
            [
                "-c:v",
                encoder,
                "-b:v",
                str(bitrate),
                "-pix_fmt",
                "yuv420p",
                str(output_path),
            ]
        )

        # FFmpeg logs to stderr for the whole run. Sending that to a pipe
        # nobody reads would eventually fill the pipe buffer and stall FFmpeg
        # while this process is blocked writing frames to its stdin, so the
        # log goes to a temporary file and stdout is discarded.
        with tempfile.TemporaryFile() as stderr_log:
            proc = subprocess.Popen(
                ffmpeg_cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=stderr_log,
            )

            try:
                frame_idx = 0
                while ret:
                    # Get bboxes for this frame (exact or interpolated)
                    face_bboxes = get_bboxes_for_frame(
                        frame_idx,
                        keyframe_bboxes,
                        keyframe_indices,
                    )

                    # Apply blur to each face
                    for _cluster_id, bbox in face_bboxes:
                        frame = apply_blur(frame, bbox, method=blur_method)

                    # Write frame to FFmpeg
                    try:
                        proc.stdin.write(frame.tobytes())
                    except OSError:
                        # FFmpeg closed its end of the pipe (it most likely
                        # exited); stop feeding it and report its own error
                        # output below instead of a bare pipe error.
                        pipe_broken = True
                        break

                    frame_idx += 1
                    if progress_callback and total_frames > 0:
                        progress_callback(frame_idx, total_frames)

                    ret, frame = cap.read()
            finally:
                if proc.stdin:
                    try:
                        proc.stdin.close()
                    except OSError:
                        # Flushing buffered frames into a dead pipe fails too;
                        # still fall through so the process gets reaped.
                        pipe_broken = True
                proc.wait()

            if proc.returncode != 0 or pipe_broken:
                stderr_log.seek(0)
                stderr = stderr_log.read().decode(errors="replace")
                raise RuntimeError(f"FFmpeg encoding failed: {stderr}")
    finally:
        cap.release()