feat: add video re-encoding with auto-detected encoder and bbox interpolation
This commit is contained in:
parent
435a0a0de7
commit
ebd41ac0a4
1 changed files with 274 additions and 0 deletions
274
src/faceblur/encode.py
Normal file
274
src/faceblur/encode.py
Normal file
|
|
@ -0,0 +1,274 @@
|
||||||
|
"""Video re-encoding with face blur applied."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Callable, Dict, List, Optional, Set, Tuple
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from .blur import BlurMethod, apply_blur, get_bboxes_for_frame
|
||||||
|
from .cluster import Cluster
|
||||||
|
from .detect import FaceData
|
||||||
|
|
||||||
|
|
||||||
|
# Candidate H.264 encoders, in order of preference. find_best_encoder() walks
# this list top to bottom and returns the first entry whose test encode
# succeeds, so earlier entries win when several are usable. The
# hardware-backed encoders (NVENC, VAAPI, AMF, QSV) are listed ahead of the
# libopenh264 software encoder.
ENCODER_PRIORITY = [
    "h264_nvenc",
    "h264_vaapi",
    "h264_amf",
    "h264_qsv",
    "libopenh264",
]
|
||||||
|
|
||||||
|
|
||||||
|
def probe_video(video_path: Path) -> dict:
    """Probe video file to get codec, bitrate, fps, resolution, audio info.

    Only the FIRST video stream and the FIRST audio stream are reported.
    Containers may carry additional video streams (for example an attached
    cover image); letting a later stream overwrite the values would yield a
    width/height/fps that does not describe the main picture. The first audio
    stream is the one encode_video() maps with ``1:a:0``.

    Args:
        video_path: Path to input video

    Returns:
        Dict with keys: width, height, fps, bitrate, codec, audio_codec,
        audio_bitrate, total_frames. Keys that cannot be determined keep
        their defaults (0 for sizes/frame count, 30.0 fps, "4M" bitrate,
        "h264" codec, None for the audio fields).

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status.
    """
    cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_streams",
        "-show_format",
        str(video_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr}")

    data = json.loads(result.stdout)
    container = data.get("format", {})
    info = {
        "width": 0,
        "height": 0,
        "fps": 30.0,
        "bitrate": "4M",
        "codec": "h264",
        "audio_codec": None,
        "audio_bitrate": None,
        "total_frames": 0,
    }

    seen_video = False
    seen_audio = False
    for stream in data.get("streams", []):
        codec_type = stream.get("codec_type")

        if codec_type == "video":
            if seen_video:
                # Ignore secondary video streams (thumbnails, cover art, ...).
                continue
            seen_video = True

            info["width"] = int(stream.get("width", 0))
            info["height"] = int(stream.get("height", 0))
            info["codec"] = stream.get("codec_name", "h264")

            # Parse fps from r_frame_rate (e.g., "60/1")
            fps_str = stream.get("r_frame_rate", "30/1")
            if "/" in fps_str:
                num, den = fps_str.split("/")
                # A zero denominator (e.g. "0/0") falls back to 30 fps.
                info["fps"] = float(num) / float(den) if float(den) > 0 else 30.0
            else:
                info["fps"] = float(fps_str)

            # Bitrate from stream, falling back to the container-level value
            if stream.get("bit_rate"):
                info["bitrate"] = stream["bit_rate"]
            elif container.get("bit_rate"):
                info["bitrate"] = container["bit_rate"]

            # Total frames: use the stream's own count when present,
            # otherwise estimate it from container duration * fps.
            nb_frames = stream.get("nb_frames")
            if nb_frames and nb_frames != "N/A":
                info["total_frames"] = int(nb_frames)
            else:
                duration = float(container.get("duration", 0))
                info["total_frames"] = int(duration * info["fps"])

        elif codec_type == "audio":
            if seen_audio:
                # Only the first audio stream is described.
                continue
            seen_audio = True

            info["audio_codec"] = stream.get("codec_name")
            info["audio_bitrate"] = stream.get("bit_rate")

    return info
|
||||||
|
|
||||||
|
|
||||||
|
def find_best_encoder() -> str:
    """Find the best available H.264 encoder by testing each in priority order.

    Each candidate in ENCODER_PRIORITY is exercised with a tiny synthetic
    encode (64x64 ``nullsrc`` for 0.1 s, output discarded). The first
    candidate whose ffmpeg run exits with status 0 is returned.

    A candidate whose test encode does not finish within 10 seconds is
    treated as unavailable and the search moves on to the next one, rather
    than aborting the whole detection.

    Returns:
        Name of the best available encoder

    Raises:
        RuntimeError: If none of the candidates in ENCODER_PRIORITY works.
    """
    for encoder in ENCODER_PRIORITY:
        cmd = [
            "ffmpeg",
            "-v",
            "quiet",
            "-f",
            "lavfi",
            "-i",
            "nullsrc=s=64x64:d=0.1",
            "-c:v",
            encoder,
            "-f",
            "null",
            "-",
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, timeout=10)
        except subprocess.TimeoutExpired:
            # A hung probe means this encoder is not usable here; keep
            # looking instead of letting the timeout escape to the caller.
            continue
        if result.returncode == 0:
            return encoder

    raise RuntimeError(
        "No H.264 encoder found. Available encoders checked: "
        + ", ".join(ENCODER_PRIORITY)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def build_keyframe_bboxes(
    clusters: List[Cluster],
    selected_cluster_ids: Set[int],
    frame_interval: int,
) -> Tuple[Dict[int, List[Tuple[int, Tuple[int, int, int, int]]]], List[int]]:
    """Group the bboxes of the selected clusters by video frame index.

    Every face of every selected cluster contributes one
    ``(cluster_id, bbox)`` entry, stored under the video frame it was
    detected on.

    Args:
        clusters: All clusters from detection
        selected_cluster_ids: Set of cluster IDs to blur
        frame_interval: The interval used for frame extraction

    Returns:
        A pair ``(keyframe_bboxes, keyframe_indices)``: the mapping from
        video frame index to its list of ``(cluster_id, bbox)`` entries, and
        the mapping's keys in ascending order.
    """
    bboxes_by_frame: Dict[int, List[Tuple[int, Tuple[int, int, int, int]]]] = {}

    chosen = (c for c in clusters if c.id in selected_cluster_ids)
    for cluster in chosen:
        for face in cluster.faces:
            # face.frame_index counts extracted frame files from 1; shift to
            # 0-based and scale by the extraction interval to get the
            # position in the original video.
            frame_no = frame_interval * (face.frame_index - 1)
            bboxes_by_frame.setdefault(frame_no, []).append(
                (cluster.id, face.bbox)
            )

    return bboxes_by_frame, sorted(bboxes_by_frame)
|
||||||
|
|
||||||
|
|
||||||
|
def encode_video(
    input_path: Path,
    output_path: Path,
    clusters: List[Cluster],
    selected_cluster_ids: Set[int],
    frame_interval: int,
    blur_method: BlurMethod = "gaussian",
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> None:
    """Re-encode video with face blur applied to selected clusters.

    Frames are decoded with OpenCV, blurred in Python and streamed as raw
    BGR24 data to an ffmpeg child process, which encodes them with the
    encoder chosen by find_best_encoder() at the source bitrate. If the
    input has an audio stream, its first audio stream is copied unchanged
    into the output.

    Args:
        input_path: Path to input video
        output_path: Path to output video
        clusters: All detected clusters
        selected_cluster_ids: Cluster IDs to blur
        frame_interval: Frame interval used during detection
        blur_method: Blur method to use
        progress_callback: Called with (current_frame, total_frames) after
            each frame is written; skipped when the total is unknown (0)

    Raises:
        RuntimeError: If the input cannot be opened, or if ffmpeg exits with
            a non-zero status (the message carries ffmpeg's stderr output).
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import tempfile

    video_info = probe_video(input_path)
    encoder = find_best_encoder()

    keyframe_bboxes, keyframe_indices = build_keyframe_bboxes(
        clusters,
        selected_cluster_ids,
        frame_interval,
    )

    width = video_info["width"]
    height = video_info["height"]
    fps = video_info["fps"]
    bitrate = video_info["bitrate"]
    total_frames = video_info["total_frames"]

    # Build FFmpeg encode command: input 0 is the raw frame pipe, input 1 is
    # the original file (used only as the audio source).
    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "bgr24",
        "-s",
        f"{width}x{height}",
        "-r",
        str(fps),
        "-i",
        "pipe:0",
        "-i",
        str(input_path),
        "-map",
        "0:v:0",
    ]

    # Map audio from original if present
    if video_info["audio_codec"]:
        ffmpeg_cmd.extend(["-map", "1:a:0", "-c:a", "copy"])

    ffmpeg_cmd.extend(
        [
            "-c:v",
            encoder,
            "-b:v",
            str(bitrate),
            "-pix_fmt",
            "yuv420p",
            str(output_path),
        ]
    )

    # Open input video
    cap = cv2.VideoCapture(str(input_path))
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {input_path}")

    # ffmpeg logs to stderr for the whole run. Nothing reads that stream
    # while frames are being written, so a PIPE would eventually fill up and
    # stall ffmpeg (and then this writer). Spool stderr to a temp file and
    # discard stdout instead; the file is only read back on failure.
    with tempfile.TemporaryFile() as stderr_log:
        try:
            proc = subprocess.Popen(
                ffmpeg_cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=stderr_log,
            )
        except Exception:
            # Do not leak the capture handle if ffmpeg cannot be started.
            cap.release()
            raise

        try:
            frame_idx = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Get bboxes for this frame (exact or interpolated)
                face_bboxes = get_bboxes_for_frame(
                    frame_idx,
                    keyframe_bboxes,
                    keyframe_indices,
                )

                # Apply blur to each face
                for _cluster_id, bbox in face_bboxes:
                    frame = apply_blur(frame, bbox, method=blur_method)

                # Write frame to FFmpeg
                try:
                    proc.stdin.write(frame.tobytes())
                except OSError:
                    # ffmpeg closed its end of the pipe (it exited early).
                    # Stop feeding frames; the exit-status check below
                    # reports ffmpeg's own error output.
                    break

                frame_idx += 1
                if progress_callback and total_frames > 0:
                    progress_callback(frame_idx, total_frames)

        finally:
            cap.release()
            if proc.stdin:
                try:
                    proc.stdin.close()
                except OSError:
                    # Flushing buffered data into a dead ffmpeg fails the
                    # same way as write(); the exit status tells the story.
                    pass
            proc.wait()

        if proc.returncode != 0:
            stderr_log.seek(0)
            stderr = stderr_log.read().decode(errors="replace")
            raise RuntimeError(f"FFmpeg encoding failed: {stderr}")
|
||||||
Loading…
Add table
Add a link
Reference in a new issue