From 966743140625379d5ba23a04ca9f3dcceeb8b239 Mon Sep 17 00:00:00 2001
From: fiatcode <fiatcode@posteo.com>
Date: Sat, 28 Feb 2026 10:17:11 +0700
Subject: [PATCH] chore: project cleanup, track missing files, and update
 README

---
 .gitignore               | 13 +++++++++
 .python-version          |  1 +
 README.md                | 53 ++++++++++++++++++++++++++++++++++
 main.py                  |  4 +++
 src/faceblur/__init__.py |  3 ++
 src/faceblur/encode.py   |  2 --
 src/faceblur/output.py   |  1 -
 src/faceblur/video.py    | 61 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 135 insertions(+), 3 deletions(-)
 create mode 100644 .python-version
 create mode 100644 README.md
 create mode 100644 main.py
 create mode 100644 src/faceblur/__init__.py
 create mode 100644 src/faceblur/video.py

diff --git a/.gitignore b/.gitignore
index 62a329e..3dc2924 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,16 @@ wheels/
 
 # Downloaded ML models
 models/
+
+# Video files (avoid committing test videos)
+*.mp4
+*.avi
+*.mkv
+*.mov
+
+# Editor swap files
+.*.swp
+*.swp
+
+# AI Coding context
+.opencode/
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..e4fba21
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0991b0f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,53 @@
+# PyFaceBlur
+
+An interactive command-line tool that automatically detects, clusters, and blurs faces in videos. It guides you through a simple step-by-step process to extract frames, group people by facial identity, select who you want to blur, and re-encode the video.
+
+## Features
+
+- **Interactive CLI:** Built with `rich` and `questionary` for a clean, prompt-based UX including file path auto-completion.
+- **Accurate Face Recognition:** Uses [UniFace](https://github.com/yakhyo/uniface) (RetinaFace detection + ArcFace 512-dim neural embeddings via ONNX Runtime) to accurately re-identify the same person across a video.
+- **DBSCAN Clustering:** Automatically groups faces belonging to the same person into clusters based on the cosine similarity of their embeddings.
+- **Hardware-Accelerated Encoding:** Automatically detects and leverages GPU encoders like `av1_vaapi`, `hevc_vaapi`, `h264_vaapi`, `h264_nvenc`, and more via FFmpeg.
+- **Visual Face Selection:** Extracts one high-quality thumbnail per detected person and opens the thumbnail folder in your system's file explorer, so you can see who is who before ticking the people to blur in the CLI checkboxes.
+- **Multiple Blur Styles:** Choose from Gaussian, Pixelate, Blackout, Elliptical, or Median blur methods.
+- **Smooth Interpolation:** Bounding boxes are linearly interpolated between sampled keyframes and held static when faces exit/enter, ensuring smooth blurring without split-second exposures.
+
+## Requirements
+
+- Python 3.11+
+- [uv](https://docs.astral.sh/uv/) for fast dependency management
+- `ffmpeg` installed and available in your system `$PATH` (for frame extraction and re-encoding)
+
+## Setup
+
+```bash
+# After cloning the repository, navigate to the project directory
+cd faceblur-poc
+
+# Sync dependencies using uv
+uv sync
+```
+
+## Usage
+
+Run the interactive wizard:
+
+```bash
+uv run pyfaceblur
+```
+
+### The Pipeline
+
+1. **Input:** You provide the path to your video and the frame sampling interval (e.g., sample every 30th frame).
+2. **Processing:** The app uses FFmpeg to extract frames, runs RetinaFace to find all faces, and generates ArcFace embeddings.
+3. **Clustering:** DBSCAN groups the embeddings to identify unique individuals.
+4. **Selection:** The app saves a thumbnail of each person to a temporary folder, opens it, and asks you to select which people to blur using interactive checkboxes.
+5. **Encoding:** The app finds the best available video encoder on your system, applies the chosen blur method to the selected faces, interpolates their movement, and generates a new `*_blurred.mp4` video.
+
+## Advanced / POC CLI
+
+The original proof-of-concept command-line interface is still available if you only want to run detection and clustering and write the results to an output folder for debugging.
+
+```bash
+uv run faceblur-poc detect --video input.mp4 --output ./output --interval 30 --confidence 0.7
+```
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..227ea4d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,4 @@
+from faceblur.cli import main
+
+if __name__ == "__main__":
+    main()  # delegate to the CLI entry point imported from faceblur.cli
diff --git a/src/faceblur/__init__.py b/src/faceblur/__init__.py
new file mode 100644
index 0000000..0066470
--- /dev/null
+++ b/src/faceblur/__init__.py
@@ -0,0 +1,3 @@
+"""Face detection and clustering POC."""
+
+__version__ = "0.1.0"
diff --git a/src/faceblur/encode.py b/src/faceblur/encode.py
index 230f5be..6dbd953 100644
--- a/src/faceblur/encode.py
+++ b/src/faceblur/encode.py
@@ -6,11 +6,9 @@ from pathlib import Path
 from typing import Callable, Dict, List, Optional, Set, Tuple
 
 import cv2
-import numpy as np
 
 from .blur import BlurMethod, apply_blur, get_bboxes_for_frame
 from .cluster import Cluster
-from .detect import FaceData
 
 
 ENCODER_PRIORITY = [
diff --git a/src/faceblur/output.py b/src/faceblur/output.py
index 3921a5e..b9e697c 100644
--- a/src/faceblur/output.py
+++ b/src/faceblur/output.py
@@ -5,7 +5,6 @@ from pathlib import Path
 from typing import List, Dict, Tuple
 
 import cv2
-import numpy as np
 
 from .video import Frame
 from .detect import FaceData
diff --git a/src/faceblur/video.py b/src/faceblur/video.py
new file mode 100644
index 0000000..a265027
--- /dev/null
+++ b/src/faceblur/video.py
@@ -0,0 +1,61 @@
+"""Video frame extraction module."""
+
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List
+
+
+@dataclass
+class Frame:
+    """An extracted video frame: the image file plus its sequence number."""
+
+    path: Path  # location of the extracted image file on disk
+    index: int  # number parsed from the frame file name by extract_frames
+
+
+def extract_frames(video_path: str, output_dir: str, interval: int = 30) -> List[Frame]:
+    """Extract every Nth frame of a video as JPEG files using ffmpeg.
+
+    Args:
+        video_path: Path to input video file
+        output_dir: Directory to save extracted frames (created if missing)
+        interval: Extract every Nth frame; must be at least 1
+
+    Returns:
+        List of Frame objects in ascending order of their file-name number
+
+    Raises:
+        ValueError: If interval is less than 1
+        FileNotFoundError: If video_path does not exist
+        RuntimeError: If ffmpeg exits with a non-zero status
+    """
+    if interval < 1:
+        raise ValueError(f"interval must be >= 1, got {interval}")
+    video_path = Path(video_path)
+    output_dir = Path(output_dir)
+    if not video_path.exists():
+        raise FileNotFoundError(f"Video file not found: {video_path}")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    cmd = [
+        "ffmpeg",
+        "-i",
+        str(video_path),
+        "-vf",
+        f"select='not(mod(n\\,{interval}))'",
+        "-vsync",
+        "vfr",
+        "-q:v",
+        "2",
+        "-y",
+        str(output_dir / "frame_%04d.jpg"),
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        raise RuntimeError(f"ffmpeg failed: {result.stderr}")
+
+    # Sort numerically: %04d widens past 9999, where name order misplaces frames.
+    found = output_dir.glob("frame_*.jpg")
+    frames = [Frame(path=p, index=int(p.stem.split("_")[1])) for p in found]
+    return sorted(frames, key=lambda frame: (frame.index, frame.path))