feat: replace MediaPipe + HSV histograms with RetinaFace + ArcFace

2026-02-27 21:37:34 +07:00 · 2026-02-27 21:37:34 +07:00 · 60e8702199
commit 60e8702199
parent 379692f313
1 changed files with 74 additions and 0 deletions
--- a/src/faceblur/detect.py
+++ b/src/faceblur/detect.py
@ -0,0 +1,74 @@
+"""Face detection module using UniFace (RetinaFace + ArcFace)."""
+
+import cv2
+import numpy as np
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import List, Tuple
+
+from uniface.detection import RetinaFace
+from uniface.recognition import ArcFace
+
+
+@dataclass(eq=False)  # generated __eq__ would call == on ndarray fields and can raise
+class FaceData:
+    """One detected face and its embedding; instances compare and hash by identity."""
+
+    id: int  # FaceDetector assigns frame_index * 100 + position within the frame
+    frame_path: Path  # image file the face was detected in
+    frame_index: int  # index of that frame in the video
+    bbox: Tuple[int, int, int, int]  # (x1, y1, x2, y2)
+    embedding: np.ndarray  # flattened recognizer output for this face
+    confidence: float  # confidence value reported by the detector
+    landmarks: np.ndarray = field(default_factory=lambda: np.empty(0))  # empty by default
+
+
+class FaceDetector:
+    """Locate faces with RetinaFace and embed them with ArcFace (via UniFace)."""
+
+    def __init__(self):
+        # Construct both models once; detect_faces reuses them on every call.
+        self.detector = RetinaFace()
+        self.recognizer = ArcFace()
+
+    def detect_faces(self, frame_path: Path, frame_index: int) -> List[FaceData]:
+        """Find every face in one frame image and compute its embedding.
+
+        Args:
+            frame_path: Location of the frame image on disk.
+            frame_index: Position of the frame within the video.
+
+        Returns:
+            One FaceData per detection, in the order the detector yields them.
+
+        Raises:
+            ValueError: If the image at frame_path cannot be read.
+        """
+        pixels = cv2.imread(str(frame_path))
+        if pixels is None:
+            raise ValueError(f"Could not read image: {frame_path}")
+
+        found: List[FaceData] = []
+        for slot, hit in enumerate(self.detector.detect(pixels)):
+            box = tuple(map(int, hit.bbox))  # (x1, y1, x2, y2)
+            score, points = hit.confidence, hit.landmarks
+            # The embedding is computed from the full frame plus this face's landmarks.
+            vector = self.recognizer.get_normalized_embedding(pixels, points)
+            # NOTE: ids are frame_index * 100 + slot, so they only stay unique
+            # while a single frame yields fewer than 100 detections.
+            found.append(
+                FaceData(
+                    id=frame_index * 100 + slot,
+                    frame_path=frame_path,
+                    frame_index=frame_index,
+                    bbox=box,
+                    embedding=vector.flatten(),
+                    confidence=score,
+                    landmarks=points,
+                )
+            )
+
+        return found
+
+    def close(self):
+        """Release resources; nothing is held at present, so this is a no-op."""