import numpy as np
import torch
from PIL import Image, ImageDraw
from facenet_pytorch import MTCNN


class FaceCropper:
    """Detect faces and return (cropped_face, annotated_image, metadata)."""

    def __init__(self, device: str | None = None, image_size: int = 224):
        # Prefer an explicit device; otherwise fall back to CUDA when available.
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.mtcnn = MTCNN(keep_all=True, device=self.device)
        self.image_size = image_size

    def _to_pil(self, img):
        # Accept PIL images or numpy arrays; normalize everything to RGB.
        if isinstance(img, Image.Image):
            return img.convert("RGB")
        return Image.fromarray(img).convert("RGB")

    def detect_and_crop(self, img, select="largest"):
        pil = self._to_pil(img)
        boxes, probs = self.mtcnn.detect(pil)

        annotated = pil.copy()
        draw = ImageDraw.Draw(annotated)

        # No detections: return the unannotated copy with empty metadata.
        if boxes is None or len(boxes) == 0:
            return None, annotated, {"boxes": np.empty((0, 4)), "scores": np.empty((0,))}

        # Draw every detection with its confidence score above the box.
        for b, p in zip(boxes, probs):
            x1, y1, x2, y2 = map(float, b)
            draw.rectangle([x1, y1, x2, y2], outline=(255, 0, 0), width=3)
            draw.text((x1, max(y1 - 10, 0)), f"{p:.2f}", fill=(255, 0, 0))

        # Pick the face to crop: the largest box by default,
        # or an explicit index into the detections.
        idx = 0
        if select == "largest":
            areas = [(b[2] - b[0]) * (b[3] - b[1]) for b in boxes]
            idx = int(np.argmax(areas))
        elif isinstance(select, int) and 0 <= select < len(boxes):
            idx = select

        # MTCNN boxes can extend past the image edges; clamp before cropping,
        # then resize the crop to the configured square output size.
        x1, y1, x2, y2 = boxes[idx]
        x1, y1 = max(int(x1), 0), max(int(y1), 0)
        x2, y2 = min(int(x2), pil.width), min(int(y2), pil.height)
        face = pil.crop((x1, y1, x2, y2)).resize((self.image_size, self.image_size))
        return face, annotated, {"boxes": boxes, "scores": probs}
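

# A minimal usage sketch, assuming facenet-pytorch and Pillow are installed and
# a local test image exists at "photo.jpg" (a hypothetical path, not part of
# the class above). It crops the largest detected face and saves both outputs.
if __name__ == "__main__":
    cropper = FaceCropper()
    image = Image.open("photo.jpg")
    face, annotated, meta = cropper.detect_and_crop(image, select="largest")
    if face is None:
        print("No faces detected")
    else:
        print(f"{len(meta['boxes'])} face(s), scores: {meta['scores']}")
        face.save("face.jpg")
        annotated.save("annotated.jpg")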