diff --git a/__pycache__/knn.cpython-310.pyc b/__pycache__/knn.cpython-310.pyc
new file mode 100644
index 0000000..cdaf5bd
Binary files /dev/null and b/__pycache__/knn.cpython-310.pyc differ
diff --git a/__pycache__/main.cpython-310.pyc b/__pycache__/main.cpython-310.pyc
new file mode 100644
index 0000000..85800b7
Binary files /dev/null and b/__pycache__/main.cpython-310.pyc differ
diff --git a/__pycache__/preprocess.cpython-310.pyc b/__pycache__/preprocess.cpython-310.pyc
new file mode 100644
index 0000000..07dd13a
Binary files /dev/null and b/__pycache__/preprocess.cpython-310.pyc differ
diff --git a/artifacts/embeddings_fixed.parquet b/artifacts/embeddings_fixed.parquet
new file mode 100644
index 0000000..3bbd594
Binary files /dev/null and b/artifacts/embeddings_fixed.parquet differ
diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000..ddeb773
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,103 @@
+# knn.py
+"""Cosine-similarity k-nearest-neighbour classifier over face embeddings."""
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from collections import defaultdict
+
+EPS = 1e-12
+
+
+def l2_normalize(x: np.ndarray):
+    """Return ``x`` scaled to unit L2 norm.
+
+    Returns ``None`` when the norm is NaN/inf or smaller than ``EPS``
+    (zero vector), because no direction can be derived from it.
+    """
+    n = np.linalg.norm(x)
+    if not np.isfinite(n) or n < EPS:
+        return None
+    return x / n
+
+
+class FullKNN:
+    """Brute-force cosine KNN over the stored embeddings, split by gender."""
+
+    def __init__(self, embeddings_path: str):
+        """Load embeddings from a parquet file and pre-normalise them.
+
+        The columns ``vector``, ``gender`` and ``animal_type`` are read.
+        Rows whose vector has a non-finite entry or a norm of at most
+        ``EPS`` are dropped.
+        """
+        # Load the embedding table.
+        df = pd.read_parquet(Path(embeddings_path))
+
+        # Stack the per-row vectors into a single float32 matrix.
+        X = np.vstack(df["vector"].apply(lambda v: np.asarray(v, np.float32)))
+        norms = np.linalg.norm(X, axis=1)
+
+        # Drop NaN/inf and zero-norm rows, then scale the rest to unit
+        # length so that a dot product equals cosine similarity.
+        ok = np.isfinite(X).all(axis=1) & (norms > EPS)
+        df = df.loc[ok].reset_index(drop=True)
+        X = X[ok] / norms[ok][:, None]
+
+        # Keep the matrix and the per-row labels.
+        self.X = X
+        self.gender = df["gender"].to_numpy()
+        self.animal = df["animal_type"].to_numpy()
+
+        # Pre-compute the row indices belonging to each gender.
+        self.idx_by_gender = {
+            g: np.where(self.gender == g)[0]
+            for g in np.unique(self.gender)
+        }
+
+    def predict(self, user_vec: np.ndarray, gender: str, K: int = 50):
+        """Return the best-scoring animal type among the K nearest rows.
+
+        Only rows whose gender equals ``gender`` are searched. Each of
+        the K most similar rows adds its cosine similarity, when
+        positive, to the score of its animal type.
+
+        Returns a one-element list ``[{"animal_type": ..., "score": ...}]``,
+        or ``[]`` when the gender is unknown, ``K`` is not positive,
+        ``user_vec`` cannot be normalised, or no selected row has a
+        positive similarity.
+        """
+        # Unknown gender, or nothing requested. A non-positive K would
+        # otherwise reach argpartition as a negative (wrapping) index.
+        if K <= 0 or gender not in self.idx_by_gender:
+            return []
+
+        # Normalise the query embedding.
+        u = l2_normalize(np.asarray(user_vec, np.float32))
+        if u is None:
+            return []
+
+        # Restrict to the rows of the requested gender.
+        idx = self.idx_by_gender[gender]
+        Xg = self.X[idx]
+        ag = self.animal[idx]
+
+        # Cosine similarity: stored rows and query are both unit length.
+        sims = Xg @ u
+
+        # Pick the K most similar rows (order within them is unspecified).
+        K = min(K, len(sims))
+        top_idx = np.argpartition(-sims, K - 1)[:K]
+
+        # Similarity-weighted vote per animal type.
+        scores = defaultdict(float)
+        for i in top_idx:
+            s = sims[i]
+            if s > 0:
+                scores[ag[i]] += float(s)
+
+        if not scores:
+            return []
+
+        # Report the animal type with the highest total.
+        best = max(scores.items(), key=lambda x: x[1])
+        return [{"animal_type": best[0], "score": best[1]}]
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..9ec38b8
--- /dev/null
+++ b/main.py
@@ -0,0 +1,78 @@
+# main.py
+import numpy as np
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from insightface.app import FaceAnalysis
+
+from knn import FullKNN
+from preprocess import biggest_face, safe_norm_vec, decode_upload_image
+
+
+# ===== Settings =====
+EMB_PATH = "./artifacts/embeddings_fixed.parquet"
+DET_SIZE = (640, 640)
+
+
+# ===== Loaded once at server start =====
+face_app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
+face_app.prepare(ctx_id=0, det_size=DET_SIZE)
+
+knn = FullKNN(EMB_PATH)
+
+app = FastAPI()
+
+
+@app.post("/analyze")
+def analyze(
+    gender: str = Form(...),
+    file: UploadFile = File(...),
+):
+    """Classify the largest face in the uploaded image.
+
+    Declared with plain ``def`` so that FastAPI runs it in its thread
+    pool: face detection and the KNN search are blocking, CPU-bound
+    calls that would otherwise stall the event loop.
+
+    Responds with 400 for a bad ``gender`` value or an undecodable
+    image, and 422 when no usable face or prediction is found.
+    """
+    # Validate the gender field.
+    if gender not in ("남자", "여자"):
+        raise HTTPException(400, "gender must be 남자 or 여자")
+
+    # Decode the upload (synchronous read of the underlying file).
+    img_bytes = file.file.read()
+    img = decode_upload_image(img_bytes)
+    if img is None:
+        raise HTTPException(400, "invalid image")
+
+    # Detect faces.
+    faces = face_app.get(img)
+    if not faces:
+        raise HTTPException(422, "no face detected")
+
+    f = biggest_face(faces)
+
+    # Reject low-confidence detections.
+    det_score = float(getattr(f, "det_score", 0.0))
+    if det_score < 0.5:
+        raise HTTPException(422, "face too unclear")
+
+    # Reject embeddings that cannot be normalised.
+    vec = safe_norm_vec(np.asarray(f.embedding, dtype=np.float32))
+    if vec is None:
+        raise HTTPException(422, "invalid embedding")
+
+    # === knn ===
+    result = knn.predict(
+        user_vec=vec,
+        gender=gender,
+        K=50
+    )
+
+    if not result:
+        raise HTTPException(422, "no prediction")
+
+    return {
+        "animal_type": result[0]["animal_type"],
+        "det_score": det_score,
+    }
diff --git a/preprocess.py b/preprocess.py
new file mode 100644
index 0000000..cbbd013
--- /dev/null
+++ b/preprocess.py
@@ -0,0 +1,38 @@
+# preprocess.py
+import numpy as np
+import cv2
+
+
+def biggest_face(faces):
+    """Return the face whose bounding box covers the largest area.
+
+    Each ``bbox`` is read as ``(x1, y1, x2, y2)``.
+    """
+    return max(
+        faces,
+        key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1])
+    )
+
+
+def safe_norm_vec(vec: np.ndarray):
+    """Return ``vec`` as a unit-length float32 array.
+
+    Returns ``None`` when the norm is NaN/inf or below ``1e-12``.
+    """
+    v = np.asarray(vec, dtype=np.float32)
+    n = np.linalg.norm(v)
+    if not np.isfinite(n) or n < 1e-12:
+        return None
+    return v / n
+
+
+def decode_upload_image(img_bytes: bytes):
+    """Decode uploaded image bytes into an OpenCV colour image.
+
+    Returns ``None`` when the payload is empty or cannot be decoded.
+    """
+    # cv2.imdecode raises on an empty buffer instead of returning None,
+    # so reject an empty upload before calling it.
+    if not img_bytes:
+        return None
+    return cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)