Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added __pycache__/knn.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/main.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/preprocess.cpython-310.pyc
Binary file not shown.
Binary file added artifacts/embeddings_fixed.parquet
Binary file not shown.
79 changes: 79 additions & 0 deletions knn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# knn.py
import numpy as np
import pandas as pd
from pathlib import Path
from collections import defaultdict

# Norm threshold: vectors whose L2 norm is not above this are treated as
# zero vectors and are never normalised (avoids division by ~0).
EPS = 1e-12


def l2_normalize(x: np.ndarray):
    """Scale *x* to unit L2 norm.

    Args:
        x: Vector to normalise.

    Returns:
        ``x`` divided by its norm, or ``None`` when the norm is NaN/infinite
        or smaller than ``EPS`` (i.e. the vector cannot be normalised).
    """
    magnitude = np.linalg.norm(x)
    # The finiteness test comes first so NaN/inf norms are rejected before
    # the threshold comparison.
    if np.isfinite(magnitude) and not magnitude < EPS:
        return x / magnitude
    return None


class FullKNN:
    """Exact cosine-similarity k-NN voter over a table of stored embeddings.

    The table is loaded once, rows with unusable vectors are discarded, and
    the remaining vectors are L2-normalised so that a dot product with a
    unit-length query equals cosine similarity. Queries only consider the
    rows whose ``gender`` value equals the requested one.

    Attributes:
        X: Row-wise unit-length embedding matrix (float32).
        gender: ``gender`` column values aligned with the rows of ``X``.
        animal: ``animal_type`` column values aligned with the rows of ``X``.
        idx_by_gender: Maps each distinct gender value to the row indices
            of ``X`` that carry it.
    """

    def __init__(self, embeddings_path: str):
        """Load the embedding table and build the per-gender index.

        Args:
            embeddings_path: Path to a parquet file providing the columns
                ``vector``, ``gender`` and ``animal_type``.

        Raises:
            ValueError: If the table contains no rows.
        """
        # Load the embedding table.
        df = pd.read_parquet(Path(embeddings_path))
        if len(df) == 0:
            # np.vstack would otherwise fail with an opaque
            # "need at least one array to concatenate" ValueError.
            raise ValueError(f"embedding table is empty: {embeddings_path}")

        # Stack the per-row vectors into one float32 matrix.
        X = np.vstack([np.asarray(v, np.float32) for v in df["vector"]])
        norms = np.linalg.norm(X, axis=1)

        # Drop rows containing NaN/inf components or with a (near-)zero norm.
        ok = np.isfinite(X).all(axis=1) & (norms > EPS)
        df = df.loc[ok].reset_index(drop=True)
        X = X[ok] / norms[ok][:, None]

        # Keep the normalised embeddings and their aligned metadata.
        self.X = X
        self.gender = df["gender"].to_numpy()
        self.animal = df["animal_type"].to_numpy()

        # Pre-compute the row indices belonging to each gender value.
        self.idx_by_gender = {
            g: np.where(self.gender == g)[0]
            for g in np.unique(self.gender)
        }

    def predict(self, user_vec: np.ndarray, gender: str, K: int = 50):
        """Return the animal type favoured by the K most similar embeddings.

        Each of the top-K neighbours (by cosine similarity, within the given
        gender) adds its similarity to its animal type's score; neighbours
        with non-positive similarity do not vote.

        Args:
            user_vec: Query embedding; it is L2-normalised here.
            gender: Gender value used to select the candidate rows.
            K: Maximum number of neighbours that vote. It is capped at the
                number of candidates.

        Returns:
            ``[{"animal_type": ..., "score": ...}]`` for the best-scoring
            animal type, or ``[]`` when ``K`` is not positive, the gender is
            unknown, the query cannot be normalised, or no neighbour has a
            positive similarity.
        """
        # A non-positive K selects no neighbours. Without this guard a
        # negative K would reach argpartition/slicing and either raise or
        # silently vote over "all but |K|" rows.
        if K <= 0:
            return []

        # No stored rows for this gender.
        if gender not in self.idx_by_gender:
            return []

        # Normalise the query embedding.
        u = l2_normalize(np.asarray(user_vec, np.float32))
        if u is None:
            return []

        # Restrict the search to rows of the requested gender.
        idx = self.idx_by_gender[gender]
        labels = self.animal[idx]

        # Rows and query are unit length, so the dot product is the cosine.
        sims = self.X[idx] @ u

        # Pick the K largest similarities (unordered).
        k = min(K, len(sims))
        top_idx = np.argpartition(-sims, k - 1)[:k]

        # Similarity-weighted vote per animal type.
        scores = defaultdict(float)
        for i in top_idx:
            s = sims[i]
            if s > 0:
                scores[labels[i]] += float(s)

        if not scores:
            return []

        # Report the animal type with the highest accumulated score.
        best_label, best_score = max(scores.items(), key=lambda kv: kv[1])
        return [{"animal_type": best_label, "score": best_score}]
69 changes: 69 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# main.py
import numpy as np
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from insightface.app import FaceAnalysis

from knn import FullKNN
from preprocess import biggest_face, safe_norm_vec, decode_upload_image


# ===== Configuration =====
# Parquet file of reference embeddings handed to FullKNN below.
EMB_PATH = "./artifacts/embeddings_fixed.parquet"
# Detection input size passed to FaceAnalysis.prepare.
DET_SIZE = (640, 640)


# ===== Loaded once when the module is imported (server start-up) =====
# NOTE(review): ctx_id=0 is passed together with a CPU-only provider list —
# confirm which device is intended.
face_app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
face_app.prepare(ctx_id=0, det_size=DET_SIZE)

# Reference index queried by the /analyze endpoint.
knn = FullKNN(EMB_PATH)

app = FastAPI()


@app.post("/analyze")
async def analyze(
    gender: str = Form(...),
    file: UploadFile = File(...),
):
    """Classify the largest face in an uploaded image into an animal type.

    Args:
        gender: Form field; must be one of the two accepted gender labels.
        file: Uploaded image file.

    Returns:
        A dict with the predicted ``animal_type`` and the detector's
        ``det_score`` for the chosen face.

    Raises:
        HTTPException: 400 for an unsupported gender or an undecodable image;
            422 when no face is found, the detection score is below 0.5, the
            embedding cannot be normalised, or the k-NN yields no prediction.
    """
    # Validate the gender label.
    accepted_genders = ("남자", "여자")
    if gender not in accepted_genders:
        raise HTTPException(status_code=400, detail="gender must be 남자 or 여자")

    # Decode the uploaded bytes into an image.
    image = decode_upload_image(await file.read())
    if image is None:
        raise HTTPException(status_code=400, detail="invalid image")

    # Detect faces.
    # NOTE(review): this call runs synchronously inside the coroutine; if it
    # is CPU-heavy it will hold the event loop — confirm whether it should be
    # moved to a threadpool.
    detections = face_app.get(image)
    if not detections:
        raise HTTPException(status_code=422, detail="no face detected")

    face = biggest_face(detections)

    # Reject low-confidence detections.
    det_score = float(getattr(face, "det_score", 0.0))
    if det_score < 0.5:
        raise HTTPException(status_code=422, detail="face too unclear")

    # Normalise the face embedding; None means it is unusable.
    embedding = safe_norm_vec(np.asarray(face.embedding, dtype=np.float32))
    if embedding is None:
        raise HTTPException(status_code=422, detail="invalid embedding")

    # === k-NN vote ===
    prediction = knn.predict(user_vec=embedding, gender=gender, K=50)
    if not prediction:
        raise HTTPException(status_code=422, detail="no prediction")

    top = prediction[0]
    return {
        "animal_type": top["animal_type"],
        "det_score": det_score,
    }
26 changes: 26 additions & 0 deletions preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# preprocess.py
import numpy as np
import cv2


def biggest_face(faces):
    """Return the face whose bounding box covers the largest area.

    Args:
        faces: Non-empty iterable of objects exposing ``bbox`` indexable as
            ``[x1, y1, x2, y2]``.

    Returns:
        The element of *faces* with the largest ``(x2 - x1) * (y2 - y1)``;
        the first one wins on ties.
    """

    def box_area(face):
        box = face.bbox
        width = box[2] - box[0]
        height = box[3] - box[1]
        return width * height

    return max(faces, key=box_area)


def safe_norm_vec(vec: np.ndarray):
    """L2-normalise an embedding, guarding against NaN/inf and zero vectors.

    Args:
        vec: Embedding vector; it is converted to float32 first.

    Returns:
        The unit-length float32 vector, or ``None`` when the norm is not
        finite or is smaller than 1e-12.
    """
    arr = np.asarray(vec, dtype=np.float32)
    length = np.linalg.norm(arr)
    # Finite check first, so a NaN norm never reaches the comparison.
    if np.isfinite(length) and length >= 1e-12:
        return arr / length
    return None


def decode_upload_image(img_bytes: bytes):
    """Decode uploaded image bytes into an OpenCV colour image.

    Args:
        img_bytes: Raw bytes of the uploaded file.

    Returns:
        The image decoded with ``cv2.IMREAD_COLOR``, or ``None`` when the
        input is empty or cannot be decoded.
    """
    # cv2.imdecode raises on an empty buffer instead of returning None, so
    # an empty upload must be rejected before it reaches OpenCV.
    if not img_bytes:
        return None

    buffer = np.frombuffer(img_bytes, np.uint8)
    try:
        return cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    except cv2.error:
        # Keep the "None means invalid image" contract for malformed data.
        return None