From e17b3410cb2c7d115c91e26762b77fa9ee700a61 Mon Sep 17 00:00:00 2001
From: Attila Roshani <atilaroshani@gmail.com>
Date: Tue, 25 Nov 2025 05:08:50 +0000
Subject: [PATCH 1/2] Add configurable score threshold

---
 docaligner/aligner.py           |   3 +-
 docaligner/heatmap_reg/infer.py | 130 ++++++++++++++++----------------
 docaligner/point_reg/infer.py   |   7 +-
 3 files changed, 74 insertions(+), 66 deletions(-)

diff --git a/docaligner/aligner.py b/docaligner/aligner.py
index 92c1350..31c61be 100644
--- a/docaligner/aligner.py
+++ b/docaligner/aligner.py
@@ -67,8 +67,9 @@ def __call__(
         self,
         img: np.ndarray,
         do_center_crop: bool = False,
+        threshold: float = 0.3,
     ) -> Union[np.ndarray]:
-        return self.detector(img, do_center_crop)
+        return self.detector(img, do_center_crop, threshold=threshold)
 
     def __repr__(self) -> str:
         return f'{self.detector.__class__.__name__}({self.detector.model})'
diff --git a/docaligner/heatmap_reg/infer.py b/docaligner/heatmap_reg/infer.py
index 1884947..7e3d75e 100644
--- a/docaligner/heatmap_reg/infer.py
+++ b/docaligner/heatmap_reg/infer.py
@@ -8,67 +8,6 @@
 __all__ = ['Inference']
 
 
-def preprocess(
-    img: np.ndarray,
-    img_size_infer: Tuple[int, int] = None,
-    do_center_crop: bool = False,
-    return_tensor: bool = True,
-):
-    if not cb.is_numpy_img(img):
-        raise ValueError("Input image must be numpy array.")
-
-    h, w = img.shape[0:2]
-    center_crop_align = [0, 0]
-
-    if do_center_crop:
-        img = cb.centercrop(img)
-        if h > w:
-            center_crop_align = [0, (h - w) // 2]
-        else:
-            center_crop_align = [(w - h) // 2, 0]
-
-    nh, nw = img.shape[0:2]
-    if img_size_infer is not None:
-        img = cb.imresize(img, size=img_size_infer)
-
-    if return_tensor:
-        img = np.transpose(img, axes=(2, 0, 1)).astype('float32')
-        img = img[None] / 255.
-
-    return {
-        'input': {'img': img},
-        'img_size_ori': (nh, nw),
-        'img_size_infer': img_size_infer,
-        'return_tensor': return_tensor,
-        'center_crop_align': center_crop_align
-    }
-
-
-def postprocess(
-    preds: np.ndarray,  # (1, 4, H, W)
-    imgs_size: Tuple[int, int],
-    heatmap_threshold: float = 0.3
-) -> List[float]:
-
-    def _get_point_with_max_area(mask):
-        polygons = cb.Polygons.from_image(mask).drop_empty()
-        if len(polygons) > 0:
-            polygons = polygons[polygons.area == polygons.area.max()]
-        return polygons.centroid.flatten().tolist()
-
-    polygon = []
-    for ii, pred in enumerate(preds[0]):
-        pred = cb.imresize(pred, size=imgs_size)
-        pred[pred < heatmap_threshold] = 0
-        pred = np.uint8(pred * 255)
-        pred = cb.imbinarize(pred)
-        point = _get_point_with_max_area(pred)
-        if len(point) == 2 and ii < 4:
-            polygon.append(point)
-
-    return polygon
-
-
 class Inference:
 
     configs = {
@@ -100,26 +39,91 @@ def __init__(
         self.cfg = cfg = self.configs[model_cfg]
         self.img_size_infer = cfg['img_size_infer']
         model_path = self.root / cfg['model_path']
+
         if not cb.Path(model_path).exists():
             cb.download_from_google(
                 cfg['file_id'], model_path.name, str(DIR / 'ckpt'))
 
         self.model = cb.ONNXEngine(model_path, gpu_id, backend, **kwargs)
 
+    def preprocess(
+        self,
+        img: np.ndarray,
+        img_size_infer: Tuple[int, int] = None,
+        do_center_crop: bool = False,
+        return_tensor: bool = True,
+    ):
+        if not cb.is_numpy_img(img):
+            raise ValueError("Input image must be numpy array.")
+
+        h, w = img.shape[0:2]
+        center_crop_align = [0, 0]
+
+        if do_center_crop:
+            img = cb.centercrop(img)
+            if h > w:
+                center_crop_align = [0, (h - w) // 2]
+            else:
+                center_crop_align = [(w - h) // 2, 0]
+
+        nh, nw = img.shape[0:2]
+        if img_size_infer is not None:
+            img = cb.imresize(img, size=img_size_infer)
+
+        if return_tensor:
+            img = np.transpose(img, axes=(2, 0, 1)).astype('float32')
+            img = img[None] / 255.
+
+        return {
+            'input': {'img': img},
+            'img_size_ori': (nh, nw),
+            'img_size_infer': img_size_infer,
+            'return_tensor': return_tensor,
+            'center_crop_align': center_crop_align
+        }
+
+
+    def postprocess(
+        self,
+        preds: np.ndarray,  # (1, 4, H, W)
+        imgs_size: Tuple[int, int],
+        heatmap_threshold: float = 0.3,
+    ) -> List[float]:
+
+        def _get_point_with_max_area(mask):
+            polygons = cb.Polygons.from_image(mask).drop_empty()
+            if len(polygons) > 0:
+                polygons = polygons[polygons.area == polygons.area.max()]
+            return polygons.centroid.flatten().tolist()
+
+        polygon = []
+        for ii, pred in enumerate(preds[0]):
+            pred = cb.imresize(pred, size=imgs_size)
+            pred[pred < heatmap_threshold] = 0
+            pred = np.uint8(pred * 255)
+            pred = cb.imbinarize(pred)
+            point = _get_point_with_max_area(pred)
+            if len(point) == 2 and ii < 4:
+                polygon.append(point)
+
+        return polygon
+    
     def __call__(
         self,
         img: np.ndarray,
         do_center_crop: bool = False,
+        threshold: float = 0.3,
     ) -> np.ndarray:
-        img_infos = preprocess(
+        img_infos = self.preprocess(
             img=img,
             img_size_infer=self.img_size_infer,
             do_center_crop=do_center_crop
         )
         x = self.model(**img_infos['input'])
-        polygon = postprocess(
+        polygon = self.postprocess(
             preds=x['heatmap'],
             imgs_size=img_infos['img_size_ori'],
+            heatmap_threshold=threshold
         )
         polygon = np.array(polygon)
 
diff --git a/docaligner/point_reg/infer.py b/docaligner/point_reg/infer.py
index 32354bb..ae63a6c 100644
--- a/docaligner/point_reg/infer.py
+++ b/docaligner/point_reg/infer.py
@@ -47,9 +47,10 @@ def preprocess(
 def postprocess(
     points: np.ndarray,
     has_obj: bool,
-    imgs_size: Tuple[int, int]
+    imgs_size: Tuple[int, int],
+    point_threshold: float = 0.5,
 ) -> np.ndarray:
-    if has_obj > 0.5:
+    if has_obj > point_threshold:
         points = points.reshape(4, 2)
         polygon = points * np.array(imgs_size[::-1])
     else:
@@ -88,6 +89,7 @@ def __call__(
         self,
         img: np.ndarray,
         do_center_crop: bool = False,
+        threshold: float = 0.5,
     ) -> np.ndarray:
         img_infos = preprocess(
             img=img,
@@ -99,6 +101,7 @@ def __call__(
             points=x['points'],
             has_obj=x['has_obj'],
             imgs_size=img_infos['img_size_ori'],
+            point_threshold=threshold,
         )
 
         if len(polygon):

From 5335058c871573a822aa4bb4a08016abcc5d6373 Mon Sep 17 00:00:00 2001
From: Attila Roshani <atilaroshani@gmail.com>
Date: Tue, 25 Nov 2025 15:28:14 +0330
Subject: [PATCH 2/2] Refactor preprocess and postprocess methods into
 Inference class

---
 docaligner/point_reg/infer.py | 104 +++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 52 deletions(-)

diff --git a/docaligner/point_reg/infer.py b/docaligner/point_reg/infer.py
index ae63a6c..7fba6f1 100644
--- a/docaligner/point_reg/infer.py
+++ b/docaligner/point_reg/infer.py
@@ -8,56 +8,6 @@
 __all__ = ['Inference']
 
 
-def preprocess(
-    img: np.ndarray,
-    img_size_infer: Tuple[int, int] = None,
-    do_center_crop: bool = False,
-    return_tensor: bool = True,
-):
-    if not cb.is_numpy_img(img):
-        raise ValueError("Input image must be numpy array.")
-
-    h, w = img.shape[0:2]
-    center_crop_align = [0, 0]
-
-    if do_center_crop:
-        img = cb.centercrop(img)
-        if h > w:
-            center_crop_align = [0, (h - w) // 2]
-        else:
-            center_crop_align = [(w - h) // 2, 0]
-
-    nh, nw = img.shape[0:2]
-    if img_size_infer is not None:
-        img = cb.imresize(img, size=img_size_infer)
-
-    if return_tensor:
-        img = np.transpose(img, axes=(2, 0, 1)).astype('float32')
-        img = img[None] / 255
-
-    return {
-        'input': {'img': img},
-        'img_size_ori': (nh, nw),
-        'img_size_infer': img_size_infer,
-        'return_tensor': return_tensor,
-        'center_crop_align': center_crop_align
-    }
-
-
-def postprocess(
-    points: np.ndarray,
-    has_obj: bool,
-    imgs_size: Tuple[int, int],
-    point_threshold: float = 0.5,
-) -> np.ndarray:
-    if has_obj > point_threshold:
-        points = points.reshape(4, 2)
-        polygon = points * np.array(imgs_size[::-1])
-    else:
-        polygon = np.array([])
-    return polygon
-
-
 class Inference:
 
     configs = {
@@ -85,19 +35,69 @@ def __init__(
 
         self.model = cb.ONNXEngine(model_path, gpu_id, backend, **kwargs)
 
+    def preprocess(
+        self,
+        img: np.ndarray,
+        img_size_infer: Tuple[int, int] = None,
+        do_center_crop: bool = False,
+        return_tensor: bool = True,
+    ):
+        if not cb.is_numpy_img(img):
+            raise ValueError("Input image must be numpy array.")
+    
+        h, w = img.shape[0:2]
+        center_crop_align = [0, 0]
+    
+        if do_center_crop:
+            img = cb.centercrop(img)
+            if h > w:
+                center_crop_align = [0, (h - w) // 2]
+            else:
+                center_crop_align = [(w - h) // 2, 0]
+    
+        nh, nw = img.shape[0:2]
+        if img_size_infer is not None:
+            img = cb.imresize(img, size=img_size_infer)
+    
+        if return_tensor:
+            img = np.transpose(img, axes=(2, 0, 1)).astype('float32')
+            img = img[None] / 255
+    
+        return {
+            'input': {'img': img},
+            'img_size_ori': (nh, nw),
+            'img_size_infer': img_size_infer,
+            'return_tensor': return_tensor,
+            'center_crop_align': center_crop_align
+        }
+    
+    def postprocess(
+        self,
+        points: np.ndarray,
+        has_obj: bool,
+        imgs_size: Tuple[int, int],
+        point_threshold: float = 0.5,
+    ) -> np.ndarray:
+        if has_obj > point_threshold:
+            points = points.reshape(4, 2)
+            polygon = points * np.array(imgs_size[::-1])
+        else:
+            polygon = np.array([])
+        return polygon
+        
     def __call__(
         self,
         img: np.ndarray,
         do_center_crop: bool = False,
         threshold: float = 0.5,
     ) -> np.ndarray:
-        img_infos = preprocess(
+        img_infos = self.preprocess(
             img=img,
             img_size_infer=self.img_size_infer,
             do_center_crop=do_center_crop
         )
         x = self.model(**img_infos['input'])
-        polygon = postprocess(
+        polygon = self.postprocess(
             points=x['points'],
             has_obj=x['has_obj'],
             imgs_size=img_infos['img_size_ori'],