From e17b3410cb2c7d115c91e26762b77fa9ee700a61 Mon Sep 17 00:00:00 2001 From: Attila Roshani Date: Tue, 25 Nov 2025 05:08:50 +0000 Subject: [PATCH 1/2] Add configurable score threshold --- docaligner/aligner.py | 3 +- docaligner/heatmap_reg/infer.py | 130 ++++++++++++++++---------------- docaligner/point_reg/infer.py | 7 +- 3 files changed, 74 insertions(+), 66 deletions(-) diff --git a/docaligner/aligner.py b/docaligner/aligner.py index 92c1350..31c61be 100644 --- a/docaligner/aligner.py +++ b/docaligner/aligner.py @@ -67,8 +67,9 @@ def __call__( self, img: np.ndarray, do_center_crop: bool = False, + threshold: float = 0.3, ) -> Union[np.ndarray]: - return self.detector(img, do_center_crop) + return self.detector(img, do_center_crop, threshold=threshold) def __repr__(self) -> str: return f'{self.detector.__class__.__name__}({self.detector.model})' diff --git a/docaligner/heatmap_reg/infer.py b/docaligner/heatmap_reg/infer.py index 1884947..7e3d75e 100644 --- a/docaligner/heatmap_reg/infer.py +++ b/docaligner/heatmap_reg/infer.py @@ -8,67 +8,6 @@ __all__ = ['Inference'] -def preprocess( - img: np.ndarray, - img_size_infer: Tuple[int, int] = None, - do_center_crop: bool = False, - return_tensor: bool = True, -): - if not cb.is_numpy_img(img): - raise ValueError("Input image must be numpy array.") - - h, w = img.shape[0:2] - center_crop_align = [0, 0] - - if do_center_crop: - img = cb.centercrop(img) - if h > w: - center_crop_align = [0, (h - w) // 2] - else: - center_crop_align = [(w - h) // 2, 0] - - nh, nw = img.shape[0:2] - if img_size_infer is not None: - img = cb.imresize(img, size=img_size_infer) - - if return_tensor: - img = np.transpose(img, axes=(2, 0, 1)).astype('float32') - img = img[None] / 255. 
- - return { - 'input': {'img': img}, - 'img_size_ori': (nh, nw), - 'img_size_infer': img_size_infer, - 'return_tensor': return_tensor, - 'center_crop_align': center_crop_align - } - - -def postprocess( - preds: np.ndarray, # (1, 4, H, W) - imgs_size: Tuple[int, int], - heatmap_threshold: float = 0.3 -) -> List[float]: - - def _get_point_with_max_area(mask): - polygons = cb.Polygons.from_image(mask).drop_empty() - if len(polygons) > 0: - polygons = polygons[polygons.area == polygons.area.max()] - return polygons.centroid.flatten().tolist() - - polygon = [] - for ii, pred in enumerate(preds[0]): - pred = cb.imresize(pred, size=imgs_size) - pred[pred < heatmap_threshold] = 0 - pred = np.uint8(pred * 255) - pred = cb.imbinarize(pred) - point = _get_point_with_max_area(pred) - if len(point) == 2 and ii < 4: - polygon.append(point) - - return polygon - - class Inference: configs = { @@ -100,26 +39,91 @@ def __init__( self.cfg = cfg = self.configs[model_cfg] self.img_size_infer = cfg['img_size_infer'] model_path = self.root / cfg['model_path'] + if not cb.Path(model_path).exists(): cb.download_from_google( cfg['file_id'], model_path.name, str(DIR / 'ckpt')) self.model = cb.ONNXEngine(model_path, gpu_id, backend, **kwargs) + def preprocess( + self, + img: np.ndarray, + img_size_infer: Tuple[int, int] = None, + do_center_crop: bool = False, + return_tensor: bool = True, + ): + if not cb.is_numpy_img(img): + raise ValueError("Input image must be numpy array.") + + h, w = img.shape[0:2] + center_crop_align = [0, 0] + + if do_center_crop: + img = cb.centercrop(img) + if h > w: + center_crop_align = [0, (h - w) // 2] + else: + center_crop_align = [(w - h) // 2, 0] + + nh, nw = img.shape[0:2] + if img_size_infer is not None: + img = cb.imresize(img, size=img_size_infer) + + if return_tensor: + img = np.transpose(img, axes=(2, 0, 1)).astype('float32') + img = img[None] / 255. 
+ + return { + 'input': {'img': img}, + 'img_size_ori': (nh, nw), + 'img_size_infer': img_size_infer, + 'return_tensor': return_tensor, + 'center_crop_align': center_crop_align + } + + + def postprocess( + self, + preds: np.ndarray, # (1, 4, H, W) + imgs_size: Tuple[int, int], + heatmap_threshold: float = 0.3, + ) -> List[float]: + + def _get_point_with_max_area(mask): + polygons = cb.Polygons.from_image(mask).drop_empty() + if len(polygons) > 0: + polygons = polygons[polygons.area == polygons.area.max()] + return polygons.centroid.flatten().tolist() + + polygon = [] + for ii, pred in enumerate(preds[0]): + pred = cb.imresize(pred, size=imgs_size) + pred[pred < heatmap_threshold] = 0 + pred = np.uint8(pred * 255) + pred = cb.imbinarize(pred) + point = _get_point_with_max_area(pred) + if len(point) == 2 and ii < 4: + polygon.append(point) + + return polygon + def __call__( self, img: np.ndarray, do_center_crop: bool = False, + threshold: float = 0.3, ) -> np.ndarray: - img_infos = preprocess( + img_infos = self.preprocess( img=img, img_size_infer=self.img_size_infer, do_center_crop=do_center_crop ) x = self.model(**img_infos['input']) - polygon = postprocess( + polygon = self.postprocess( preds=x['heatmap'], imgs_size=img_infos['img_size_ori'], + heatmap_threshold=threshold ) polygon = np.array(polygon) diff --git a/docaligner/point_reg/infer.py b/docaligner/point_reg/infer.py index 32354bb..ae63a6c 100644 --- a/docaligner/point_reg/infer.py +++ b/docaligner/point_reg/infer.py @@ -47,9 +47,10 @@ def preprocess( def postprocess( points: np.ndarray, has_obj: bool, - imgs_size: Tuple[int, int] + imgs_size: Tuple[int, int], + point_threshold: float = 0.5, ) -> np.ndarray: - if has_obj > 0.5: + if has_obj > point_threshold: points = points.reshape(4, 2) polygon = points * np.array(imgs_size[::-1]) else: @@ -88,6 +89,7 @@ def __call__( self, img: np.ndarray, do_center_crop: bool = False, + threshold: float = 0.5, ) -> np.ndarray: img_infos = preprocess( img=img, @@ -99,6 
+101,7 @@ def __call__( points=x['points'], has_obj=x['has_obj'], imgs_size=img_infos['img_size_ori'], + point_threshold=threshold, ) if len(polygon): From 5335058c871573a822aa4bb4a08016abcc5d6373 Mon Sep 17 00:00:00 2001 From: Attila Roshani Date: Tue, 25 Nov 2025 15:28:14 +0330 Subject: [PATCH 2/2] Refactor preprocess and postprocess methods into Inference class --- docaligner/point_reg/infer.py | 104 +++++++++++++++++----------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/docaligner/point_reg/infer.py b/docaligner/point_reg/infer.py index ae63a6c..7fba6f1 100644 --- a/docaligner/point_reg/infer.py +++ b/docaligner/point_reg/infer.py @@ -8,56 +8,6 @@ __all__ = ['Inference'] -def preprocess( - img: np.ndarray, - img_size_infer: Tuple[int, int] = None, - do_center_crop: bool = False, - return_tensor: bool = True, -): - if not cb.is_numpy_img(img): - raise ValueError("Input image must be numpy array.") - - h, w = img.shape[0:2] - center_crop_align = [0, 0] - - if do_center_crop: - img = cb.centercrop(img) - if h > w: - center_crop_align = [0, (h - w) // 2] - else: - center_crop_align = [(w - h) // 2, 0] - - nh, nw = img.shape[0:2] - if img_size_infer is not None: - img = cb.imresize(img, size=img_size_infer) - - if return_tensor: - img = np.transpose(img, axes=(2, 0, 1)).astype('float32') - img = img[None] / 255 - - return { - 'input': {'img': img}, - 'img_size_ori': (nh, nw), - 'img_size_infer': img_size_infer, - 'return_tensor': return_tensor, - 'center_crop_align': center_crop_align - } - - -def postprocess( - points: np.ndarray, - has_obj: bool, - imgs_size: Tuple[int, int], - point_threshold: float = 0.5, -) -> np.ndarray: - if has_obj > point_threshold: - points = points.reshape(4, 2) - polygon = points * np.array(imgs_size[::-1]) - else: - polygon = np.array([]) - return polygon - - class Inference: configs = { @@ -85,19 +35,69 @@ def __init__( self.model = cb.ONNXEngine(model_path, gpu_id, backend, **kwargs) + def preprocess( + 
self, + img: np.ndarray, + img_size_infer: Tuple[int, int] = None, + do_center_crop: bool = False, + return_tensor: bool = True, + ): + if not cb.is_numpy_img(img): + raise ValueError("Input image must be numpy array.") + + h, w = img.shape[0:2] + center_crop_align = [0, 0] + + if do_center_crop: + img = cb.centercrop(img) + if h > w: + center_crop_align = [0, (h - w) // 2] + else: + center_crop_align = [(w - h) // 2, 0] + + nh, nw = img.shape[0:2] + if img_size_infer is not None: + img = cb.imresize(img, size=img_size_infer) + + if return_tensor: + img = np.transpose(img, axes=(2, 0, 1)).astype('float32') + img = img[None] / 255 + + return { + 'input': {'img': img}, + 'img_size_ori': (nh, nw), + 'img_size_infer': img_size_infer, + 'return_tensor': return_tensor, + 'center_crop_align': center_crop_align + } + + def postprocess( + self, + points: np.ndarray, + has_obj: bool, + imgs_size: Tuple[int, int], + point_threshold: float = 0.5, + ) -> np.ndarray: + if has_obj > point_threshold: + points = points.reshape(4, 2) + polygon = points * np.array(imgs_size[::-1]) + else: + polygon = np.array([]) + return polygon + def __call__( self, img: np.ndarray, do_center_crop: bool = False, threshold: float = 0.5, ) -> np.ndarray: - img_infos = preprocess( + img_infos = self.preprocess( img=img, img_size_infer=self.img_size_infer, do_center_crop=do_center_crop ) x = self.model(**img_infos['input']) - polygon = postprocess( + polygon = self.postprocess( points=x['points'], has_obj=x['has_obj'], imgs_size=img_infos['img_size_ori'],