Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docaligner/aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ def __call__(
self,
img: np.ndarray,
do_center_crop: bool = False,
threshold: float = 0.3,
) -> Union[np.ndarray]:
return self.detector(img, do_center_crop)
return self.detector(img, do_center_crop, threshold=threshold)

def __repr__(self) -> str:
    """Return ``<detector class name>(<detector.model>)`` for debugging."""
    detector = self.detector
    class_name = detector.__class__.__name__
    return f'{class_name}({detector.model})'
130 changes: 67 additions & 63 deletions docaligner/heatmap_reg/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,67 +8,6 @@
__all__ = ['Inference']


def preprocess(
    img: np.ndarray,
    img_size_infer: Tuple[int, int] = None,
    do_center_crop: bool = False,
    return_tensor: bool = True,
):
    """Prepare an image for the heatmap model.

    Args:
        img: Input image; must satisfy ``cb.is_numpy_img``.
        img_size_infer: Size passed to ``cb.imresize``; ``None`` skips
            the resize step.
        do_center_crop: Apply ``cb.centercrop`` first and record the
            offset that the crop introduced.
        return_tensor: Convert to a float32 array with the channel axis
            moved first, a leading batch axis added, and values divided
            by 255.

    Returns:
        A dict with the model input (``'input'``), the image size after
        the optional crop (``'img_size_ori'``), the echoed
        ``img_size_infer`` / ``return_tensor`` arguments, and the crop
        offset (``'center_crop_align'``).

    Raises:
        ValueError: If ``img`` is rejected by ``cb.is_numpy_img``.
    """
    if not cb.is_numpy_img(img):
        raise ValueError("Input image must be numpy array.")

    src_h, src_w = img.shape[:2]

    if do_center_crop:
        img = cb.centercrop(img)
        # Half of the side difference is the amount removed on one side
        # of the longer dimension.
        margin = abs(src_h - src_w) // 2
        crop_offset = [0, margin] if src_h > src_w else [margin, 0]
    else:
        crop_offset = [0, 0]

    # Size is captured after the crop but before any resize.
    crop_h, crop_w = img.shape[:2]

    if img_size_infer is not None:
        img = cb.imresize(img, size=img_size_infer)

    if return_tensor:
        channel_first = np.transpose(img, (2, 0, 1)).astype('float32')
        img = channel_first[None] / 255.

    outputs = {}
    outputs['input'] = {'img': img}
    outputs['img_size_ori'] = (crop_h, crop_w)
    outputs['img_size_infer'] = img_size_infer
    outputs['return_tensor'] = return_tensor
    outputs['center_crop_align'] = crop_offset
    return outputs


def postprocess(
    preds: np.ndarray,  # (1, 4, H, W)
    imgs_size: Tuple[int, int],
    heatmap_threshold: float = 0.3
) -> List[float]:
    """Turn per-channel heatmaps into a list of points.

    Each channel of ``preds[0]`` is resized to ``imgs_size``, values
    below ``heatmap_threshold`` are zeroed, and the result is binarized.
    The centroid of the largest-area polygon found in that mask becomes
    the channel's point.

    Args:
        preds: Heatmap batch; only the first batch entry is read.
        imgs_size: Target size handed to ``cb.imresize``.
        heatmap_threshold: Values below this are set to zero.

    Returns:
        The two-element points that were found, in channel order.
        Channels with index 4 or above, and channels whose centroid
        list does not have exactly two values, contribute nothing.
    """

    def _largest_region_centroid(mask):
        regions = cb.Polygons.from_image(mask).drop_empty()
        if len(regions) > 0:
            # Keep only the region(s) whose area equals the maximum.
            regions = regions[regions.area == regions.area.max()]
        return regions.centroid.flatten().tolist()

    corners = []
    for channel_idx, heatmap in enumerate(preds[0]):
        heatmap = cb.imresize(heatmap, size=imgs_size)
        heatmap[heatmap < heatmap_threshold] = 0
        binary_mask = cb.imbinarize(np.uint8(heatmap * 255))
        centroid = _largest_region_centroid(binary_mask)
        # Presumably a tie for the largest area or an empty mask yields
        # a list that is not exactly two values long; skip those.
        if channel_idx >= 4 or len(centroid) != 2:
            continue
        corners.append(centroid)

    return corners


class Inference:

configs = {
Expand Down Expand Up @@ -100,26 +39,91 @@ def __init__(
self.cfg = cfg = self.configs[model_cfg]
self.img_size_infer = cfg['img_size_infer']
model_path = self.root / cfg['model_path']

if not cb.Path(model_path).exists():
cb.download_from_google(
cfg['file_id'], model_path.name, str(DIR / 'ckpt'))

self.model = cb.ONNXEngine(model_path, gpu_id, backend, **kwargs)

def preprocess(
    self,
    img: np.ndarray,
    img_size_infer: Tuple[int, int] = None,
    do_center_crop: bool = False,
    return_tensor: bool = True,
):
    """Build the model input and bookkeeping info for one image.

    Args:
        img: Input image; must satisfy ``cb.is_numpy_img``.
        img_size_infer: Size passed to ``cb.imresize``; ``None`` leaves
            the image at its current size.
        do_center_crop: Run ``cb.centercrop`` before resizing and store
            the resulting offset.
        return_tensor: Move the channel axis first, cast to float32,
            add a batch axis and divide by 255.

    Returns:
        Dict with keys ``'input'``, ``'img_size_ori'`` (size after the
        optional crop), ``'img_size_infer'``, ``'return_tensor'`` and
        ``'center_crop_align'``.

    Raises:
        ValueError: If ``img`` is rejected by ``cb.is_numpy_img``.
    """
    if not cb.is_numpy_img(img):
        raise ValueError("Input image must be numpy array.")

    height, width = img.shape[:2]
    align = [0, 0]

    if do_center_crop:
        img = cb.centercrop(img)
        half_diff = abs(height - width) // 2
        # The offset lands in the second slot for tall images and in the
        # first slot otherwise.
        if height > width:
            align = [0, half_diff]
        else:
            align = [half_diff, 0]

    size_after_crop = (img.shape[0], img.shape[1])

    if img_size_infer is not None:
        img = cb.imresize(img, size=img_size_infer)

    if return_tensor:
        img = np.transpose(img, axes=(2, 0, 1)).astype('float32')[None] / 255.

    return dict(
        input={'img': img},
        img_size_ori=size_after_crop,
        img_size_infer=img_size_infer,
        return_tensor=return_tensor,
        center_crop_align=align,
    )


def postprocess(
    self,
    preds: np.ndarray,  # (1, 4, H, W)
    imgs_size: Tuple[int, int],
    heatmap_threshold: float = 0.3,
) -> List[float]:
    """Extract one point per heatmap channel.

    For every channel in ``preds[0]`` the heatmap is resized to
    ``imgs_size``, thresholded, binarized, and reduced to the centroid
    of its largest-area polygon.

    Args:
        preds: Heatmap batch; only ``preds[0]`` is used.
        imgs_size: Size forwarded to ``cb.imresize``.
        heatmap_threshold: Heatmap values below this become zero.

    Returns:
        List of two-element points in channel order. A channel is left
        out when its index is 4 or above, or when its centroid list is
        not exactly two values long.
    """

    def _centroid_of_biggest(mask):
        found = cb.Polygons.from_image(mask).drop_empty()
        if len(found) > 0:
            found = found[found.area == found.area.max()]
        return found.centroid.flatten().tolist()

    points = []
    for idx, channel in enumerate(preds[0]):
        resized = cb.imresize(channel, size=imgs_size)
        # Suppress weak responses before scaling to the 0-255 range.
        resized[resized < heatmap_threshold] = 0
        as_uint8 = np.uint8(resized * 255)
        mask = cb.imbinarize(as_uint8)
        candidate = _centroid_of_biggest(mask)
        keep = idx < 4 and len(candidate) == 2
        if keep:
            points.append(candidate)

    return points

def __call__(
self,
img: np.ndarray,
do_center_crop: bool = False,
threshold: float = 0.3,
) -> np.ndarray:
img_infos = preprocess(
img_infos = self.preprocess(
img=img,
img_size_infer=self.img_size_infer,
do_center_crop=do_center_crop
)
x = self.model(**img_infos['input'])
polygon = postprocess(
polygon = self.postprocess(
preds=x['heatmap'],
imgs_size=img_infos['img_size_ori'],
heatmap_threshold=threshold
)
polygon = np.array(polygon)

Expand Down
105 changes: 54 additions & 51 deletions docaligner/point_reg/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,6 @@
__all__ = ['Inference']


def preprocess(
    img: np.ndarray,
    img_size_infer: Tuple[int, int] = None,
    do_center_crop: bool = False,
    return_tensor: bool = True,
):
    """Prepare an image for the point-regression model.

    Args:
        img: Input image; must satisfy ``cb.is_numpy_img``.
        img_size_infer: Size passed to ``cb.imresize``; ``None`` skips
            the resize step.
        do_center_crop: Apply ``cb.centercrop`` first and record the
            offset that the crop introduced.
        return_tensor: Convert to a float32 array with the channel axis
            moved first, a leading batch axis added, and values divided
            by 255.

    Returns:
        A dict with the model input (``'input'``), the image size after
        the optional crop (``'img_size_ori'``), the echoed
        ``img_size_infer`` / ``return_tensor`` arguments, and the crop
        offset (``'center_crop_align'``).

    Raises:
        ValueError: If ``img`` is rejected by ``cb.is_numpy_img``.
    """
    if not cb.is_numpy_img(img):
        raise ValueError("Input image must be numpy array.")

    src_h, src_w = img.shape[:2]

    if do_center_crop:
        img = cb.centercrop(img)
        # Half of the side difference is the amount removed on one side
        # of the longer dimension.
        margin = abs(src_h - src_w) // 2
        crop_offset = [0, margin] if src_h > src_w else [margin, 0]
    else:
        crop_offset = [0, 0]

    # Size is captured after the crop but before any resize.
    crop_h, crop_w = img.shape[:2]

    if img_size_infer is not None:
        img = cb.imresize(img, size=img_size_infer)

    if return_tensor:
        channel_first = np.transpose(img, (2, 0, 1)).astype('float32')
        img = channel_first[None] / 255

    outputs = {}
    outputs['input'] = {'img': img}
    outputs['img_size_ori'] = (crop_h, crop_w)
    outputs['img_size_infer'] = img_size_infer
    outputs['return_tensor'] = return_tensor
    outputs['center_crop_align'] = crop_offset
    return outputs


def postprocess(
    points: np.ndarray,
    has_obj: float,
    imgs_size: Tuple[int, int],
    point_threshold: float = 0.5,
) -> np.ndarray:
    """Scale regressed points to image size when the object score passes.

    Args:
        points: Array holding eight values, reshaped to ``(4, 2)``.
            Presumably normalized (x, y) pairs -- confirm against the
            model's output.
        has_obj: Object score compared against ``point_threshold``.
        imgs_size: Image size; it is reversed before multiplying, so it
            is expected as (height, width) to pair with (x, y) points.
        point_threshold: Score that ``has_obj`` must strictly exceed.
            Defaults to 0.5, the value that was previously hard-coded.

    Returns:
        A ``(4, 2)`` array of scaled points, or an empty array when
        ``has_obj`` does not exceed ``point_threshold``.
    """
    if has_obj > point_threshold:
        # Reverse the size so each point column meets its matching extent.
        polygon = points.reshape(4, 2) * np.array(imgs_size[::-1])
    else:
        polygon = np.array([])
    return polygon


class Inference:

configs = {
Expand Down Expand Up @@ -84,21 +35,73 @@ def __init__(

self.model = cb.ONNXEngine(model_path, gpu_id, backend, **kwargs)

def preprocess(
    self,
    img: np.ndarray,
    img_size_infer: Tuple[int, int] = None,
    do_center_crop: bool = False,
    return_tensor: bool = True,
):
    """Build the model input and bookkeeping info for one image.

    Args:
        img: Input image; must satisfy ``cb.is_numpy_img``.
        img_size_infer: Size passed to ``cb.imresize``; ``None`` leaves
            the image at its current size.
        do_center_crop: Run ``cb.centercrop`` before resizing and store
            the resulting offset.
        return_tensor: Move the channel axis first, cast to float32,
            add a batch axis and divide by 255.

    Returns:
        Dict with keys ``'input'``, ``'img_size_ori'`` (size after the
        optional crop), ``'img_size_infer'``, ``'return_tensor'`` and
        ``'center_crop_align'``.

    Raises:
        ValueError: If ``img`` is rejected by ``cb.is_numpy_img``.
    """
    if not cb.is_numpy_img(img):
        raise ValueError("Input image must be numpy array.")

    height, width = img.shape[:2]
    align = [0, 0]

    if do_center_crop:
        img = cb.centercrop(img)
        half_diff = abs(height - width) // 2
        # The offset lands in the second slot for tall images and in the
        # first slot otherwise.
        if height > width:
            align = [0, half_diff]
        else:
            align = [half_diff, 0]

    size_after_crop = (img.shape[0], img.shape[1])

    if img_size_infer is not None:
        img = cb.imresize(img, size=img_size_infer)

    if return_tensor:
        img = np.transpose(img, axes=(2, 0, 1)).astype('float32')[None] / 255

    return dict(
        input={'img': img},
        img_size_ori=size_after_crop,
        img_size_infer=img_size_infer,
        return_tensor=return_tensor,
        center_crop_align=align,
    )

def postprocess(
    self,
    points: np.ndarray,
    has_obj: bool,
    imgs_size: Tuple[int, int],
    point_threshold: float = 0.5,
) -> np.ndarray:
    """Scale regressed points to image size when the object score passes.

    Args:
        points: Array holding eight values, reshaped to ``(4, 2)``.
        has_obj: Object score compared against ``point_threshold``
            (annotated ``bool`` but used as a number).
        imgs_size: Image size; reversed before it multiplies the points.
        point_threshold: Score that ``has_obj`` must strictly exceed.

    Returns:
        A ``(4, 2)`` array of scaled points, or an empty array when
        ``has_obj`` does not exceed ``point_threshold``.
    """
    # Guard clause: no confident detection means an empty result.
    if not has_obj > point_threshold:
        return np.array([])

    corners = points.reshape(4, 2)
    scale = np.array(imgs_size[::-1])
    return corners * scale

def __call__(
self,
img: np.ndarray,
do_center_crop: bool = False,
threshold: float = 0.5,
) -> np.ndarray:
img_infos = preprocess(
img_infos = self.preprocess(
img=img,
img_size_infer=self.img_size_infer,
do_center_crop=do_center_crop
)
x = self.model(**img_infos['input'])
polygon = postprocess(
polygon = self.postprocess(
points=x['points'],
has_obj=x['has_obj'],
imgs_size=img_infos['img_size_ori'],
point_threshold=threshold,
)

if len(polygon):
Expand Down