Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
</p>

[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.12.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v1.0.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)


**Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch**
Expand Down
2 changes: 1 addition & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "doctr-api"
version = "0.12.1a0"
version = "1.0.0a0"
description = "Backend template for your OCR API with docTR"
authors = ["Mindee <contact@mindee.com>"]
license = "Apache-2.0"
Expand Down
41 changes: 19 additions & 22 deletions doctr/models/preprocessor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,21 @@ def batch_inputs(self, samples: list[torch.Tensor]) -> list[torch.Tensor]:

return batches

def sample_transforms(self, x: np.ndarray | torch.Tensor) -> torch.Tensor:
def sample_transforms(self, x: np.ndarray) -> torch.Tensor:
if x.ndim != 3:
raise AssertionError("expected list of 3D Tensors")
if isinstance(x, np.ndarray):
if x.dtype not in (np.uint8, np.float32, np.float16):
raise TypeError("unsupported data type for numpy.ndarray")
x = torch.from_numpy(x.copy()).permute(2, 0, 1)
elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
raise TypeError("unsupported data type for torch.Tensor")
if x.dtype not in (np.uint8, np.float32, np.float16):
raise TypeError("unsupported data type for numpy.ndarray")
tensor = torch.from_numpy(x.copy()).permute(2, 0, 1)
# Resizing
x = self.resize(x)
tensor = self.resize(tensor)
# Data type
if x.dtype == torch.uint8:
x = x.to(dtype=torch.float32).div(255).clip(0, 1) # type: ignore[union-attr]
if tensor.dtype == torch.uint8:
tensor = tensor.to(dtype=torch.float32).div(255).clip(0, 1)
else:
x = x.to(dtype=torch.float32) # type: ignore[union-attr]
tensor = tensor.to(dtype=torch.float32)

return x
return tensor

def __call__(self, x: np.ndarray | list[np.ndarray]) -> list[torch.Tensor]:
"""Prepare document data for model forwarding
Expand All @@ -94,29 +91,29 @@ def __call__(self, x: np.ndarray | list[np.ndarray]) -> list[torch.Tensor]:
raise AssertionError("expected 4D Tensor")
if x.dtype not in (np.uint8, np.float32, np.float16):
raise TypeError("unsupported data type for numpy.ndarray")
x = torch.from_numpy(x.copy()).permute(0, 3, 1, 2) # type: ignore[assignment]
tensor = torch.from_numpy(x.copy()).permute(0, 3, 1, 2)

# Resizing
if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
x = F.resize(
x, self.resize.size, interpolation=self.resize.interpolation, antialias=self.resize.antialias
if tensor.shape[-2] != self.resize.size[0] or tensor.shape[-1] != self.resize.size[1]:
tensor = F.resize(
tensor, self.resize.size, interpolation=self.resize.interpolation, antialias=self.resize.antialias
)
# Data type
if x.dtype == torch.uint8: # type: ignore[union-attr]
x = x.to(dtype=torch.float32).div(255).clip(0, 1) # type: ignore[union-attr]
if tensor.dtype == torch.uint8:
tensor = tensor.to(dtype=torch.float32).div(255).clip(0, 1)
else:
x = x.to(dtype=torch.float32) # type: ignore[union-attr]
batches = [x]
tensor = tensor.to(dtype=torch.float32)
batches = [tensor]

elif isinstance(x, list) and all(isinstance(sample, np.ndarray) for sample in x):
# Sample transform (to tensor, resize)
samples = list(multithread_exec(self.sample_transforms, x))
# Batching
batches = self.batch_inputs(samples) # type: ignore[assignment]
batches = self.batch_inputs(samples)
else:
raise TypeError(f"invalid input type: {type(x)}")

# Batch transforms (normalize)
batches = list(multithread_exec(self.normalize, batches))

return batches # type: ignore[return-value]
return batches
46 changes: 24 additions & 22 deletions doctr/transforms/modules/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,21 @@


class Resize(T.Resize):
"""Resize the input image to the given size"""
"""Resize the input image to the given size

>>> import torch
>>> from doctr.transforms import Resize
>>> transfo = Resize((64, 64), preserve_aspect_ratio=True, symmetric_pad=True)
>>> out = transfo(torch.rand((3, 64, 64)))

Args:
size: output size in pixels, either a tuple (height, width) or a single integer for square images
interpolation: interpolation mode to use for resizing, default is bilinear
preserve_aspect_ratio: whether to preserve the aspect ratio of the image;
if True, the image is resized to fit within the target size while maintaining its aspect ratio
symmetric_pad: whether to symmetrically pad the image to the target size;
if True, the image is padded equally on both sides to fit the target size
"""

def __init__(
self,
Expand All @@ -36,42 +50,30 @@ def __init__(
preserve_aspect_ratio: bool = False,
symmetric_pad: bool = False,
) -> None:
super().__init__(size, interpolation, antialias=True)
super().__init__(size if isinstance(size, (list, tuple)) else (size, size), interpolation, antialias=True)
self.preserve_aspect_ratio = preserve_aspect_ratio
self.symmetric_pad = symmetric_pad

if not isinstance(self.size, (int, tuple, list)):
raise AssertionError("size should be either a tuple, a list or an int")

def forward(
self,
img: torch.Tensor,
target: np.ndarray | None = None,
) -> torch.Tensor | tuple[torch.Tensor, np.ndarray]:
if isinstance(self.size, int):
target_ratio = img.shape[-2] / img.shape[-1]
else:
target_ratio = self.size[0] / self.size[1]
target_ratio = self.size[0] / self.size[1]
actual_ratio = img.shape[-2] / img.shape[-1]

if not self.preserve_aspect_ratio or (target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))):
if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
# If we don't preserve the aspect ratio, or the target aspect ratio is the same as the original one,
# we can use the regular resize
if target is not None:
return super().forward(img), target
return super().forward(img)
else:
# Resize
if isinstance(self.size, (tuple, list)):
if actual_ratio > target_ratio:
tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
else:
tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
elif isinstance(self.size, int): # self.size is the longest side, infer the other
if img.shape[-2] <= img.shape[-1]:
tmp_size = (max(int(self.size * actual_ratio), 1), self.size)
else:
tmp_size = (self.size, max(int(self.size / actual_ratio), 1))
if actual_ratio > target_ratio:
tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
else:
tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])

# Scale image
img = F.resize(img, tmp_size, self.interpolation, antialias=True)
Expand All @@ -93,14 +95,14 @@ def forward(
if self.preserve_aspect_ratio:
# Get absolute coords
if target.shape[1:] == (4,):
if isinstance(self.size, (tuple, list)) and self.symmetric_pad:
if self.symmetric_pad:
target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
target[:, [1, 3]] = offset[1] + target[:, [1, 3]] * raw_shape[-2] / img.shape[-2]
else:
target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
elif target.shape[1:] == (4, 2):
if isinstance(self.size, (tuple, list)) and self.symmetric_pad:
if self.symmetric_pad:
target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
target[..., 1] = offset[1] + target[..., 1] * raw_shape[-2] / img.shape[-2]
else:
Expand Down
83 changes: 50 additions & 33 deletions tests/pytorch/test_transforms_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,60 +27,77 @@ def test_resize():

assert torch.all(out == 1)
assert out.shape[-2:] == output_size
assert repr(transfo) == f"Resize(output_size={output_size}, interpolation='bilinear')"
assert repr(transfo) == "Resize(output_size=(32, 32), interpolation='bilinear')"

transfo = Resize(output_size, preserve_aspect_ratio=True)
# Test with preserve_aspect_ratio
output_size = (32, 32)
input_t = torch.ones((3, 32, 64), dtype=torch.float32)
out = transfo(input_t)

# Asymmetric padding
transfo = Resize(output_size, preserve_aspect_ratio=True)
out = transfo(input_t)
assert out.shape[-2:] == output_size
assert not torch.all(out == 1)
# Asymetric padding
assert torch.all(out[:, -1] == 0) and torch.all(out[:, 0] == 1)

# Symetric padding
transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
assert repr(transfo) == (
f"Resize(output_size={output_size}, interpolation='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)"
)
# Symmetric padding
transfo = Resize(32, preserve_aspect_ratio=True, symmetric_pad=True)
out = transfo(input_t)
assert out.shape[-2:] == output_size
# symetric padding
assert torch.all(out[:, -1] == 0) and torch.all(out[:, 0] == 0)
assert torch.all(out[:, 0] == 0) and torch.all(out[:, -1] == 0)

# Inverse aspect ratio
expected = "Resize(output_size=(32, 32), interpolation='bilinear', preserve_aspect_ratio=True, symmetric_pad=True)"
assert repr(transfo) == expected

# Test with inverse resize
input_t = torch.ones((3, 64, 32), dtype=torch.float32)
transfo = Resize(32, preserve_aspect_ratio=True, symmetric_pad=True)
out = transfo(input_t)
assert out.shape[-2:] == (32, 32)

assert not torch.all(out == 1)
assert out.shape[-2:] == output_size

# Same aspect ratio
output_size = (32, 128)
transfo = Resize(output_size, preserve_aspect_ratio=True)
# Test resize with same ratio
transfo = Resize((32, 128), preserve_aspect_ratio=True)
out = transfo(torch.ones((3, 16, 64), dtype=torch.float32))
assert out.shape[-2:] == output_size
assert out.shape[-2:] == (32, 128)

# FP16
# Test with fp16 input
transfo = Resize((32, 128), preserve_aspect_ratio=True)
input_t = torch.ones((3, 64, 64), dtype=torch.float16)
out = transfo(input_t)
assert out.dtype == torch.float16

# --- Test with target (bounding boxes) ---

target_boxes = np.array([[0.1, 0.1, 0.9, 0.9], [0.2, 0.2, 0.8, 0.8]])
output_size = (64, 64)

transfo = Resize(output_size, preserve_aspect_ratio=True)
padding = [True, False]
for symmetric_pad in padding:
# Test with target boxes
target_boxes = np.array([[0.1, 0.1, 0.3, 0.4], [0.2, 0.2, 0.8, 0.8]])
transfo = Resize((64, 64), preserve_aspect_ratio=True, symmetric_pad=symmetric_pad)
input_t = torch.ones((3, 32, 64), dtype=torch.float32)
out, new_target = transfo(input_t, target_boxes)

assert out.shape[-2:] == (64, 64)
assert new_target.shape == target_boxes.shape
assert np.all((0 <= new_target) & (new_target <= 1))

# Test with target polygons
target_boxes = np.array([
[[0.1, 0.1], [0.9, 0.1], [0.9, 0.9], [0.1, 0.9]],
[[0.2, 0.2], [0.8, 0.2], [0.8, 0.8], [0.2, 0.8]],
])
transfo = Resize((64, 64), preserve_aspect_ratio=True, symmetric_pad=symmetric_pad)
input_t = torch.ones((3, 32, 64), dtype=torch.float32)
out, new_target = transfo(input_t, target_boxes)

assert out.shape[-2:] == (64, 64)
assert new_target.shape == target_boxes.shape
assert np.all((0 <= new_target) & (new_target <= 1))

# Test with invalid target shape
input_t = torch.ones((3, 32, 64), dtype=torch.float32)
out, new_target = transfo(input_t, target_boxes)
target = np.ones((2, 5)) # Invalid shape

assert out.shape[-2:] == output_size
assert new_target.shape == target_boxes.shape
assert np.all(new_target >= 0) and np.all(new_target <= 1)

out = transfo(input_t)
assert out.shape[-2:] == output_size
transfo = Resize((64, 64), preserve_aspect_ratio=True)
with pytest.raises(AssertionError):
transfo(input_t, target)


@pytest.mark.parametrize(
Expand Down
Loading