MikeTrizna commited on
Commit
f3270e6
·
verified ·
1 Parent(s): f2d0191

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +9 -10
  2. README.md +41 -13
  3. app.py +126 -0
  4. backend/__pycache__/pytorch.cpython-312.pyc +0 -0
  5. backend/pytorch.py +97 -0
  6. backend/tensorflow.py +99 -0
  7. packages.txt +2 -0
  8. requirements.txt +2 -3
  9. src/python-doctr/.github/ISSUE_TEMPLATE/bug_report.yml +63 -0
  10. src/python-doctr/.github/ISSUE_TEMPLATE/config.yml +5 -0
  11. src/python-doctr/.github/ISSUE_TEMPLATE/feature_request.yml +33 -0
  12. src/python-doctr/.github/dependabot.yml +30 -0
  13. src/python-doctr/.github/release.yml +24 -0
  14. src/python-doctr/.github/verify_pr_labels.py +87 -0
  15. src/python-doctr/.github/workflows/builds.yml +43 -0
  16. src/python-doctr/.github/workflows/clear_caches.yml +15 -0
  17. src/python-doctr/.github/workflows/demo.yml +89 -0
  18. src/python-doctr/.github/workflows/doc-status.yml +22 -0
  19. src/python-doctr/.github/workflows/docker.yml +36 -0
  20. src/python-doctr/.github/workflows/docs.yml +51 -0
  21. src/python-doctr/.github/workflows/main.yml +90 -0
  22. src/python-doctr/.github/workflows/pr-labels.yml +29 -0
  23. src/python-doctr/.github/workflows/public_docker_images.yml +91 -0
  24. src/python-doctr/.github/workflows/publish.yml +65 -0
  25. src/python-doctr/.github/workflows/pull_requests.yml +32 -0
  26. src/python-doctr/.github/workflows/references.yml +253 -0
  27. src/python-doctr/.github/workflows/scripts.yml +121 -0
  28. src/python-doctr/.github/workflows/style.yml +55 -0
  29. src/python-doctr/.gitignore +140 -0
  30. src/python-doctr/.pre-commit-config.yaml +23 -0
  31. src/python-doctr/CODE_OF_CONDUCT.md +128 -0
  32. src/python-doctr/CONTRIBUTING.md +92 -0
  33. src/python-doctr/Dockerfile +46 -0
  34. src/python-doctr/LICENSE +201 -0
  35. src/python-doctr/Makefile +29 -0
  36. src/python-doctr/README.md +349 -0
  37. src/python-doctr/api/.gitignore +2 -0
  38. src/python-doctr/api/Dockerfile +25 -0
  39. src/python-doctr/api/Makefile +26 -0
  40. src/python-doctr/api/README.md +194 -0
  41. src/python-doctr/api/app/config.py +13 -0
  42. src/python-doctr/api/app/main.py +48 -0
  43. src/python-doctr/api/app/routes/detection.py +35 -0
  44. src/python-doctr/api/app/routes/kie.py +53 -0
  45. src/python-doctr/api/app/routes/ocr.py +66 -0
  46. src/python-doctr/api/app/routes/recognition.py +29 -0
  47. src/python-doctr/api/app/schemas.py +186 -0
  48. src/python-doctr/api/app/utils.py +47 -0
  49. src/python-doctr/api/app/vision.py +53 -0
  50. src/python-doctr/api/docker-compose.yml +9 -0
.gitattributes CHANGED
@@ -1,35 +1,34 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
 
20
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ src/python-doctr/docs/images/demo_illustration_mini.png filter=lfs diff=lfs merge=lfs -text
29
+ src/python-doctr/docs/images/demo_update.png filter=lfs diff=lfs merge=lfs -text
30
+ src/python-doctr/docs/images/doctr-need-help.png filter=lfs diff=lfs merge=lfs -text
31
+ src/python-doctr/docs/images/doctr_demo_app.png filter=lfs diff=lfs merge=lfs -text
32
+ src/python-doctr/docs/images/ocr.png filter=lfs diff=lfs merge=lfs -text
33
+ src/python-doctr/docs/source/_static/images/favicon.ico filter=lfs diff=lfs merge=lfs -text
34
+ src/python-doctr/doctr/datasets/__pycache__/vocabs.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,19 +1,47 @@
1
  ---
2
- title: Doctr Demo Fork
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
  pinned: false
11
- short_description: A fork of https://huggingface.co/spaces/mindee/doctr
12
  ---
13
 
14
- # Welcome to Streamlit!
15
 
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
 
17
 
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: docTR
3
+ emoji: 📑
4
+ colorFrom: purple
5
+ colorTo: pink
6
+ sdk: streamlit
7
+ sdk_version: 1.39.0
8
+ app_file: app.py
 
9
  pinned: false
10
+ license: apache-2.0
11
  ---
12
 
13
+ ## Configuration
14
 
15
+ `title`: _string_
16
+ Display title for the Space
17
 
18
+ `emoji`: _string_
19
+ Space emoji (emoji-only character allowed)
20
+
21
+ `colorFrom`: _string_
22
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
23
+
24
+ `colorTo`: _string_
25
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
26
+
27
+ `sdk`: _string_
28
+ Can be either `gradio` or `streamlit`
29
+
30
+ `sdk_version` : _string_
31
+ Only applicable for `streamlit` SDK.
32
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
33
+
34
+ `app_file`: _string_
35
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code).
36
+ Path is relative to the root of the repository.
37
+
38
+ `pinned`: _boolean_
39
+ Whether the Space stays on top of your list.
40
+
41
+ ## Run the demo locally
42
+
43
+ ```bash
44
+ cd demo
45
+ pip install -r pt-requirements.txt
46
+ streamlit run app.py
47
+ ```
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) 2021-2025, Mindee.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ import cv2
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import streamlit as st
10
+ import torch
11
+ from backend.pytorch import DET_ARCHS, RECO_ARCHS, forward_image, load_predictor
12
+
13
+ from doctr.io import DocumentFile
14
+ from doctr.utils.visualization import visualize_page
15
+
16
# Module-level device selection: prefer the first CUDA GPU when one is
# available, otherwise fall back to CPU. Shared by all forward passes below.
forward_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
17
+
18
+
19
def main(det_archs, reco_archs):
    """Build the Streamlit layout for the docTR demo.

    Renders a sidebar (document upload, model selection, inference
    parameters) and a four-column results area (input page, segmentation
    heatmap, OCR output, page reconstitution).

    Args:
        det_archs: list of text-detection architecture names to offer
        reco_archs: list of text-recognition architecture names to offer
    """
    # Wide mode
    st.set_page_config(layout="wide")

    # Designing the interface
    st.title("docTR: Document Text Recognition")
    # For newline
    st.write("\n")
    # Instructions
    st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
    # Set the columns: input | heatmap | OCR overlay | synthesized page
    cols = st.columns((1, 1, 1, 1))
    cols[0].subheader("Input page")
    cols[1].subheader("Segmentation heatmap")
    cols[2].subheader("OCR output")
    cols[3].subheader("Page reconstitution")

    # Sidebar
    # File selection
    st.sidebar.title("Document selection")
    # Choose your own image
    uploaded_file = st.sidebar.file_uploader("Upload files", type=["pdf", "png", "jpeg", "jpg"])
    if uploaded_file is not None:
        # PDFs may hold several pages; images yield a single-page document
        if uploaded_file.name.endswith(".pdf"):
            doc = DocumentFile.from_pdf(uploaded_file.read())
        else:
            doc = DocumentFile.from_images(uploaded_file.read())
        # Selectbox is 1-based for display; convert back to a 0-based index
        page_idx = st.sidebar.selectbox("Page selection", [idx + 1 for idx in range(len(doc))]) - 1
        page = doc[page_idx]
        cols[0].image(page)

    # Model selection
    st.sidebar.title("Model selection")
    st.sidebar.markdown("**Backend**: PyTorch")
    det_arch = st.sidebar.selectbox("Text detection model", det_archs)
    reco_arch = st.sidebar.selectbox("Text recognition model", reco_archs)

    # For newline
    st.sidebar.write("\n")
    # Only straight pages or possible rotation
    st.sidebar.title("Parameters")
    assume_straight_pages = st.sidebar.checkbox("Assume straight pages", value=True)
    # Disable page orientation detection
    disable_page_orientation = st.sidebar.checkbox("Disable page orientation detection", value=False)
    # Disable crop orientation detection
    disable_crop_orientation = st.sidebar.checkbox("Disable crop orientation detection", value=False)
    # Straighten pages
    straighten_pages = st.sidebar.checkbox("Straighten pages", value=False)
    # Export as straight boxes
    export_straight_boxes = st.sidebar.checkbox("Export as straight boxes", value=False)
    st.sidebar.write("\n")
    # Binarization threshold applied to the detection segmentation map
    bin_thresh = st.sidebar.slider("Binarization threshold", min_value=0.1, max_value=0.9, value=0.3, step=0.1)
    st.sidebar.write("\n")
    # Box threshold: minimal objectness score to keep a detected box
    box_thresh = st.sidebar.slider("Box threshold", min_value=0.1, max_value=0.9, value=0.1, step=0.1)
    st.sidebar.write("\n")

    if st.sidebar.button("Analyze page"):
        if uploaded_file is None:
            st.sidebar.write("Please upload a document")

        else:
            with st.spinner("Loading model..."):
                predictor = load_predictor(
                    det_arch=det_arch,
                    reco_arch=reco_arch,
                    assume_straight_pages=assume_straight_pages,
                    straighten_pages=straighten_pages,
                    export_as_straight_boxes=export_straight_boxes,
                    disable_page_orientation=disable_page_orientation,
                    disable_crop_orientation=disable_crop_orientation,
                    bin_thresh=bin_thresh,
                    box_thresh=box_thresh,
                    device=forward_device,
                )

            with st.spinner("Analyzing..."):
                # Forward the image to the model (detection branch only)
                seg_map = forward_image(predictor, page, forward_device)
                seg_map = np.squeeze(seg_map)
                # Upscale the heatmap back to the original page size
                seg_map = cv2.resize(seg_map, (page.shape[1], page.shape[0]), interpolation=cv2.INTER_LINEAR)

                # Plot the raw heatmap
                fig, ax = plt.subplots()
                ax.imshow(seg_map)
                ax.axis("off")
                cols[1].pyplot(fig)

                # Plot OCR output (full end-to-end prediction)
                out = predictor([page])
                fig = visualize_page(out.pages[0].export(), out.pages[0].page, interactive=False, add_labels=False)
                cols[2].pyplot(fig)

                # Page reconstitution under input page; synthesis only works
                # when boxes end up axis-aligned (straight pages or straightened)
                page_export = out.pages[0].export()
                if assume_straight_pages or (not assume_straight_pages and straighten_pages):
                    img = out.pages[0].synthesize()
                    cols[3].image(img, clamp=True)

                # Display JSON
                st.markdown("\nHere are your analysis results in JSON format:")
                st.json(page_export, expanded=False)


if __name__ == "__main__":
    main(DET_ARCHS, RECO_ARCHS)
backend/__pycache__/pytorch.cpython-312.pyc ADDED
Binary file (3.46 kB). View file
 
backend/pytorch.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) 2021-2025, Mindee.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ import numpy as np
7
+ import torch
8
+
9
+ from doctr.models import ocr_predictor
10
+ from doctr.models.predictor import OCRPredictor
11
+
12
+ DET_ARCHS = [
13
+ "fast_base",
14
+ "fast_small",
15
+ "fast_tiny",
16
+ "db_resnet50",
17
+ "db_resnet34",
18
+ "db_mobilenet_v3_large",
19
+ "linknet_resnet18",
20
+ "linknet_resnet34",
21
+ "linknet_resnet50",
22
+ ]
23
+ RECO_ARCHS = [
24
+ "crnn_vgg16_bn",
25
+ "crnn_mobilenet_v3_small",
26
+ "crnn_mobilenet_v3_large",
27
+ "master",
28
+ "sar_resnet31",
29
+ "vitstr_small",
30
+ "vitstr_base",
31
+ "parseq",
32
+ "viptr_tiny",
33
+ ]
34
+
35
+
36
def load_predictor(
    det_arch: str,
    reco_arch: str,
    assume_straight_pages: bool,
    straighten_pages: bool,
    export_as_straight_boxes: bool,
    disable_page_orientation: bool,
    disable_crop_orientation: bool,
    bin_thresh: float,
    box_thresh: float,
    device: torch.device,
) -> OCRPredictor:
    """Load a pretrained OCR predictor from doctr.models

    Args:
        det_arch: detection architecture
        reco_arch: recognition architecture
        assume_straight_pages: whether to assume straight pages or not
        straighten_pages: whether to straighten rotated pages or not
        export_as_straight_boxes: whether to export boxes as straight or not
        disable_page_orientation: whether to disable page orientation or not
        disable_crop_orientation: whether to disable crop orientation or not
        bin_thresh: binarization threshold for the segmentation map
        box_thresh: minimal objectness score to consider a box
        device: torch.device, the device to load the predictor on

    Returns:
        instance of OCRPredictor
    """
    # Orientation detection is only worthwhile when pages may be rotated
    detect_orientation = not assume_straight_pages
    predictor = ocr_predictor(
        det_arch,
        reco_arch,
        pretrained=True,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        export_as_straight_boxes=export_as_straight_boxes,
        detect_orientation=detect_orientation,
        disable_page_orientation=disable_page_orientation,
        disable_crop_orientation=disable_crop_orientation,
    ).to(device)
    # Tune the detection post-processing thresholds in place
    postprocessor = predictor.det_predictor.model.postprocessor
    postprocessor.bin_thresh = bin_thresh
    postprocessor.box_thresh = box_thresh
    return predictor
+
80
+
81
def forward_image(predictor: OCRPredictor, image: np.ndarray, device: torch.device) -> np.ndarray:
    """Run only the detection branch of the predictor on a single image

    Args:
        predictor: instance of OCRPredictor
        image: image to process
        device: torch.device, the device to process the image on

    Returns:
        segmentation map
    """
    det_predictor = predictor.det_predictor
    # Gradients are never needed at inference time
    with torch.no_grad():
        batch = det_predictor.pre_processor([image])[0]
        model_out = det_predictor.model(batch.to(device), return_model_output=True)
        seg_map = model_out["out_map"].to("cpu").numpy()

    return seg_map
backend/tensorflow.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) 2021-2025, Mindee.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ import numpy as np
7
+ import tensorflow as tf
8
+
9
+ from doctr.models import ocr_predictor
10
+ from doctr.models.predictor import OCRPredictor
11
+
12
+ DET_ARCHS = [
13
+ "fast_base",
14
+ "fast_small",
15
+ "fast_tiny",
16
+ "db_resnet50",
17
+ "db_mobilenet_v3_large",
18
+ "linknet_resnet18",
19
+ "linknet_resnet34",
20
+ "linknet_resnet50",
21
+ ]
22
+ RECO_ARCHS = [
23
+ "crnn_vgg16_bn",
24
+ "crnn_mobilenet_v3_small",
25
+ "crnn_mobilenet_v3_large",
26
+ "master",
27
+ "sar_resnet31",
28
+ "vitstr_small",
29
+ "vitstr_base",
30
+ "parseq",
31
+ ]
32
+
33
+
34
def load_predictor(
    det_arch: str,
    reco_arch: str,
    assume_straight_pages: bool,
    straighten_pages: bool,
    export_as_straight_boxes: bool,
    disable_page_orientation: bool,
    disable_crop_orientation: bool,
    bin_thresh: float,
    box_thresh: float,
    device: tf.device,
) -> OCRPredictor:
    """Load a pretrained OCR predictor from doctr.models

    Args:
        det_arch: detection architecture
        reco_arch: recognition architecture
        assume_straight_pages: whether to assume straight pages or not
        straighten_pages: whether to straighten rotated pages or not
        export_as_straight_boxes: whether to export boxes as straight or not
        disable_page_orientation: whether to disable page orientation or not
        disable_crop_orientation: whether to disable crop orientation or not
        bin_thresh: binarization threshold for the segmentation map
        box_thresh: threshold for the detection boxes
        device: tf.device, the device to load the predictor on

    Returns:
        instance of OCRPredictor
    """
    # Build the predictor inside the device scope so its weights live there
    with device:
        predictor = ocr_predictor(
            det_arch,
            reco_arch,
            pretrained=True,
            assume_straight_pages=assume_straight_pages,
            straighten_pages=straighten_pages,
            export_as_straight_boxes=export_as_straight_boxes,
            detect_orientation=not assume_straight_pages,
            disable_page_orientation=disable_page_orientation,
            disable_crop_orientation=disable_crop_orientation,
        )
        # Override the detection post-processing thresholds in place
        postprocessor = predictor.det_predictor.model.postprocessor
        postprocessor.bin_thresh = bin_thresh
        postprocessor.box_thresh = box_thresh
    return predictor
78
+
79
+
80
def forward_image(predictor: OCRPredictor, image: np.ndarray, device: tf.device) -> np.ndarray:
    """Run only the detection branch of the predictor on a single image

    Args:
        predictor: instance of OCRPredictor
        image: image to process as numpy array
        device: tf.device, the device to process the image on

    Returns:
        segmentation map
    """
    det_predictor = predictor.det_predictor
    # Run the detection model on the requested device...
    with device:
        batches = det_predictor.pre_processor([image])
        seg_map = det_predictor.model(batches[0], return_model_output=True)["out_map"]

    # ...then materialize the result on CPU as a numpy array
    with tf.device("/cpu:0"):
        seg_map = tf.identity(seg_map).numpy()

    return seg_map
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ python3-opencv
2
+ fonts-freefont-ttf
requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
- altair
2
- pandas
3
- streamlit
 
1
+ -e git+https://github.com/mindee/doctr.git#egg=python-doctr[viz]
2
+ streamlit>=1.0.0
 
src/python-doctr/.github/ISSUE_TEMPLATE/bug_report.yml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: 🐛 Bug report
2
+ description: Create a report to help us improve the library
3
+ labels: 'type: bug'
4
+
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ #### Before reporting a bug, please check that the issue hasn't already been addressed in [the existing and past issues](https://github.com/mindee/doctr/issues?q=is%3Aissue).
10
+ - type: textarea
11
+ attributes:
12
+ label: Bug description
13
+ description: |
14
+ A clear and concise description of what the bug is.
15
+
16
+ Please explain the result you observed and the behavior you were expecting.
17
+ placeholder: |
18
+ A clear and concise description of what the bug is.
19
+ validations:
20
+ required: true
21
+
22
+ - type: textarea
23
+ attributes:
24
+ label: Code snippet to reproduce the bug
25
+ description: |
26
+ Sample code to reproduce the problem.
27
+
28
+ Please wrap your code snippet with ```` ```triple quotes blocks``` ```` for readability.
29
+ placeholder: |
30
+ ```python
31
+ Sample code to reproduce the problem
32
+ ```
33
+ validations:
34
+ required: true
35
+ - type: textarea
36
+ attributes:
37
+ label: Error traceback
38
+ description: |
39
+ The error message you received running the code snippet, with the full traceback.
40
+
41
+ Please wrap your error message with ```` ```triple quotes blocks``` ```` for readability.
42
+ placeholder: |
43
+ ```
44
+ The error message you got, with the full traceback.
45
+ ```
46
+ validations:
47
+ required: true
48
+ - type: textarea
49
+ attributes:
50
+ label: Environment
51
+ description: |
52
+ Please run the following command and paste the output below.
53
+ ```sh
54
+ wget https://raw.githubusercontent.com/mindee/doctr/main/scripts/collect_env.py
55
+ # For security purposes, please check the contents of collect_env.py before running it.
56
+ python collect_env.py
57
+ ```
58
+ validations:
59
+ required: true
60
+ - type: markdown
61
+ attributes:
62
+ value: >
63
+ Thanks for helping us improve the library!
src/python-doctr/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ blank_issues_enabled: true
2
+ contact_links:
3
+ - name: Usage questions
4
+ url: https://github.com/mindee/doctr/discussions
5
+ about: Ask questions and discuss with other docTR community members
src/python-doctr/.github/ISSUE_TEMPLATE/feature_request.yml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: 🚀 Feature request
2
+ description: Submit a proposal/request for a new feature for docTR
3
+ labels: 'type: enhancement'
4
+
5
+ body:
6
+ - type: textarea
7
+ attributes:
8
+ label: 🚀 The feature
9
+ description: >
10
+ A clear and concise description of the feature proposal
11
+ validations:
12
+ required: true
13
+ - type: textarea
14
+ attributes:
15
+ label: Motivation, pitch
16
+ description: >
17
+ Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too.
18
+ validations:
19
+ required: true
20
+ - type: textarea
21
+ attributes:
22
+ label: Alternatives
23
+ description: >
24
+ A description of any alternative solutions or features you've considered, if any.
25
+ - type: textarea
26
+ attributes:
27
+ label: Additional context
28
+ description: >
29
+ Add any other context or screenshots about the feature request.
30
+ - type: markdown
31
+ attributes:
32
+ value: >
33
+ Thanks for contributing 🎉
src/python-doctr/.github/dependabot.yml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ open-pull-requests-limit: 10
6
+ target-branch: "main"
7
+ labels: ["topic: build"]
8
+ schedule:
9
+ interval: weekly
10
+ day: sunday
11
+ reviewers:
12
+ - "charlesmindee"
13
+ - "felixdittrich92"
14
+ - "odulcy-mindee"
15
+ - package-ecosystem: "github-actions"
16
+ directory: "/"
17
+ open-pull-requests-limit: 10
18
+ target-branch: "main"
19
+ labels: ["topic: ci"]
20
+ schedule:
21
+ interval: weekly
22
+ day: sunday
23
+ reviewers:
24
+ - "charlesmindee"
25
+ - "felixdittrich92"
26
+ - "odulcy-mindee"
27
+ groups:
28
+ github-actions:
29
+ patterns:
30
+ - "*"
src/python-doctr/.github/release.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ changelog:
2
+ exclude:
3
+ labels:
4
+ - ignore-for-release
5
+ categories:
6
+ - title: Breaking Changes 🛠
7
+ labels:
8
+ - "type: breaking change"
9
+ # NEW FEATURES
10
+ - title: New Features
11
+ labels:
12
+ - "type: new feature"
13
+ # BUG FIXES
14
+ - title: Bug Fixes
15
+ labels:
16
+ - "type: bug"
17
+ # IMPROVEMENTS
18
+ - title: Improvements
19
+ labels:
20
+ - "type: enhancement"
21
+ # MISC
22
+ - title: Miscellaneous
23
+ labels:
24
+ - "type: misc"
src/python-doctr/.github/verify_pr_labels.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) 2021-2025, Mindee.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ """Borrowed & adapted from https://github.com/pytorch/vision/blob/main/.github/process_commit.py
7
+ This script finds the merger responsible for labeling a PR by a commit SHA. It is used by the workflow in
8
+ '.github/workflows/pr-labels.yml'. If there exists no PR associated with the commit or the PR is properly labeled,
9
+ this script is a no-op.
10
+ Note: we ping the merger only, not the reviewers, as the reviewers can sometimes be external to the project
11
+ with no labeling responsibility, so we don't want to bother them.
12
+ """
13
+
14
+ from typing import Any
15
+
16
+ import requests
17
+
18
+ # For a PR to be properly labeled it should have one primary label and one secondary label
19
+
20
+ # Should specify the type of change
21
+ PRIMARY_LABELS = {
22
+ "type: new feature",
23
+ "type: bug",
24
+ "type: enhancement",
25
+ "type: misc",
26
+ }
27
+
28
+ # Should specify what has been modified
29
+ SECONDARY_LABELS = {
30
+ "topic: documentation",
31
+ "module: datasets",
32
+ "module: io",
33
+ "module: models",
34
+ "module: transforms",
35
+ "module: utils",
36
+ "ext: api",
37
+ "ext: demo",
38
+ "ext: docs",
39
+ "ext: notebooks",
40
+ "ext: references",
41
+ "ext: scripts",
42
+ "ext: tests",
43
+ "topic: build",
44
+ "topic: ci",
45
+ "topic: docker",
46
+ }
47
+
48
+ GH_ORG = "mindee"
49
+ GH_REPO = "doctr"
50
+
51
+
52
def query_repo(cmd: str, *, accept) -> Any:
    """Call the GitHub REST API for this repository and return the decoded JSON.

    Args:
        cmd: path under the repository endpoint (e.g. ``"pulls/123"``)
        accept: value for the HTTP ``Accept`` header (GitHub media type)

    Returns:
        The JSON-decoded response body.
    """
    response = requests.get(
        f"https://api.github.com/repos/{GH_ORG}/{GH_REPO}/{cmd}",
        headers=dict(Accept=accept),
        # Without a timeout, requests can block forever and hang the CI job
        timeout=30,
    )
    return response.json()
55
+
56
+
57
def get_pr_merger_and_labels(pr_number: int) -> tuple[str, set[str]]:
    """Fetch the merger login and the set of label names for a PR.

    Args:
        pr_number: the pull request number to query

    Returns:
        ``(merger, labels)`` where ``merger`` is the merger's login (or None
        when the PR was not merged) and ``labels`` is the set of label names.
    """
    # See https://docs.github.com/en/rest/reference/pulls#get-a-pull-request
    data = query_repo(f"pulls/{pr_number}", accept="application/vnd.github.v3+json")
    # "merged_by" is JSON null (i.e. None) for unmerged PRs; dict.get's default
    # only applies when the key is *missing*, so guard against None explicitly
    # to avoid AttributeError on None.get("login").
    merger = (data.get("merged_by") or {}).get("login")
    labels = {label["name"] for label in data["labels"]}
    return merger, labels
63
+
64
+
65
def main(args):
    """Print an @-mention of the PR merger when the PR is mislabeled."""
    merger, labels = get_pr_merger_and_labels(args.pr)
    # A well-labeled PR carries at least one primary and one secondary label
    has_primary = bool(PRIMARY_LABELS.intersection(labels))
    has_secondary = bool(SECONDARY_LABELS.intersection(labels))
    if isinstance(merger, str) and not (has_primary and has_secondary):
        print(f"@{merger}")
70
+
71
+
72
def parse_args():
    """Parse command-line arguments: a single positional PR number."""
    import argparse

    parser = argparse.ArgumentParser(
        description="PR label checker",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("pr", type=int, help="PR number")
    return parser.parse_args()
83
+
84
+
85
if __name__ == "__main__":
    # Script entry point: check the labels of the PR given on the command line
    main(parse_args())
src/python-doctr/.github/workflows/builds.yml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: builds
2
+
3
+ on:
4
+ push:
5
+ branches: main
6
+ pull_request:
7
+ branches: main
8
+
9
+ jobs:
10
+ build:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ os: [ubuntu-latest, macos-latest, windows-latest]
16
+ python: ["3.10", "3.11"]
17
+ steps:
18
+ - uses: actions/checkout@v5
19
+ - if: matrix.os == 'macos-latest'
20
+ name: Install MacOS prerequisites
21
+ run: brew install cairo pango gdk-pixbuf libffi
22
+ - name: Set up Python
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ # MacOS issue ref.: https://github.com/actions/setup-python/issues/855 & https://github.com/actions/setup-python/issues/865
26
+ python-version: ${{ matrix.os == 'macos-latest' && matrix.python == '3.10' && '3.11' || matrix.python }}
27
+ architecture: x64
28
+ - name: Cache python modules
29
+ uses: actions/cache@v4
30
+ with:
31
+ path: ~/.cache/pip
32
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
33
+ - name: Install package
34
+ run: |
35
+ python -m pip install --upgrade pip
36
+ if [ "${{ runner.os }}" = "Windows" ]; then
37
+ pip install -e .[viz] --upgrade
38
+ else
39
+ pip install -e .[viz,html] --upgrade
40
+ fi
41
+ shell: bash # Ensures shell is consistent across OSes
42
+ - name: Import package
43
+ run: python -c "import doctr; print(doctr.__version__)"
src/python-doctr/.github/workflows/clear_caches.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Clear GitHub runner caches
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ schedule:
6
+ - cron: '0 0 * * *' # Runs once a day
7
+
8
+ jobs:
9
+ clear:
10
+ name: Clear caches
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: MyAlbum/purge-cache@v2
14
+ with:
15
+ max-age: 172800 # Caches older than 2 days are deleted
src/python-doctr/.github/workflows/demo.yml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: demo
2
+
3
+ on:
4
+ # Run 'test-demo' on every pull request to the main branch
5
+ pull_request:
6
+ branches: [main]
7
+
8
+ # Run 'test-demo' on every push to the main branch or both jobs when a new version tag is pushed
9
+ push:
10
+ branches:
11
+ - main
12
+ tags:
13
+ - 'v*'
14
+
15
+ # Run 'sync-to-hub' on a scheduled cron job
16
+ schedule:
17
+ - cron: '0 2 10 * *' # At 02:00 on day-of-month 10 (every month)
18
+
19
+ # Allow manual triggering of the workflow
20
+ workflow_dispatch:
21
+
22
+ jobs:
23
+ test-demo:
24
+ runs-on: ${{ matrix.os }}
25
+ strategy:
26
+ fail-fast: false
27
+ matrix:
28
+ os: [ubuntu-latest]
29
+ python: ["3.10"]
30
+ steps:
31
+ - if: matrix.os == 'macos-latest'
32
+ name: Install MacOS prerequisites
33
+ run: brew install cairo pango gdk-pixbuf libffi
34
+ - uses: actions/checkout@v5
35
+ - name: Set up Python
36
+ uses: actions/setup-python@v5
37
+ with:
38
+ python-version: ${{ matrix.python }}
39
+ architecture: x64
40
+ - name: Cache python modules
41
+ uses: actions/cache@v4
42
+ with:
43
+ path: ~/.cache/pip
44
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('demo/pt-requirements.txt') }}
45
+ - name: Install dependencies
46
+ run: |
47
+ python -m pip install --upgrade pip
48
+ pip install -e .[viz,html] --upgrade
49
+ pip install -r demo/pt-requirements.txt
50
+ - name: Run demo
51
+ run: |
52
+ streamlit --version
53
+ screen -dm streamlit run demo/app.py
54
+ sleep 10
55
+ curl http://localhost:8501/docs
56
+
57
+ # This job only runs when a new version tag is pushed or during the cron job or when manually triggered
58
+ sync-to-hub:
59
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
60
+ needs: test-demo
61
+ runs-on: ${{ matrix.os }}
62
+ strategy:
63
+ fail-fast: false
64
+ matrix:
65
+ os: [ubuntu-latest]
66
+ python: ["3.10"]
67
+ steps:
68
+ - uses: actions/checkout@v5
69
+ with:
70
+ fetch-depth: 0
71
+ - name: Set up Python
72
+ uses: actions/setup-python@v5
73
+ with:
74
+ python-version: ${{ matrix.python }}
75
+ architecture: x64
76
+ - name: Install huggingface_hub
77
+ run: pip install huggingface-hub
78
+ - name: Upload folder to Hugging Face
79
+ # Only keep the requirements.txt file for the demo (PyTorch)
80
+ run: |
81
+ mv demo/pt-requirements.txt demo/requirements.txt
82
+
83
+ python -c "
84
+ from huggingface_hub import HfApi
85
+ api = HfApi(token='${{ secrets.HF_TOKEN }}')
86
+ repo_id = 'mindee/doctr'
87
+ api.upload_folder(repo_id=repo_id, repo_type='space', folder_path='demo/')
88
+ api.restart_space(repo_id=repo_id, factory_reboot=True)
89
+ "
src/python-doctr/.github/workflows/doc-status.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: doc-status
2
+ on:
3
+ page_build
4
+
5
+ jobs:
6
+ see-page-build-payload:
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - name: Set up Python
10
+ uses: actions/setup-python@v5
11
+ with:
12
+ python-version: "3.10"
13
+ architecture: x64
14
+ - name: check status
15
+ run: |
16
+ import os
17
+ status, errormsg = os.getenv('STATUS'), os.getenv('ERROR')
18
+ if status != 'built': raise AssertionError(f"There was an error building the page on GitHub pages.\n\nStatus: {status}\n\nError message: {errormsg}")
19
+ shell: python
20
+ env:
21
+ STATUS: ${{ github.event.build.status }}
22
+ ERROR: ${{ github.event.build.error.message }}
src/python-doctr/.github/workflows/docker.yml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: docker
2
+
3
+ on:
4
+ push:
5
+ branches: main
6
+ pull_request:
7
+ branches: main
8
+
9
+ jobs:
10
+ docker-package:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v5
14
+ - name: Build docker image
15
+ run: docker build -t doctr-py3.10-slim --build-arg SYSTEM=cpu .
16
+ - name: Run docker container
17
+ run: docker run doctr-py3.10-slim python3 -c 'import doctr'
18
+
19
+ pytest-api:
20
+ runs-on: ${{ matrix.os }}
21
+ strategy:
22
+ matrix:
23
+ os: [ubuntu-latest]
24
+ python: ["3.10"]
25
+ steps:
26
+ - uses: actions/checkout@v5
27
+ - uses: actions/setup-python@v5
28
+ with:
29
+ python-version: ${{ matrix.python }}
30
+ architecture: x64
31
+ - name: Build & run docker
32
+ run: cd api && make lock && make run
33
+ - name: Ping server
34
+ run: wget --spider --tries=12 http://localhost:8080/docs
35
+ - name: Run docker test
36
+ run: cd api && make test
src/python-doctr/.github/workflows/docs.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: docs
2
+ on:
3
+ push:
4
+ branches: main
5
+
6
+ jobs:
7
+ docs-deploy:
8
+ runs-on: ${{ matrix.os }}
9
+ strategy:
10
+ matrix:
11
+ os: [ubuntu-latest]
12
+ python: ["3.10"]
13
+ steps:
14
+ - uses: actions/checkout@v5
15
+ with:
16
+ persist-credentials: false
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: ${{ matrix.python }}
21
+ architecture: x64
22
+ - name: Cache python modules
23
+ uses: actions/cache@v4
24
+ with:
25
+ path: ~/.cache/pip
26
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-docs
27
+ - name: Install dependencies
28
+ run: |
29
+ python -m pip install --upgrade pip
30
+ pip install -e .[viz,html] --upgrade
31
+ pip install -e .[docs]
32
+
33
+ - name: Build documentation
34
+ run: cd docs && bash build.sh
35
+
36
+ - name: Documentation sanity check
37
+ run: test -e docs/build/index.html || exit
38
+
39
+ - name: Install SSH Client 🔑
40
+ uses: webfactory/ssh-agent@v0.9.1
41
+ with:
42
+ ssh-private-key: ${{ secrets.SSH_DEPLOY_KEY }}
43
+
44
+ - name: Deploy to Github Pages
45
+ uses: JamesIves/github-pages-deploy-action@v4.7.3
46
+ with:
47
+ BRANCH: gh-pages
48
+ FOLDER: 'docs/build'
49
+ COMMIT_MESSAGE: '[skip ci] Documentation updates'
50
+ CLEAN: true
51
+ SSH: true
src/python-doctr/.github/workflows/main.yml ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: tests
2
+
3
+ on:
4
+ push:
5
+ branches: main
6
+ pull_request:
7
+ branches: main
8
+
9
+ jobs:
10
+ pytest-common:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ matrix:
14
+ os: [ubuntu-latest]
15
+ python: ["3.10"]
16
+ steps:
17
+ - uses: actions/checkout@v5
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: ${{ matrix.python }}
22
+ architecture: x64
23
+ - name: Cache python modules
24
+ uses: actions/cache@v4
25
+ with:
26
+ path: ~/.cache/pip
27
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests
28
+ - name: Install dependencies
29
+ run: |
30
+ python -m pip install --upgrade pip
31
+ pip install -e .[viz,html] --upgrade
32
+ pip install -e .[testing]
33
+ - name: Run unittests
34
+ run: |
35
+ coverage run -m pytest tests/common/ -rs
36
+ coverage xml -o coverage-common.xml
37
+ - uses: actions/upload-artifact@v4
38
+ with:
39
+ name: coverage-common
40
+ path: ./coverage-common.xml
41
+ if-no-files-found: error
42
+
43
+
44
+ pytest-torch:
45
+ runs-on: ${{ matrix.os }}
46
+ strategy:
47
+ matrix:
48
+ os: [ubuntu-latest]
49
+ python: ["3.10"]
50
+ steps:
51
+ - uses: actions/checkout@v5
52
+ - name: Set up Python
53
+ uses: actions/setup-python@v5
54
+ with:
55
+ python-version: ${{ matrix.python }}
56
+ architecture: x64
57
+ - name: Cache python modules
58
+ uses: actions/cache@v4
59
+ with:
60
+ path: ~/.cache/pip
61
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests
62
+ - name: Install dependencies
63
+ run: |
64
+ python -m pip install --upgrade pip
65
+ pip install -e .[viz,html] --upgrade
66
+ pip install -e .[testing]
67
+
68
+ - name: Run unittests
69
+ run: |
70
+ coverage run -m pytest tests/pytorch/ -rs
71
+ coverage xml -o coverage-pt.xml
72
+
73
+ - uses: actions/upload-artifact@v4
74
+ with:
75
+ name: coverage-pytorch
76
+ path: ./coverage-pt.xml
77
+ if-no-files-found: error
78
+
79
+ codecov-upload:
80
+ runs-on: ubuntu-latest
81
+ needs: [ pytest-common, pytest-torch ]
82
+ steps:
83
+ - uses: actions/checkout@v5
84
+ - uses: actions/download-artifact@v5
85
+ - name: Upload coverage to Codecov
86
+ uses: codecov/codecov-action@v5
87
+ with:
88
+ flags: unittests
89
+ fail_ci_if_error: true
90
+ token: ${{ secrets.CODECOV_TOKEN }}
src/python-doctr/.github/workflows/pr-labels.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: pr-labels
2
+
3
+ on:
4
+ pull_request:
5
+ branches: main
6
+ types: closed
7
+
8
+ jobs:
9
+ is-properly-labeled:
10
+ if: github.event.pull_request.merged == true
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Checkout repository
14
+ uses: actions/checkout@v5
15
+ - name: Set up python
16
+ uses: actions/setup-python@v5
17
+ - name: Install requests
18
+ run: pip install requests
19
+ - name: Process commit and find merger responsible for labeling
20
+ id: commit
21
+ run: echo "::set-output name=merger::$(python .github/verify_pr_labels.py ${{ github.event.pull_request.number }})"
22
+ - name: 'Comment PR'
23
+ uses: actions/github-script@v7.0.1
24
+ if: ${{ steps.commit.outputs.merger != '' }}
25
+ with:
26
+ github-token: ${{ secrets.GITHUB_TOKEN }}
27
+ script: |
28
+ const { issue: { number: issue_number }, repo: { owner, repo } } = context;
29
+ github.rest.issues.createComment({ issue_number, owner, repo, body: 'Hey ${{ steps.commit.outputs.merger }} 👋\nYou merged this PR, but it is not correctly labeled. The list of valid labels is available at https://github.com/mindee/doctr/blob/main/.github/verify_pr_labels.py' });
src/python-doctr/.github/workflows/public_docker_images.yml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
2
+ #
3
+ name: Docker image on ghcr.io
4
+
5
+ on:
6
+ push:
7
+ tags:
8
+ - 'v*'
9
+ pull_request:
10
+ branches: main
11
+ schedule:
12
+ - cron: '0 2 1 */3 *' # At 02:00 on the 1st day of every 3rd month
13
+
14
+ env:
15
+ REGISTRY: ghcr.io
16
+
17
+ jobs:
18
+ build-and-push-image:
19
+ runs-on: ubuntu-latest
20
+
21
+ strategy:
22
+ fail-fast: false
23
+ matrix:
24
+ # Must match version at https://www.python.org/ftp/python/
25
+ python: ["3.10.13", "3.11.8", "3.12.7"]
26
+ # NOTE: Since docTR 1.0.0 torch doesn't exist as a seperate install option it's only to keep the naming convention
27
+ framework: ["torch", "torch,viz,html,contrib"]
28
+
29
+ # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
30
+ permissions:
31
+ contents: read
32
+ packages: write
33
+
34
+ steps:
35
+ - name: Checkout repository
36
+ uses: actions/checkout@v5
37
+
38
+ - name: Log in to the Container registry
39
+ uses: docker/login-action@v3
40
+ with:
41
+ registry: ${{ env.REGISTRY }}
42
+ username: ${{ github.actor }}
43
+ password: ${{ secrets.GITHUB_TOKEN }}
44
+
45
+ - name: Sanitize docker tag
46
+ run: |
47
+ PREFIX_DOCKER_TAG="${{ matrix.framework }}-py${{ matrix.python }}-"
48
+ PREFIX_DOCKER_TAG=$(echo ${PREFIX_DOCKER_TAG}|sed 's/,/-/g')
49
+ echo PREFIX_DOCKER_TAG=${PREFIX_DOCKER_TAG} >> $GITHUB_ENV
50
+ echo $PREFIX_DOCKER_TAG
51
+
52
+ - name: Extract metadata (tags, labels) for Docker
53
+ id: meta
54
+ uses: docker/metadata-action@v5
55
+ with:
56
+ images: ${{ env.REGISTRY }}/${{ github.repository }}
57
+ tags: |
58
+ # used only on schedule event
59
+ type=schedule,pattern={{date 'YYYY-MM'}},prefix=${{ env.PREFIX_DOCKER_TAG }}
60
+ # used only if a tag following semver is published
61
+ type=semver,pattern={{raw}},prefix=${{ env.PREFIX_DOCKER_TAG }}
62
+
63
+ - name: Build Docker image
64
+ id: build
65
+ uses: docker/build-push-action@v6
66
+ with:
67
+ context: .
68
+ build-args: |
69
+ FRAMEWORK=${{ matrix.framework }}
70
+ PYTHON_VERSION=${{ matrix.python }}
71
+ DOCTR_REPO=${{ github.repository }}
72
+ DOCTR_VERSION=${{ github.sha }}
73
+ push: false # push only if `import doctr` works
74
+ tags: ${{ steps.meta.outputs.tags }}
75
+
76
+ - name: Check if `import doctr` works
77
+ run: docker run ${{ steps.build.outputs.imageid }} python3 -c 'import doctr'
78
+
79
+ - name: Push Docker image
80
+ # Push only if the CI is not triggered by "PR on main"
81
+ if: ${{ (github.ref == 'refs/heads/main' && github.event_name != 'pull_request') || (startsWith(github.ref, 'refs/tags') && github.event_name == 'push') }}
82
+ uses: docker/build-push-action@v6
83
+ with:
84
+ context: .
85
+ build-args: |
86
+ FRAMEWORK=${{ matrix.framework }}
87
+ PYTHON_VERSION=${{ matrix.python }}
88
+ DOCTR_REPO=${{ github.repository }}
89
+ DOCTR_VERSION=${{ github.sha }}
90
+ push: true
91
+ tags: ${{ steps.meta.outputs.tags }}
src/python-doctr/.github/workflows/publish.yml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: publish
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ pypi:
9
+ if: "!github.event.release.prerelease"
10
+ strategy:
11
+ fail-fast: false
12
+ matrix:
13
+ os: [ubuntu-latest]
14
+ python: ["3.10"]
15
+ runs-on: ${{ matrix.os }}
16
+ steps:
17
+ - uses: actions/checkout@v5
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: ${{ matrix.python }}
22
+ architecture: x64
23
+ - name: Cache python modules
24
+ uses: actions/cache@v4
25
+ with:
26
+ path: ~/.cache/pip
27
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
28
+ - name: Install dependencies
29
+ run: |
30
+ python -m pip install --upgrade pip
31
+ pip install setuptools wheel twine --upgrade
32
+ - name: Get release tag
33
+ id: release_tag
34
+ run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
35
+ - name: Build and publish
36
+ env:
37
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
38
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
39
+ VERSION: ${{ env.VERSION }}
40
+ run: |
41
+ BUILD_VERSION=$VERSION python setup.py sdist bdist_wheel
42
+ twine check dist/*
43
+ twine upload dist/*
44
+
45
+ pypi-check:
46
+ needs: pypi
47
+ if: "!github.event.release.prerelease"
48
+ strategy:
49
+ fail-fast: false
50
+ matrix:
51
+ os: [ubuntu-latest]
52
+ python: ["3.10"]
53
+ runs-on: ${{ matrix.os }}
54
+ steps:
55
+ - uses: actions/checkout@v5
56
+ - name: Set up Python
57
+ uses: actions/setup-python@v5
58
+ with:
59
+ python-version: ${{ matrix.python }}
60
+ architecture: x64
61
+ - name: Install package
62
+ run: |
63
+ python -m pip install --upgrade pip
64
+ pip install python-doctr
65
+ python -c "from importlib.metadata import version; print(version('python-doctr'))"
src/python-doctr/.github/workflows/pull_requests.yml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: pull_requests
2
+
3
+ on:
4
+ pull_request:
5
+ branches: main
6
+
7
+ jobs:
8
+ docs-build:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v5
12
+ - name: Set up Python
13
+ uses: actions/setup-python@v5
14
+ with:
15
+ python-version: "3.10"
16
+ architecture: x64
17
+ - name: Cache python modules
18
+ uses: actions/cache@v4
19
+ with:
20
+ path: ~/.cache/pip
21
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-docs
22
+ - name: Install dependencies
23
+ run: |
24
+ python -m pip install --upgrade pip
25
+ pip install -e .[viz,html] --upgrade
26
+ pip install -e .[docs]
27
+
28
+ - name: Build documentation
29
+ run: cd docs && bash build.sh
30
+
31
+ - name: Documentation sanity check
32
+ run: test -e docs/build/index.html || exit
src/python-doctr/.github/workflows/references.yml ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: references
2
+
3
+ on:
4
+ push:
5
+ branches: main
6
+ pull_request:
7
+ branches: main
8
+
9
+ jobs:
10
+ train-char-classification:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ os: [ubuntu-latest]
16
+ python: ["3.10"]
17
+ steps:
18
+ - uses: actions/checkout@v5
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python }}
23
+ architecture: x64
24
+
25
+ - name: Cache python modules
26
+ uses: actions/cache@v4
27
+ with:
28
+ path: ~/.cache/pip
29
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }}
30
+ restore-keys: |
31
+ ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-
32
+ - name: Install dependencies
33
+ run: |
34
+ python -m pip install --upgrade pip
35
+ pip install -e .[viz,html] --upgrade
36
+ pip install -r references/requirements.txt
37
+ sudo apt-get update && sudo apt-get install fonts-freefont-ttf -y
38
+ - name: Train for a short epoch
39
+ run: python references/classification/train_character.py vit_s -b 32 --val-samples 1 --train-samples 1 --epochs 1
40
+
41
+ train-orientation-classification:
42
+ runs-on: ${{ matrix.os }}
43
+ strategy:
44
+ fail-fast: false
45
+ matrix:
46
+ os: [ubuntu-latest]
47
+ python: ["3.10"]
48
+ steps:
49
+ - uses: actions/checkout@v5
50
+ - name: Set up Python
51
+ uses: actions/setup-python@v5
52
+ with:
53
+ python-version: ${{ matrix.python }}
54
+ architecture: x64
55
+ - name: Cache python modules
56
+ uses: actions/cache@v4
57
+ with:
58
+ path: ~/.cache/pip
59
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }}
60
+ restore-keys: |
61
+ ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-
62
+ - name: Install dependencies
63
+ run: |
64
+ python -m pip install --upgrade pip
65
+ pip install -e .[viz,html] --upgrade
66
+ pip install -r references/requirements.txt
67
+ - name: Download and extract detection toy set
68
+ run: |
69
+ wget https://github.com/mindee/doctr/releases/download/v0.3.1/toy_detection_set-bbbb4243.zip
70
+ sudo apt-get update && sudo apt-get install unzip -y
71
+ unzip toy_detection_set-bbbb4243.zip -d det_set
72
+ - name: Download and extract recognition toy set
73
+ run: |
74
+ wget https://github.com/mindee/doctr/releases/download/v0.3.1/toy_recogition_set-036a4d80.zip
75
+ sudo apt-get update && sudo apt-get install unzip -y
76
+ unzip toy_recogition_set-036a4d80.zip -d reco_set
77
+ - name: Train for a short epoch (document orientation)
78
+ run: python references/classification/train_orientation.py resnet18 --type page --train_path ./det_set --val_path ./det_set -b 2 --epochs 1
79
+ - name: Train for a short epoch (crop orientation)
80
+ run: python references/classification/train_orientation.py resnet18 --type crop --train_path ./reco_set --val_path ./reco_set -b 4 --epochs 1
81
+
82
+ train-text-recognition:
83
+ runs-on: ${{ matrix.os }}
84
+ strategy:
85
+ fail-fast: false
86
+ matrix:
87
+ os: [ubuntu-latest]
88
+ python: ["3.10"]
89
+ steps:
90
+ - uses: actions/checkout@v5
91
+ - name: Set up Python
92
+ uses: actions/setup-python@v5
93
+ with:
94
+ python-version: ${{ matrix.python }}
95
+ architecture: x64
96
+ - name: Cache python modules
97
+ uses: actions/cache@v4
98
+ with:
99
+ path: ~/.cache/pip
100
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }}
101
+ restore-keys: |
102
+ ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-
103
+ - name: Install dependencies
104
+ run: |
105
+ python -m pip install --upgrade pip
106
+ pip install -e .[viz,html] --upgrade
107
+ pip install -r references/requirements.txt
108
+ - name: Download and extract toy set
109
+ run: |
110
+ wget https://github.com/mindee/doctr/releases/download/v0.3.1/toy_recogition_set-036a4d80.zip
111
+ sudo apt-get update && sudo apt-get install unzip -y
112
+ unzip toy_recogition_set-036a4d80.zip -d reco_set
113
+ - name: Train for a short epoch
114
+ run: python references/recognition/train.py crnn_mobilenet_v3_small --train_path ./reco_set --val_path ./reco_set -b 4 --epochs 1
115
+
116
+ evaluate-text-recognition:
117
+ runs-on: ${{ matrix.os }}
118
+ strategy:
119
+ fail-fast: false
120
+ matrix:
121
+ os: [ubuntu-latest]
122
+ python: ["3.10"]
123
+ steps:
124
+ - uses: actions/checkout@v5
125
+ - name: Set up Python
126
+ uses: actions/setup-python@v5
127
+ with:
128
+ python-version: ${{ matrix.python }}
129
+ architecture: x64
130
+ - name: Cache python modules
131
+ uses: actions/cache@v4
132
+ with:
133
+ path: ~/.cache/pip
134
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}
135
+ - name: Install dependencies
136
+ run: |
137
+ python -m pip install --upgrade pip
138
+ pip install -e .[viz,html] --upgrade
139
+ - name: Evaluate text recognition
140
+ run: python references/recognition/evaluate.py crnn_mobilenet_v3_small --dataset SVT -b 32
141
+
142
+ latency-text-recognition:
143
+ runs-on: ${{ matrix.os }}
144
+ strategy:
145
+ fail-fast: false
146
+ matrix:
147
+ os: [ubuntu-latest]
148
+ python: ["3.10"]
149
+ steps:
150
+ - uses: actions/checkout@v5
151
+ - name: Set up Python
152
+ uses: actions/setup-python@v5
153
+ with:
154
+ python-version: ${{ matrix.python }}
155
+ architecture: x64
156
+ - name: Cache python modules
157
+ uses: actions/cache@v4
158
+ with:
159
+ path: ~/.cache/pip
160
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}
161
+ - name: Install dependencies
162
+ run: |
163
+ python -m pip install --upgrade pip
164
+ pip install -e .[viz,html] --upgrade
165
+ - name: Benchmark latency
166
+ run: python references/recognition/latency.py crnn_mobilenet_v3_small --it 5
167
+
168
+ train-text-detection:
169
+ runs-on: ${{ matrix.os }}
170
+ strategy:
171
+ fail-fast: false
172
+ matrix:
173
+ os: [ubuntu-latest]
174
+ python: ["3.10"]
175
+ steps:
176
+ - uses: actions/checkout@v5
177
+ - name: Set up Python
178
+ uses: actions/setup-python@v5
179
+ with:
180
+ python-version: ${{ matrix.python }}
181
+ architecture: x64
182
+ - name: Cache python modules
183
+ uses: actions/cache@v4
184
+ with:
185
+ path: ~/.cache/pip
186
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }}
187
+ restore-keys: |
188
+ ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-
189
+ - name: Install dependencies
190
+ run: |
191
+ python -m pip install --upgrade pip
192
+ pip install -e .[viz,html] --upgrade
193
+ pip install -r references/requirements.txt
194
+ - name: Download and extract toy set
195
+ run: |
196
+ wget https://github.com/mindee/doctr/releases/download/v0.3.1/toy_detection_set-bbbb4243.zip
197
+ sudo apt-get update && sudo apt-get install unzip -y
198
+ unzip toy_detection_set-bbbb4243.zip -d det_set
199
+ - name: Train for a short epoch
200
+ run: python references/detection/train.py db_mobilenet_v3_large --train_path ./det_set --val_path ./det_set -b 2 --epochs 1
201
+
202
+ evaluate-text-detection:
203
+ runs-on: ${{ matrix.os }}
204
+ strategy:
205
+ fail-fast: false
206
+ matrix:
207
+ os: [ubuntu-latest]
208
+ python: ["3.10"]
209
+ steps:
210
+ - uses: actions/checkout@v5
211
+ - name: Set up Python
212
+ uses: actions/setup-python@v5
213
+ with:
214
+ python-version: ${{ matrix.python }}
215
+ architecture: x64
216
+ - name: Cache python modules
217
+ uses: actions/cache@v4
218
+ with:
219
+ path: ~/.cache/pip
220
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}
221
+ - name: Install dependencies
222
+ run: |
223
+ python -m pip install --upgrade pip
224
+ pip install -e .[viz,html] --upgrade
225
+ pip install -r references/requirements.txt
226
+ - name: Evaluate text detection
227
+ run: python references/detection/evaluate.py db_mobilenet_v3_large
228
+
229
+ latency-text-detection:
230
+ runs-on: ${{ matrix.os }}
231
+ strategy:
232
+ fail-fast: false
233
+ matrix:
234
+ os: [ubuntu-latest]
235
+ python: ["3.10"]
236
+ steps:
237
+ - uses: actions/checkout@v5
238
+ - name: Set up Python
239
+ uses: actions/setup-python@v5
240
+ with:
241
+ python-version: ${{ matrix.python }}
242
+ architecture: x64
243
+ - name: Cache python modules
244
+ uses: actions/cache@v4
245
+ with:
246
+ path: ~/.cache/pip
247
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}
248
+ - name: Install dependencies
249
+ run: |
250
+ python -m pip install --upgrade pip
251
+ pip install -e .[viz,html] --upgrade
252
+ - name: Benchmark latency
253
+ run: python references/detection/latency.py db_mobilenet_v3_large --it 5 --size 512
src/python-doctr/.github/workflows/scripts.yml ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: scripts
2
+
3
+ on:
4
+ push:
5
+ branches: main
6
+ pull_request:
7
+ branches: main
8
+
9
+ jobs:
10
+ test-analyze:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ os: [ubuntu-latest]
16
+ python: ["3.10", "3.11"]
17
+ steps:
18
+ - if: matrix.os == 'macos-latest'
19
+ name: Install MacOS prerequisites
20
+ run: brew install cairo pango gdk-pixbuf libffi
21
+ - uses: actions/checkout@v5
22
+ - name: Set up Python
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: ${{ matrix.python }}
26
+ architecture: x64
27
+ - name: Cache python modules
28
+ uses: actions/cache@v4
29
+ with:
30
+ path: ~/.cache/pip
31
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
32
+ - name: Install package
33
+ run: |
34
+ python -m pip install --upgrade pip
35
+ pip install -e .[viz,html] --upgrade
36
+
37
+ - name: Run analysis script
38
+ run: |
39
+ wget https://github.com/mindee/doctr/releases/download/v0.1.0/sample.pdf
40
+ python scripts/analyze.py sample.pdf --noblock --detection db_mobilenet_v3_large
41
+
42
+ test-detect-text:
43
+ runs-on: ${{ matrix.os }}
44
+ strategy:
45
+ fail-fast: false
46
+ matrix:
47
+ os: [ubuntu-latest]
48
+ python: ["3.10", "3.11"]
49
+ steps:
50
+ - if: matrix.os == 'macos-latest'
51
+ name: Install MacOS prerequisites
52
+ run: brew install cairo pango gdk-pixbuf libffi
53
+ - uses: actions/checkout@v5
54
+ - name: Set up Python
55
+ uses: actions/setup-python@v5
56
+ with:
57
+ python-version: ${{ matrix.python }}
58
+ architecture: x64
59
+ - name: Cache python modules
60
+ uses: actions/cache@v4
61
+ with:
62
+ path: ~/.cache/pip
63
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
64
+ - name: Install package
65
+ run: |
66
+ python -m pip install --upgrade pip
67
+ pip install -e .[viz,html] --upgrade
68
+
69
+ - name: Run detection script
70
+ run: |
71
+ wget https://github.com/mindee/doctr/releases/download/v0.1.0/sample.pdf
72
+ python scripts/detect_text.py sample.pdf --detection db_mobilenet_v3_large
73
+
74
+ test-evaluate:
75
+ runs-on: ${{ matrix.os }}
76
+ strategy:
77
+ fail-fast: false
78
+ matrix:
79
+ os: [ubuntu-latest]
80
+ python: ["3.10", "3.11"]
81
+ steps:
82
+ - if: matrix.os == 'macos-latest'
83
+ name: Install MacOS prerequisites
84
+ run: brew install cairo pango gdk-pixbuf libffi
85
+ - uses: actions/checkout@v5
86
+ - name: Set up Python
87
+ uses: actions/setup-python@v5
88
+ with:
89
+ python-version: ${{ matrix.python }}
90
+ architecture: x64
91
+ - name: Cache python modules
92
+ uses: actions/cache@v4
93
+ with:
94
+ path: ~/.cache/pip
95
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
96
+ - name: Install package
97
+ run: |
98
+ python -m pip install --upgrade pip
99
+ pip install -e .[viz,html] --upgrade
100
+ - name: Run evaluation script
101
+ run: |
102
+ python scripts/evaluate.py db_resnet50 crnn_vgg16_bn --samples 10
103
+ python scripts/evaluate_kie.py db_resnet50 crnn_vgg16_bn --samples 10
104
+
105
+ test-collectenv:
106
+ runs-on: ${{ matrix.os }}
107
+ strategy:
108
+ fail-fast: false
109
+ matrix:
110
+ os: [ubuntu-latest, macos-latest, windows-latest]
111
+ python: ["3.10", "3.11"]
112
+ steps:
113
+ - uses: actions/checkout@v5
114
+ - name: Set up Python
115
+ uses: actions/setup-python@v5
116
+ with:
117
+ # MacOS issue ref.: https://github.com/actions/setup-python/issues/855 & https://github.com/actions/setup-python/issues/865
118
+ python-version: ${{ matrix.os == 'macos-latest' && matrix.python == '3.10' && '3.11' || matrix.python }}
119
+ architecture: x64
120
+ - name: Run environment collection script
121
+ run: python scripts/collect_env.py
src/python-doctr/.github/workflows/style.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: style
2
+
3
+ on:
4
+ push:
5
+ branches: main
6
+ pull_request:
7
+ branches: main
8
+
9
+ jobs:
10
+ ruff:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ matrix:
14
+ os: [ubuntu-latest]
15
+ python: ["3.10"]
16
+ steps:
17
+ - uses: actions/checkout@v5
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: ${{ matrix.python }}
22
+ architecture: x64
23
+ - name: Run ruff
24
+ run: |
25
+ pip install ruff --upgrade
26
+ ruff --version
27
+ ruff check --diff .
28
+
29
+ mypy:
30
+ runs-on: ${{ matrix.os }}
31
+ strategy:
32
+ matrix:
33
+ os: [ubuntu-latest]
34
+ python: ["3.10"]
35
+ steps:
36
+ - uses: actions/checkout@v5
37
+ - name: Set up Python
38
+ uses: actions/setup-python@v5
39
+ with:
40
+ python-version: ${{ matrix.python }}
41
+ architecture: x64
42
+ - name: Cache python modules
43
+ uses: actions/cache@v4
44
+ with:
45
+ path: ~/.cache/pip
46
+ key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-style
47
+ - name: Install dependencies
48
+ run: |
49
+ python -m pip install --upgrade pip
50
+ pip install -e .[dev] --upgrade
51
+ pip install mypy --upgrade
52
+ - name: Run mypy
53
+ run: |
54
+ mypy --version
55
+ mypy
src/python-doctr/.gitignore ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ # Temp files
132
+ doctr/version.py
133
+ logs/
134
+ wandb/
135
+ .idea/
136
+
137
+ # Checkpoints
138
+ *.pt
139
+ *.pb
140
+ *.index
src/python-doctr/.pre-commit-config.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v6.0.0
4
+ hooks:
5
+ - id: check-ast
6
+ - id: check-yaml
7
+ exclude: .conda
8
+ - id: check-toml
9
+ - id: check-json
10
+ - id: check-added-large-files
11
+ exclude: docs/images/
12
+ - id: end-of-file-fixer
13
+ - id: trailing-whitespace
14
+ - id: debug-statements
15
+ - id: check-merge-conflict
16
+ - id: no-commit-to-branch
17
+ args: ['--branch', 'main']
18
+ - repo: https://github.com/astral-sh/ruff-pre-commit
19
+ rev: v0.12.8
20
+ hooks:
21
+ - id: ruff
22
+ args: [ --fix ]
23
+ - id: ruff-format
src/python-doctr/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, religion, or sexual identity
10
+ and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ * Demonstrating empathy and kindness toward other people
21
+ * Being respectful of differing opinions, viewpoints, and experiences
22
+ * Giving and gracefully accepting constructive feedback
23
+ * Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ * Focusing on what is best not just for us as individuals, but for the
26
+ overall community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ * The use of sexualized language or imagery, and sexual attention or
31
+ advances of any kind
32
+ * Trolling, insulting or derogatory comments, and personal or political attacks
33
+ * Public or private harassment
34
+ * Publishing others' private information, such as a physical or email
35
+ address, without their explicit permission
36
+ * Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official e-mail address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement at
63
+ contact@mindee.com.
64
+ All complaints will be reviewed and investigated promptly and fairly.
65
+
66
+ All community leaders are obligated to respect the privacy and security of the
67
+ reporter of any incident.
68
+
69
+ ## Enforcement Guidelines
70
+
71
+ Community leaders will follow these Community Impact Guidelines in determining
72
+ the consequences for any action they deem in violation of this Code of Conduct:
73
+
74
+ ### 1. Correction
75
+
76
+ **Community Impact**: Use of inappropriate language or other behavior deemed
77
+ unprofessional or unwelcome in the community.
78
+
79
+ **Consequence**: A private, written warning from community leaders, providing
80
+ clarity around the nature of the violation and an explanation of why the
81
+ behavior was inappropriate. A public apology may be requested.
82
+
83
+ ### 2. Warning
84
+
85
+ **Community Impact**: A violation through a single incident or series
86
+ of actions.
87
+
88
+ **Consequence**: A warning with consequences for continued behavior. No
89
+ interaction with the people involved, including unsolicited interaction with
90
+ those enforcing the Code of Conduct, for a specified period of time. This
91
+ includes avoiding interactions in community spaces as well as external channels
92
+ like social media. Violating these terms may lead to a temporary or
93
+ permanent ban.
94
+
95
+ ### 3. Temporary Ban
96
+
97
+ **Community Impact**: A serious violation of community standards, including
98
+ sustained inappropriate behavior.
99
+
100
+ **Consequence**: A temporary ban from any sort of interaction or public
101
+ communication with the community for a specified period of time. No public or
102
+ private interaction with the people involved, including unsolicited interaction
103
+ with those enforcing the Code of Conduct, is allowed during this period.
104
+ Violating these terms may lead to a permanent ban.
105
+
106
+ ### 4. Permanent Ban
107
+
108
+ **Community Impact**: Demonstrating a pattern of violation of community
109
+ standards, including sustained inappropriate behavior, harassment of an
110
+ individual, or aggression toward or disparagement of classes of individuals.
111
+
112
+ **Consequence**: A permanent ban from any sort of public interaction within
113
+ the community.
114
+
115
+ ## Attribution
116
+
117
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118
+ version 2.0, available at
119
+ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120
+
121
+ Community Impact Guidelines were inspired by [Mozilla's code of conduct
122
+ enforcement ladder](https://github.com/mozilla/diversity).
123
+
124
+ [homepage]: https://www.contributor-covenant.org
125
+
126
+ For answers to common questions about this code of conduct, see the FAQ at
127
+ https://www.contributor-covenant.org/faq. Translations are available at
128
+ https://www.contributor-covenant.org/translations.
src/python-doctr/CONTRIBUTING.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to docTR
2
+
3
+ Everything you need to know to contribute efficiently to the project.
4
+
5
+ ## Codebase structure
6
+
7
+ - [doctr](https://github.com/mindee/doctr/blob/main/doctr) - The package codebase
8
+ - [tests](https://github.com/mindee/doctr/blob/main/tests) - Python unit tests
9
+ - [docs](https://github.com/mindee/doctr/blob/main/docs) - Library documentation building
10
+ - [scripts](https://github.com/mindee/doctr/blob/main/scripts) - Example scripts
11
+ - [references](https://github.com/mindee/doctr/blob/main/references) - Reference training scripts
12
+ - [demo](https://github.com/mindee/doctr/blob/main/demo) - Small demo app to showcase docTR capabilities
13
+ - [api](https://github.com/mindee/doctr/blob/main/api) - A minimal template to deploy a REST API with docTR
14
+
15
+ ## Continuous Integration
16
+
17
+ This project uses the following integrations to ensure proper codebase maintenance:
18
+
19
+ - [Github Workflow](https://help.github.com/en/actions/configuring-and-managing-workflows/configuring-a-workflow) - run jobs for package build and coverage
20
+ - [Codecov](https://codecov.io/) - reports back coverage results
21
+
22
+ As a contributor, you will only have to ensure coverage of your code by adding appropriate unit testing of your code.
23
+
24
+ ## Feedback
25
+
26
+ ### Feature requests & bug report
27
+
28
+ Whether you encountered a problem, or you have a feature suggestion, your input has value and can be used by contributors to reference it in their developments. For this purpose, we advise you to use Github [issues](https://github.com/mindee/doctr/issues).
29
+
30
+ First, check whether the topic wasn't already covered in an open / closed issue. If not, feel free to open a new one! When doing so, use issue templates whenever possible and provide enough information for other contributors to jump in.
31
+
32
+ ### Questions
33
+
34
+ If you are wondering how to do something with docTR, or a more general question, you should consider checking out Github [discussions](https://github.com/mindee/doctr/discussions). See it as a Q&A forum, or the docTR-specific StackOverflow!
35
+
36
+ ## Developing docTR
37
+
38
+ ### Developer mode installation
39
+
40
+ Install all additional dependencies with the following command:
41
+
42
+ ```shell
43
+ python -m pip install --upgrade pip
44
+ pip install -e '.[dev]'
45
+ pre-commit install
46
+ ```
47
+
48
+ ### Commits
49
+
50
+ - **Code**: ensure to provide docstrings to your Python code. In doing so, please follow [Google-style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) so it can ease the process of documentation later.
51
+ - **Commit message**: please follow [Udacity guide](http://udacity.github.io/git-styleguide/)
52
+
53
+ ### Unit tests
54
+
55
+ In order to run the same unit tests as the CI workflows, you can run the unit tests locally:
56
+
57
+ ```shell
58
+ make test
59
+ ```
60
+
61
+ ### Code quality
62
+
63
+ To run all quality checks together
64
+
65
+ ```shell
66
+ make quality
67
+ ```
68
+
69
+ #### Code style verification
70
+
71
+ To run all style checks together
72
+
73
+ ```shell
74
+ make style
75
+ ```
76
+
77
+ ### Modifying the documentation
78
+
79
+ The current documentation is built using `sphinx` thanks to our CI.
80
+ You can build the documentation locally:
81
+
82
+ ```shell
83
+ make docs-single-version
84
+ ```
85
+
86
+ Please note that files that have not been modified will not be rebuilt. If you want to force a complete rebuild, you can delete the `_build` directory. Additionally, you may need to clear your web browser's cache to see the modifications.
87
+
88
+ You can now open your local version of the documentation located at `docs/_build/index.html` in your browser
89
+
90
+ ## Let's connect
91
+
92
+ Should you wish to connect somewhere else than on GitHub, feel free to join us on [Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-uzgmljfl-MotFVfH~IdEZxjp~0zldww), where you will find a `#doctr` channel!
src/python-doctr/Dockerfile ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:12.2.0-base-ubuntu22.04
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+ ENV LANG=C.UTF-8
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV PYTHONDONTWRITEBYTECODE=1
7
+
8
+
9
+ RUN apt-get update && apt-get install -y --no-install-recommends \
10
+ # - Other packages
11
+ build-essential \
12
+ pkg-config \
13
+ curl \
14
+ wget \
15
+ software-properties-common \
16
+ unzip \
17
+ git \
18
+ # - Packages to build Python
19
+ tar make gcc zlib1g-dev libffi-dev libssl-dev liblzma-dev libbz2-dev libsqlite3-dev \
20
+ # - Packages for docTR
21
+ libgl1-mesa-dev libsm6 libxext6 libxrender-dev libpangocairo-1.0-0 \
22
+ && apt-get clean \
23
+ && rm -rf /var/lib/apt/lists/*
24
+
25
+ # Install Python
26
+ ARG PYTHON_VERSION=3.10.13
27
+
28
+ RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
29
+ tar -zxf Python-$PYTHON_VERSION.tgz && \
30
+ cd Python-$PYTHON_VERSION && \
31
+ mkdir /opt/python/ && \
32
+ ./configure --prefix=/opt/python && \
33
+ make && \
34
+ make install && \
35
+ cd .. && \
36
+ rm Python-$PYTHON_VERSION.tgz && \
37
+ rm -r Python-$PYTHON_VERSION
38
+
39
+ ENV PATH=/opt/python/bin:$PATH
40
+
41
+ # Install docTR
42
+ ARG FRAMEWORK=torch
43
+ ARG DOCTR_REPO='mindee/doctr'
44
+ ARG DOCTR_VERSION=main
45
+ RUN pip3 install -U pip setuptools wheel && \
46
+ pip3 install "python-doctr[$FRAMEWORK]@git+https://github.com/$DOCTR_REPO.git@$DOCTR_VERSION"
src/python-doctr/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2022 Mindee
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
src/python-doctr/Makefile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: quality style test test-common test-torch docs-single-version docs
2
+ # this target runs checks on all files
3
+ quality:
4
+ ruff check .
5
+ mypy doctr/
6
+
7
+ # this target runs checks on all files and potentially modifies some of them
8
+ style:
9
+ ruff format .
10
+ ruff check --fix .
11
+
12
+ # Run tests for the library
13
+ test:
14
+ coverage run -m pytest tests/common/ -rs
15
+ coverage run -m pytest tests/pytorch/ -rs
16
+
17
+ test-common:
18
+ coverage run -m pytest tests/common/ -rs
19
+
20
+ test-torch:
21
+ coverage run -m pytest tests/pytorch/ -rs
22
+
23
+ # Check that docs can build
24
+ docs-single-version:
25
+ sphinx-build docs/source docs/_build -a
26
+
27
+ # Build the multi-version docs
28
+ docs:
29
+ cd docs && bash build.sh
src/python-doctr/README.md ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
3
+ </p>
4
+
5
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v1.0.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
6
+
7
+
8
+ **Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch**
9
+
10
+ What you can expect from this repository:
11
+
12
+ - efficient ways to parse textual information (localize and identify each word) from your documents
13
+ - guidance on how to integrate this in your current architecture
14
+
15
+ ![OCR_example](https://github.com/mindee/doctr/raw/main/docs/images/ocr.png)
16
+
17
+ ## Quick Tour
18
+
19
+ ### Getting your pretrained model
20
+
21
+ End-to-End OCR is achieved in docTR using a two-stage approach: text detection (localizing words), then text recognition (identify all characters in the word).
22
+ As such, you can select the architecture used for [text detection](https://mindee.github.io/doctr/latest/modules/models.html#doctr-models-detection), and the one for [text recognition](https://mindee.github.io/doctr/latest//modules/models.html#doctr-models-recognition) from the list of available implementations.
23
+
24
+ ```python
25
+ from doctr.models import ocr_predictor
26
+
27
+ model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
28
+ ```
29
+
30
+ ### Reading files
31
+
32
+ Documents can be interpreted from PDF or images:
33
+
34
+ ```python
35
+ from doctr.io import DocumentFile
36
+ # PDF
37
+ pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
38
+ # Image
39
+ single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
40
+ # Webpage (requires `weasyprint` to be installed)
41
+ webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
42
+ # Multiple page images
43
+ multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
44
+ ```
45
+
46
+ ### Putting it together
47
+
48
+ Let's use the default pretrained model for an example:
49
+
50
+ ```python
51
+ from doctr.io import DocumentFile
52
+ from doctr.models import ocr_predictor
53
+
54
+ model = ocr_predictor(pretrained=True)
55
+ # PDF
56
+ doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
57
+ # Analyze
58
+ result = model(doc)
59
+ ```
60
+
61
+ ### Dealing with rotated documents
62
+
63
+ Should you use docTR on documents that include rotated pages, or pages with multiple box orientations,
64
+ you have multiple options to handle it:
65
+
66
+ - If you only use straight document pages with straight words (horizontal, same reading direction),
67
+ consider passing `assume_straight_pages=True` to the ocr_predictor. It will directly fit straight boxes
68
+ on your page and return straight boxes, which makes it the fastest option.
69
+
70
+ - If you want the predictor to output straight boxes (no matter the orientation of your pages, the final localizations
71
+ will be converted to straight boxes), you need to pass `export_as_straight_boxes=True` in the predictor. Otherwise, if `assume_straight_pages=False`, it will return rotated bounding boxes (potentially with an angle of 0°).
72
+
73
+ If both options are set to False, the predictor will always fit and return rotated boxes.
74
+
75
+ To interpret your model's predictions, you can visualize them interactively as follows:
76
+
77
+ ```python
78
+ # Display the result (requires matplotlib & mplcursors to be installed)
79
+ result.show()
80
+ ```
81
+
82
+ ![Visualization sample](https://github.com/mindee/doctr/raw/main/docs/images/doctr_example_script.gif)
83
+
84
+ Or even rebuild the original document from its predictions:
85
+
86
+ ```python
87
+ import matplotlib.pyplot as plt
88
+
89
+ synthetic_pages = result.synthesize()
90
+ plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show()
91
+ ```
92
+
93
+ ![Synthesis sample](https://github.com/mindee/doctr/raw/main/docs/images/synthesized_sample.png)
94
+
95
+ The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`).
96
+ To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure):
97
+
98
+ You can also export them as a nested dict, more appropriate for JSON format:
99
+
100
+ ```python
101
+ json_output = result.export()
102
+ ```
103
+
104
+ ### Use the KIE predictor
105
+
106
+ The KIE predictor is a more flexible predictor compared to OCR as your detection model can detect multiple classes in a document. For example, you can have a detection model to detect just dates and addresses in a document.
107
+
108
+ The KIE predictor makes it possible to use detector with multiple classes with a recognition model and to have the whole pipeline already setup for you.
109
+
110
+ ```python
111
+ from doctr.io import DocumentFile
112
+ from doctr.models import kie_predictor
113
+
114
+ # Model
115
+ model = kie_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
116
+ # PDF
117
+ doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
118
+ # Analyze
119
+ result = model(doc)
120
+
121
+ predictions = result.pages[0].predictions
122
+ for class_name in predictions.keys():
123
+ list_predictions = predictions[class_name]
124
+ for prediction in list_predictions:
125
+ print(f"Prediction for {class_name}: {prediction}")
126
+ ```
127
+
128
+ The KIE predictor results per page are in a dictionary format, with each key representing a class name and its value being the predictions for that class.
129
+
130
+ ### If you are looking for support from the Mindee team
131
+
132
+ [![Bad OCR test detection image asking the developer if they need help](https://github.com/mindee/doctr/raw/main/docs/images/doctr-need-help.png)](https://mindee.com/product/doctr)
133
+
134
+ ## Installation
135
+
136
+ ### Prerequisites
137
+
138
+ Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
139
+
140
+ ### Latest release
141
+
142
+ You can then install the latest release of the package using [pypi](https://pypi.org/project/python-doctr/) as follows:
143
+
144
+ ```shell
145
+ pip install python-doctr
146
+ ```
147
+
148
+ We try to keep extra dependencies to a minimum. You can install specific builds as follows:
149
+
150
+ ```shell
151
+ # standard build
152
+ pip install python-doctr
153
+ # optional dependencies for visualization, html, and contrib modules can be installed as follows:
154
+ pip install "python-doctr[viz,html,contrib]"
155
+ ```
156
+
157
+ ### Developer mode
158
+
159
+ Alternatively, you can install it from source, which will require you to install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).
160
+ First clone the project repository:
161
+
162
+ ```shell
163
+ git clone https://github.com/mindee/doctr.git
164
+ pip install -e doctr/.
165
+ ```
166
+
167
+ Again, if you prefer to avoid the risk of missing dependencies, you can install the build:
168
+
169
+ ```shell
170
+ pip install -e doctr/.
171
+ ```
172
+
173
+ ## Models architectures
174
+
175
+ Credits where it's due: this repository is implementing, among others, architectures from published research papers.
176
+
177
+ ### Text Detection
178
+
179
+ - DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
180
+ - LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
181
+ - FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)
182
+
183
+ ### Text Recognition
184
+
185
+ - CRNN: [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/pdf/1507.05717.pdf).
186
+ - SAR: [Show, Attend and Read:A Simple and Strong Baseline for Irregular Text Recognition](https://arxiv.org/pdf/1811.00751.pdf).
187
+ - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
188
+ - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
189
+ - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
190
+ - VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
191
+
192
+ ## More goodies
193
+
194
+ ### Documentation
195
+
196
+ The full package documentation is available [here](https://mindee.github.io/doctr/) for detailed specifications.
197
+
198
+ ### Demo app
199
+
200
+ A minimal demo app is provided for you to play with our end-to-end OCR models!
201
+
202
+ ![Demo app](https://github.com/mindee/doctr/raw/main/docs/images/demo_update.png)
203
+
204
+ #### Live demo
205
+
206
+ Courtesy of :hugs: [Hugging Face](https://huggingface.co/) :hugs:, docTR now has a fully deployed version available on [Spaces](https://huggingface.co/spaces)!
207
+ Check it out [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr)
208
+
209
+ #### Running it locally
210
+
211
+ If you prefer to use it locally, there is an extra dependency ([Streamlit](https://streamlit.io/)) that is required.
212
+
213
+ ```shell
214
+ pip install -r demo/pt-requirements.txt
215
+ ```
216
+
217
+ Then run your app in your default browser with:
218
+
219
+ ```shell
220
+ streamlit run demo/app.py
221
+ ```
222
+
223
+ ### Docker container
224
+
225
+ We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).
226
+
227
+ #### Using GPU with docTR Docker Images
228
+
229
+ The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch won't be able to initialize the GPU.
230
+ Please ensure that Docker is configured to use your GPU.
231
+
232
+ To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
233
+
234
+ Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support:
235
+
236
+ ```shell
237
+ docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
238
+ ```
239
+
240
+ #### Available Tags
241
+
242
+ The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
243
+
244
+ - `<deps>`: `torch`, `torch-viz-html-contrib`.
245
+ - `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
246
+ - `<doctr_version>`: a tag >= `v0.11.0`
247
+ - `<YYYY-MM>`: e.g. `2024-10`
248
+
249
+ Here are examples of different image tags:
250
+
251
+ | Tag | Description |
252
+ |----------------------------|---------------------------------------------------|
253
+ | `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies, Python version `3.11.8`, from latest commit on `main` in `2024-10`. |
254
+ | `torch-py3.11.8-2024-10`| PyTorch with Python version `3.11.8`, from latest commit on `main` in `2024-10`. |
255
+
256
+ #### Building Docker Images Locally
257
+
258
+ You can also build docTR Docker images locally on your computer.
259
+
260
+ ```shell
261
+ docker build -t doctr .
262
+ ```
263
+
264
+ You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with PyTorch, Python version `3.9.10`, and docTR version `v0.7.0`, run the following command:
265
+
266
+ ```shell
267
+ docker build -t doctr --build-arg FRAMEWORK=torch --build-arg PYTHON_VERSION=3.9.10 --build-arg DOCTR_VERSION=v0.7.0 .
268
+ ```
269
+
270
+ ### Example script
271
+
272
+ An example script is provided for a simple documentation analysis of a PDF or image file:
273
+
274
+ ```shell
275
+ python scripts/analyze.py path/to/your/doc.pdf
276
+ ```
277
+
278
+ All script arguments can be checked using `python scripts/analyze.py --help`
279
+
280
+ ### Minimal API integration
281
+
282
+ Looking to integrate docTR into your API? Here is a template to get you started with a fully working API using the wonderful [FastAPI](https://github.com/tiangolo/fastapi) framework.
283
+
284
+ #### Deploy your API locally
285
+
286
+ Specific dependencies are required to run the API template, which you can install as follows:
287
+
288
+ ```shell
289
+ cd api/
290
+ pip install poetry
291
+ make lock
292
+ pip install -r requirements.txt
293
+ ```
294
+
295
+ You can now run your API locally:
296
+
297
+ ```shell
298
+ uvicorn --reload --workers 1 --host 0.0.0.0 --port=8002 --app-dir api/ app.main:app
299
+ ```
300
+
301
+ Alternatively, you can run the same server on a docker container if you prefer using:
302
+
303
+ ```shell
304
+ PORT=8002 docker-compose up -d --build
305
+ ```
306
+
307
+ #### What you have deployed
308
+
309
+ Your API should now be running locally on your port 8002. Access your automatically-built documentation at [http://localhost:8002/redoc](http://localhost:8002/redoc) and enjoy your four functional routes ("/detection", "/recognition", "/ocr", "/kie"). Here is an example with Python to send a request to the OCR route:
310
+
311
+ ```python
312
+ import requests
313
+
314
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
315
+
316
+ with open('/path/to/your/doc.jpg', 'rb') as f:
317
+ files = [ # application/pdf, image/jpeg, image/png supported
318
+ ("files", ("doc.jpg", f.read(), "image/jpeg")),
319
+ ]
320
+ print(requests.post("http://localhost:8002/ocr", params=params, files=files).json())
321
+ ```
322
+
323
+ ### Example notebooks
324
+
325
+ Looking for more illustrations of docTR features? You might want to check the [Jupyter notebooks](https://github.com/mindee/doctr/tree/main/notebooks) designed to give you a broader overview.
326
+
327
+ ## Citation
328
+
329
+ If you wish to cite this project, feel free to use this [BibTeX](http://www.bibtex.org/) reference:
330
+
331
+ ```bibtex
332
+ @misc{doctr2021,
333
+ title={docTR: Document Text Recognition},
334
+ author={Mindee},
335
+ year={2021},
336
+ publisher = {GitHub},
337
+ howpublished = {\url{https://github.com/mindee/doctr}}
338
+ }
339
+ ```
340
+
341
+ ## Contributing
342
+
343
+ If you scrolled down to this section, you most likely appreciate open source. Do you feel like extending the range of our supported characters? Or perhaps submitting a paper implementation? Or contributing in any other way?
344
+
345
+ You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](https://mindee.github.io/doctr/contributing/contributing.html)) for you to easily do so!
346
+
347
+ ## License
348
+
349
+ Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/mindee/doctr?tab=Apache-2.0-1-ov-file#readme) for more information.
src/python-doctr/api/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poetry.lock
2
+ requirements*
src/python-doctr/api/Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.10-slim

WORKDIR /app

# Set environment variables.
# NOTE: use the `ENV key=value` form — the space-separated `ENV key value`
# form is legacy and deprecated in the Dockerfile reference.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH="${PYTHONPATH}:/app"

# System dependencies: git (pip VCS installs), ffmpeg/libsm6/libxext6 (OpenCV
# runtime requirements), make (to run the lock target below).
RUN apt-get update \
    && apt-get install --no-install-recommends git ffmpeg libsm6 libxext6 make -y \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

COPY pyproject.toml /app/pyproject.toml
COPY Makefile /app/Makefile

# Pin dependencies with poetry (make lock), install them, then trim pip caches
# to keep the image small.
RUN pip install --upgrade pip setuptools wheel \
    && make lock \
    && pip install -r /app/requirements.txt \
    && pip cache purge \
    && rm -rf /root/.cache/pip

# copy project
COPY app /app/app
src/python-doctr/api/Makefile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# api setup is borrowed from https://github.com/frgfm/Holocron/blob/main/api

.PHONY: lock run stop test
# Pin the dependencies
lock:
	# Quote the requirement: an unquoted `poetry>=1.0` is parsed by the shell
	# as an output redirection to a file named `=1.0`, dropping the version pin.
	pip install "poetry>=1.0" poetry-plugin-export
	poetry lock
	poetry export -f requirements.txt --without-hashes --output requirements.txt
	poetry export -f requirements.txt --without-hashes --with dev --output requirements-dev.txt

# Run the docker
run:
	docker compose up -d --build

# Stop the docker
stop:
	docker compose down

# Run tests for the library
test:
	docker compose up -d --build
	docker cp requirements-dev.txt api_web:/app/requirements-dev.txt
	docker compose exec -T web pip install -r requirements-dev.txt
	docker cp tests api_web:/app/tests
	docker compose exec -T web pytest tests/ -vv
	docker compose down
src/python-doctr/api/README.md ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Template for your OCR API using docTR
2
+
3
+ ## Installation
4
+
5
+ You will only need to install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git), [Docker](https://docs.docker.com/get-docker/) and [poetry](https://python-poetry.org/docs/#installation). The container environment will be self-sufficient and install the remaining dependencies on its own.
6
+
7
+ ## Usage
8
+
9
+ ### Starting your web server
10
+
11
+ You will need to clone the repository first, go into `api` folder and start the api:
12
+
13
+ ```shell
14
+ git clone https://github.com/mindee/doctr.git
15
+ cd doctr/api
16
+ make run
17
+ ```
18
+
19
+ Once completed, your [FastAPI](https://fastapi.tiangolo.com/) server should be running on port 8080.
20
+
21
+ ### Documentation and swagger
22
+
23
+ FastAPI comes with many advantages including speed and OpenAPI features. For instance, once your server is running, you can access the automatically built documentation and swagger in your browser at: [http://localhost:8080/docs](http://localhost:8080/docs)
24
+
25
+ ### Using the routes
26
+
27
+ You will find detailed instructions in the live documentation when your server is up, but here are some examples to use your available API routes:
28
+
29
+ #### Text detection
30
+
31
+ Using the following image:
32
+ <img src="https://user-images.githubusercontent.com/76527547/117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg" width="50%" height="50%">
33
+
34
+ with this snippet:
35
+
36
+ ```python
37
+ import requests
38
+
39
+ headers = {"accept": "application/json"}
40
+ params = {"det_arch": "db_resnet50"}
41
+
42
+ with open('/path/to/your/img.jpg', 'rb') as f:
43
+ files = [ # application/pdf, image/jpeg, image/png supported
44
+ ("files", ("117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg", f.read(), "image/jpeg")),
45
+ ]
46
+ print(requests.post("http://localhost:8080/detection", headers=headers, params=params, files=files).json())
47
+ ```
48
+
49
+ should yield
50
+
51
+ ```json
52
+ [
53
+ {
54
+ "name": "117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg",
55
+ "geometries": [
56
+ [
57
+ 0.8176307908857315,
58
+ 0.1787109375,
59
+ 0.9101580212741838,
60
+ 0.2080078125
61
+ ],
62
+ [
63
+ 0.7471996155154171,
64
+ 0.1796875,
65
+ 0.8272978149561669,
66
+ 0.20703125
67
+ ]
68
+ ]
69
+ }
70
+ ]
71
+ ```
72
+
73
+ #### Text recognition
74
+
75
+ Using the following image:
76
+ ![recognition-sample](https://user-images.githubusercontent.com/76527547/117133599-c073fa00-ada4-11eb-831b-412de4d28341.jpeg)
77
+
78
+ with this snippet:
79
+
80
+ ```python
81
+ import requests
82
+
83
+ headers = {"accept": "application/json"}
84
+ params = {"reco_arch": "crnn_vgg16_bn"}
85
+
86
+ with open('/path/to/your/img.jpg', 'rb') as f:
87
+ files = [ # application/pdf, image/jpeg, image/png supported
88
+ ("files", ("117133599-c073fa00-ada4-11eb-831b-412de4d28341.jpeg", f.read(), "image/jpeg")),
89
+ ]
90
+ print(requests.post("http://localhost:8080/recognition", headers=headers, params=params, files=files).json())
91
+ ```
92
+
93
+ should yield
94
+
95
+ ```json
96
+ [
97
+ {
98
+ "name": "117133599-c073fa00-ada4-11eb-831b-412de4d28341.jpeg",
99
+ "value": "invite",
100
+ "confidence": 1.0
101
+ }
102
+ ]
103
+ ```
104
+
105
+ #### End-to-end OCR
106
+
107
+ Using the following image:
108
+ <img src="https://user-images.githubusercontent.com/76527547/117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg" width="50%" height="50%">
109
+
110
+ with this snippet:
111
+
112
+ ```python
113
+ import requests
114
+
115
+ headers = {"accept": "application/json"}
116
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
117
+
118
+ with open('/path/to/your/img.jpg', 'rb') as f:
119
+ files = [ # application/pdf, image/jpeg, image/png supported
120
+ ("files", ("117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg", f.read(), "image/jpeg")),
121
+ ]
122
+ print(requests.post("http://localhost:8080/ocr", headers=headers, params=params, files=files).json())
123
+ ```
124
+
125
+ should yield
126
+
127
+ ```json
128
+ [
129
+ {
130
+ "name": "117319856-fc35bf00-ae8b-11eb-9b51-ca5aba673466.jpg",
131
+ "orientation": {
132
+ "value": 0,
133
+ "confidence": null
134
+ },
135
+ "language": {
136
+ "value": null,
137
+ "confidence": null
138
+ },
139
+ "dimensions": [2339, 1654],
140
+ "items": [
141
+ {
142
+ "blocks": [
143
+ {
144
+ "geometry": [
145
+ 0.7471996155154171,
146
+ 0.1787109375,
147
+ 0.9101580212741838,
148
+ 0.2080078125
149
+ ],
150
+ "objectness_score": 0.5,
151
+ "lines": [
152
+ {
153
+ "geometry": [
154
+ 0.7471996155154171,
155
+ 0.1787109375,
156
+ 0.9101580212741838,
157
+ 0.2080078125
158
+ ],
159
+ "objectness_score": 0.5,
160
+ "words": [
161
+ {
162
+ "value": "Hello",
163
+ "geometry": [
164
+ 0.7471996155154171,
165
+ 0.1796875,
166
+ 0.8272978149561669,
167
+ 0.20703125
168
+ ],
169
+ "objectness_score": 0.5,
170
+ "confidence": 1.0,
171
+ "crop_orientation": {"value": 0, "confidence": null}
172
+ },
173
+ {
174
+ "value": "world!",
175
+ "geometry": [
176
+ 0.8176307908857315,
177
+ 0.1787109375,
178
+ 0.9101580212741838,
179
+ 0.2080078125
180
+ ],
181
+ "objectness_score": 0.5,
182
+ "confidence": 1.0,
183
+ "crop_orientation": {"value": 0, "confidence": null}
184
+ }
185
+ ]
186
+ }
187
+ ]
188
+ }
189
+ ]
190
+ }
191
+ ]
192
+ }
193
+ ]
194
+ ```
src/python-doctr/api/app/config.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

import os

import doctr

# Metadata surfaced in the FastAPI-generated documentation.
PROJECT_NAME: str = "docTR API template"
PROJECT_DESCRIPTION: str = "Template API for Optical Character Recognition"
# Keep the API version in sync with the installed docTR package.
VERSION: str = doctr.__version__
# Debug stays enabled unless the DEBUG env var is exactly the string "False"
# (unset or any other value leaves it on).
DEBUG: bool = os.environ.get("DEBUG", "") != "False"
src/python-doctr/api/app/main.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

import time

from fastapi import FastAPI, Request
from fastapi.openapi.utils import get_openapi

from app import config as cfg
from app.routes import detection, kie, ocr, recognition

# Application instance: metadata comes from the config module so the docs stay
# in sync with the installed docTR version.
app = FastAPI(title=cfg.PROJECT_NAME, description=cfg.PROJECT_DESCRIPTION, debug=cfg.DEBUG, version=cfg.VERSION)


# Routing: one sub-router per task, each mounted under its own URL prefix.
app.include_router(recognition.router, prefix="/recognition", tags=["recognition"])
app.include_router(detection.router, prefix="/detection", tags=["detection"])
app.include_router(ocr.router, prefix="/ocr", tags=["ocr"])
app.include_router(kie.router, prefix="/kie", tags=["kie"])


# Middleware
@app.middleware("http")
async def add_process_time_header(request: Request, call_next):
    """Report each request's wall-clock processing time in an X-Process-Time header."""
    start_time = time.time()
    response = await call_next(request)
    process_time = time.time() - start_time
    response.headers["X-Process-Time"] = str(process_time)
    return response


# Docs
def custom_openapi():
    """Build the OpenAPI schema from the project metadata, caching it on the app."""
    if app.openapi_schema:
        # Schema already generated once: reuse the cached copy.
        return app.openapi_schema
    openapi_schema = get_openapi(
        title=cfg.PROJECT_NAME,
        version=cfg.VERSION,
        description=cfg.PROJECT_DESCRIPTION,
        routes=app.routes,
    )
    app.openapi_schema = openapi_schema
    return app.openapi_schema


# Replace FastAPI's default schema generator with the cached variant above.
app.openapi = custom_openapi
src/python-doctr/api/app/routes/detection.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.


from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import DetectionIn, DetectionOut
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor
from doctr.file_utils import CLASS_NAME

router = APIRouter()


@router.post("/", response_model=list[DetectionOut], status_code=status.HTTP_200_OK, summary="Perform text detection")
async def text_detection(request: DetectionIn = Depends(), files: list[UploadFile] = [File(...)]):
    """Runs docTR text detection model to analyze the input image

    Args:
        request: detection predictor configuration parsed from query parameters
        files: uploaded documents (JPEG/PNG images or PDFs)

    Returns:
        one DetectionOut per input page, holding the detected box geometries

    Raises:
        HTTPException: 400 when a file is unsupported or the predictor config is invalid
    """
    try:
        predictor = init_predictor(request)
        content, filenames = await get_documents(files)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    return [
        DetectionOut(
            name=filename,
            geometries=[
                # Straight boxes arrive as (5,) arrays [xmin, ymin, xmax, ymax, score]:
                # drop the trailing score. Otherwise the geometry is a polygon: flatten
                # its first four (x, y) corner points into one coordinate tuple.
                geom[:-1].tolist() if geom.shape == (5,) else resolve_geometry(geom[:4].tolist())
                for geom in doc[CLASS_NAME]
            ],
        )
        for doc, filename in zip(predictor(content), filenames)
    ]
src/python-doctr/api/app/routes/kie.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.


from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import KIEElement, KIEIn, KIEOut
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor

router = APIRouter()


@router.post("/", response_model=list[KIEOut], status_code=status.HTTP_200_OK, summary="Perform KIE")
async def perform_kie(request: KIEIn = Depends(), files: list[UploadFile] = [File(...)]):
    """Runs docTR KIE model to analyze the input image

    Args:
        request: KIE predictor configuration parsed from query parameters
        files: uploaded documents (JPEG/PNG images or PDFs)

    Returns:
        one KIEOut per analyzed page, with predictions grouped by detected class

    Raises:
        HTTPException: 400 when a file is unsupported or the predictor config is invalid
    """
    try:
        predictor = init_predictor(request)
        content, filenames = await get_documents(files)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    out = predictor(content)

    # One KIEOut per page; each page groups its items by predicted class name.
    results = [
        KIEOut(
            name=filenames[i],
            orientation=page.orientation,
            language=page.language,
            dimensions=page.dimensions,
            predictions=[
                KIEElement(
                    class_name=class_name,
                    items=[
                        dict(
                            value=prediction.value,
                            # Flatten the (x, y) point pairs into one coordinate tuple.
                            geometry=resolve_geometry(prediction.geometry),
                            objectness_score=round(prediction.objectness_score, 2),
                            confidence=round(prediction.confidence, 2),
                            crop_orientation=prediction.crop_orientation,
                        )
                        for prediction in page.predictions[class_name]
                    ],
                )
                for class_name in page.predictions.keys()
            ],
        )
        for i, page in enumerate(out.pages)
    ]

    return results
src/python-doctr/api/app/routes/ocr.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.


from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import OCRBlock, OCRIn, OCRLine, OCROut, OCRPage, OCRWord
from app.utils import get_documents, resolve_geometry
from app.vision import init_predictor

router = APIRouter()


@router.post("/", response_model=list[OCROut], status_code=status.HTTP_200_OK, summary="Perform OCR")
async def perform_ocr(request: OCRIn = Depends(), files: list[UploadFile] = [File(...)]):
    """Runs docTR OCR model to analyze the input image

    Args:
        request: OCR predictor configuration parsed from query parameters
        files: uploaded documents (JPEG/PNG images or PDFs)

    Returns:
        one OCROut per analyzed page, mirroring docTR's page/block/line/word hierarchy

    Raises:
        HTTPException: 400 when a file is unsupported or the predictor config is invalid
    """
    try:
        # generator object to list
        content, filenames = await get_documents(files)
        predictor = init_predictor(request)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    out = predictor(content)

    # Convert the nested docTR Document structure into the response schema,
    # rounding scores to 2 decimals and flattening geometries on the way.
    results = [
        OCROut(
            name=filenames[i],
            orientation=page.orientation,
            language=page.language,
            dimensions=page.dimensions,
            items=[
                OCRPage(
                    blocks=[
                        OCRBlock(
                            geometry=resolve_geometry(block.geometry),
                            objectness_score=round(block.objectness_score, 2),
                            lines=[
                                OCRLine(
                                    geometry=resolve_geometry(line.geometry),
                                    objectness_score=round(line.objectness_score, 2),
                                    words=[
                                        OCRWord(
                                            value=word.value,
                                            geometry=resolve_geometry(word.geometry),
                                            objectness_score=round(word.objectness_score, 2),
                                            confidence=round(word.confidence, 2),
                                            crop_orientation=word.crop_orientation,
                                        )
                                        for word in line.words
                                    ],
                                )
                                for line in block.lines
                            ],
                        )
                        for block in page.blocks
                    ]
                )
            ],
        )
        for i, page in enumerate(out.pages)
    ]

    return results
src/python-doctr/api/app/routes/recognition.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.


from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status

from app.schemas import RecognitionIn, RecognitionOut
from app.utils import get_documents
from app.vision import init_predictor

router = APIRouter()


@router.post(
    "/", response_model=list[RecognitionOut], status_code=status.HTTP_200_OK, summary="Perform text recognition"
)
async def text_recognition(request: RecognitionIn = Depends(), files: list[UploadFile] = [File(...)]):
    """Runs docTR text recognition model to analyze the input image"""
    try:
        predictor = init_predictor(request)
        content, filenames = await get_documents(files)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    # The predictor yields one (value, confidence) pair per input crop,
    # in the same order as the uploaded files.
    outputs = []
    for prediction, filename in zip(predictor(content), filenames):
        word_value, word_confidence = prediction[0], prediction[1]
        outputs.append(RecognitionOut(name=filename, value=word_value, confidence=round(word_confidence, 2)))
    return outputs
src/python-doctr/api/app/schemas.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

from typing import Any

from pydantic import BaseModel, Field


class KIEIn(BaseModel):
    """Query parameters configuring the KIE predictor (architectures, batching, thresholds)."""

    det_arch: str = Field(default="db_resnet50", examples=["db_resnet50"])
    reco_arch: str = Field(default="crnn_vgg16_bn", examples=["crnn_vgg16_bn"])
    assume_straight_pages: bool = Field(default=True, examples=[True])
    preserve_aspect_ratio: bool = Field(default=True, examples=[True])
    detect_orientation: bool = Field(default=False, examples=[False])
    detect_language: bool = Field(default=False, examples=[False])
    symmetric_pad: bool = Field(default=True, examples=[True])
    straighten_pages: bool = Field(default=False, examples=[False])
    # Batch sizes for the detection and recognition stages.
    det_bs: int = Field(default=2, examples=[2])
    reco_bs: int = Field(default=128, examples=[128])
    disable_page_orientation: bool = Field(default=False, examples=[False])
    disable_crop_orientation: bool = Field(default=False, examples=[False])
    # Binarization and box-score thresholds of the detection post-processor.
    bin_thresh: float = Field(default=0.1, examples=[0.1])
    box_thresh: float = Field(default=0.1, examples=[0.1])


class OCRIn(KIEIn, BaseModel):
    """KIE parameters plus the layout-resolution options used by the OCR route."""

    resolve_lines: bool = Field(default=True, examples=[True])
    resolve_blocks: bool = Field(default=False, examples=[False])
    paragraph_break: float = Field(default=0.0035, examples=[0.0035])


class RecognitionIn(BaseModel):
    """Query parameters configuring the recognition-only predictor."""

    reco_arch: str = Field(default="crnn_vgg16_bn", examples=["crnn_vgg16_bn"])
    reco_bs: int = Field(default=128, examples=[128])


class DetectionIn(BaseModel):
    """Query parameters configuring the detection-only predictor."""

    det_arch: str = Field(default="db_resnet50", examples=["db_resnet50"])
    assume_straight_pages: bool = Field(default=True, examples=[True])
    preserve_aspect_ratio: bool = Field(default=True, examples=[True])
    symmetric_pad: bool = Field(default=True, examples=[True])
    det_bs: int = Field(default=2, examples=[2])
    bin_thresh: float = Field(default=0.1, examples=[0.1])
    box_thresh: float = Field(default=0.1, examples=[0.1])


class RecognitionOut(BaseModel):
    """One recognized word crop: source filename, text value, confidence."""

    name: str = Field(..., examples=["example.jpg"])
    value: str = Field(..., examples=["Hello"])
    confidence: float = Field(..., examples=[0.99])


class DetectionOut(BaseModel):
    """Detected box geometries for one input page."""

    name: str = Field(..., examples=["example.jpg"])
    geometries: list[list[float]] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]])


class OCRWord(BaseModel):
    """A single recognized word with its geometry and scores."""

    value: str = Field(..., examples=["example"])
    geometry: list[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]])
    objectness_score: float = Field(..., examples=[0.99])
    confidence: float = Field(..., examples=[0.99])
    crop_orientation: dict[str, Any] = Field(..., examples=[{"value": 0, "confidence": None}])


class OCRLine(BaseModel):
    """A text line: its geometry plus the words it contains."""

    geometry: list[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]])
    objectness_score: float = Field(..., examples=[0.99])
    words: list[OCRWord] = Field(
        ...,
        examples=[
            {
                "value": "example",
                "geometry": [0.0, 0.0, 0.0, 0.0],
                "objectness_score": 0.99,
                "confidence": 0.99,
                "crop_orientation": {"value": 0, "confidence": None},
            }
        ],
    )


class OCRBlock(BaseModel):
    """A block of text: its geometry plus the lines it contains."""

    geometry: list[float] = Field(..., examples=[[0.0, 0.0, 0.0, 0.0]])
    objectness_score: float = Field(..., examples=[0.99])
    lines: list[OCRLine] = Field(
        ...,
        examples=[
            {
                "geometry": [0.0, 0.0, 0.0, 0.0],
                "objectness_score": 0.99,
                "words": [
                    {
                        "value": "example",
                        "geometry": [0.0, 0.0, 0.0, 0.0],
                        "confidence": 0.99,
                        "crop_orientation": {"value": 0, "confidence": None},
                    }
                ],
            }
        ],
    )


class OCRPage(BaseModel):
    """One analyzed page as a list of blocks."""

    blocks: list[OCRBlock] = Field(
        ...,
        examples=[
            {
                "geometry": [0.0, 0.0, 0.0, 0.0],
                "objectness_score": 0.99,
                "lines": [
                    {
                        "geometry": [0.0, 0.0, 0.0, 0.0],
                        "objectness_score": 0.99,
                        "words": [
                            {
                                "value": "example",
                                "geometry": [0.0, 0.0, 0.0, 0.0],
                                "objectness_score": 0.99,
                                "confidence": 0.99,
                                "crop_orientation": {"value": 0, "confidence": None},
                            }
                        ],
                    }
                ],
            }
        ],
    )


class OCROut(BaseModel):
    """Full OCR response for one page: metadata plus the page/block/line/word tree."""

    name: str = Field(..., examples=["example.jpg"])
    orientation: dict[str, float | None] = Field(..., examples=[{"value": 0.0, "confidence": 0.99}])
    language: dict[str, str | float | None] = Field(..., examples=[{"value": "en", "confidence": 0.99}])
    dimensions: tuple[int, int] = Field(..., examples=[(100, 100)])
    items: list[OCRPage] = Field(
        ...,
        examples=[
            {
                "geometry": [0.0, 0.0, 0.0, 0.0],
                "objectness_score": 0.99,
                "lines": [
                    {
                        "geometry": [0.0, 0.0, 0.0, 0.0],
                        "objectness_score": 0.99,
                        "words": [
                            {
                                "value": "example",
                                "geometry": [0.0, 0.0, 0.0, 0.0],
                                "objectness_score": 0.99,
                                "confidence": 0.99,
                                "crop_orientation": {"value": 0, "confidence": None},
                            }
                        ],
                    }
                ],
            }
        ],
    )


class KIEElement(BaseModel):
    """All predictions of one class on a page, as plain dict items."""

    class_name: str = Field(..., examples=["example"])
    items: list[dict[str, str | list[float] | float | dict[str, Any]]] = Field(
        ...,
        examples=[
            {
                "value": "example",
                "geometry": [0.0, 0.0, 0.0, 0.0],
                "objectness_score": 0.99,
                "confidence": 0.99,
                "crop_orientation": {"value": 0, "confidence": None},
            }
        ],
    )


class KIEOut(BaseModel):
    """Full KIE response for one page: metadata plus per-class predictions."""

    name: str = Field(..., examples=["example.jpg"])
    orientation: dict[str, float | None] = Field(..., examples=[{"value": 0.0, "confidence": 0.99}])
    language: dict[str, str | float | None] = Field(..., examples=[{"value": "en", "confidence": 0.99}])
    dimensions: tuple[int, int] = Field(..., examples=[(100, 100)])
    predictions: list[KIEElement]
src/python-doctr/api/app/utils.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) 2021-2025, Mindee.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ from fastapi import UploadFile
11
+
12
+ from doctr.io import DocumentFile
13
+
14
+
15
+ def resolve_geometry(
16
+ geom: Any,
17
+ ) -> tuple[float, float, float, float] | tuple[float, float, float, float, float, float, float, float]:
18
+ if len(geom) == 4:
19
+ return (*geom[0], *geom[1], *geom[2], *geom[3])
20
+ return (*geom[0], *geom[1])
21
+
22
+
23
async def get_documents(files: list[UploadFile]) -> tuple[list[np.ndarray], list[str]]:  # pragma: no cover
    """Convert a list of UploadFile objects to lists of numpy arrays and their corresponding filenames

    Args:
        files: list of UploadFile objects

    Returns:
        tuple[list[np.ndarray], list[str]]: list of numpy arrays and their corresponding filenames

    Raises:
        ValueError: if a file has an unsupported MIME type

    """
    filenames: list[str] = []
    docs: list[np.ndarray] = []
    for file in files:
        mime_type = file.content_type
        if mime_type in ("image/jpeg", "image/png"):
            # A single image yields exactly one page.
            docs.extend(DocumentFile.from_images([await file.read()]))
            filenames.append(file.filename or "")
        elif mime_type == "application/pdf":
            # A PDF may yield several pages; repeat the filename once per page.
            pdf_content = DocumentFile.from_pdf(await file.read())
            docs.extend(pdf_content)
            # Bug fix: the previous `[file.filename] * n or [""] * n` never applied the
            # fallback — a non-empty list of None values is still truthy, so None could
            # leak into `filenames`. Apply the `or ""` fallback per element instead.
            filenames.extend([file.filename or ""] * len(pdf_content))
        else:
            raise ValueError(f"Unsupported file format: {mime_type} for file {file.filename}")

    return docs, filenames
src/python-doctr/api/app/vision.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) 2021-2025, Mindee.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+
7
+ from collections.abc import Callable
8
+
9
+ import torch
10
+
11
+ from doctr.models import kie_predictor, ocr_predictor
12
+
13
+ from .schemas import DetectionIn, KIEIn, OCRIn, RecognitionIn
14
+
15
+
16
+ def _move_to_device(predictor: Callable) -> Callable:
17
+ """Move the predictor to the desired device
18
+
19
+ Args:
20
+ predictor: the predictor to move
21
+
22
+ Returns:
23
+ Callable: the predictor moved to the desired device
24
+ """
25
+ return predictor.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
26
+
27
+
28
def init_predictor(request: KIEIn | OCRIn | RecognitionIn | DetectionIn) -> Callable:
    """Build the predictor matching the request type and move it to the right device.

    Args:
        request: input request

    Returns:
        Callable: the predictor
    """
    params = request.model_dump()
    # Thresholds are consumed here rather than forwarded to the predictor factory.
    bin_thresh = params.pop("bin_thresh", None)
    box_thresh = params.pop("box_thresh", None)

    def _set_thresholds(pred):
        # Both predictor families expose the detection postprocessor the same way.
        pred.det_predictor.model.postprocessor.bin_thresh = bin_thresh
        pred.det_predictor.model.postprocessor.box_thresh = box_thresh
        return pred

    if isinstance(request, (OCRIn, RecognitionIn, DetectionIn)):
        predictor = _set_thresholds(ocr_predictor(pretrained=True, **params))
        if isinstance(request, DetectionIn):
            # Detection-only: expose just the detection stage.
            return _move_to_device(predictor.det_predictor)
        if isinstance(request, RecognitionIn):
            # Recognition-only: expose just the recognition stage.
            return _move_to_device(predictor.reco_predictor)
        return _move_to_device(predictor)
    if isinstance(request, KIEIn):
        return _move_to_device(_set_thresholds(kie_predictor(pretrained=True, **params)))
src/python-doctr/api/docker-compose.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
services:
  # Single-service stack exposing the FastAPI app via uvicorn.
  web:
    container_name: api_web
    build:
      context: .
      dockerfile: Dockerfile
    # --reload enables auto-restart on code changes (development setting);
    # one worker, listening on all interfaces.
    command: uvicorn app.main:app --reload --workers 1 --host 0.0.0.0 --port 8080
    ports:
      # host:container
      - 8080:8080