Update README.md
Browse files
Updated single page and multipage examples
README.md
CHANGED
@@ -44,15 +44,20 @@ You can use transformers or docling to perform inference:
|
|
44 |
<summary>Single image inference using Transformers</summary>
|
45 |
|
46 |
```python
|
|
|
|
|
|
|
|
|
47 |
import torch
|
48 |
-
from
|
|
|
49 |
from transformers import AutoProcessor, AutoModelForVision2Seq
|
50 |
from transformers.image_utils import load_image
|
51 |
|
52 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
53 |
|
54 |
# Load images
|
55 |
-
image = load_image("https://
|
56 |
|
57 |
# Initialize processor and model
|
58 |
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
@@ -88,8 +93,9 @@ doctags = processor.batch_decode(
|
|
88 |
)[0].lstrip()
|
89 |
|
90 |
# Populate document
|
91 |
-
doctags_doc =
|
92 |
-
|
|
|
93 |
doc.load_from_doctags(doctags_doc)
|
94 |
|
95 |
# export as any format
|
@@ -98,8 +104,7 @@ doc.load_from_doctags(doctags_doc)
|
|
98 |
# with open(output_file, "w", encoding="utf-8") as f:
|
99 |
# f.write(doc.export_to_html())
|
100 |
# MD
|
101 |
-
|
102 |
-
|
103 |
```
|
104 |
</details>
|
105 |
|
@@ -108,16 +113,21 @@ doc.load_from_doctags(doctags_doc)
|
|
108 |
<summary>Multi-page image inference using Transformers</summary>
|
109 |
|
110 |
```python
|
|
|
|
|
|
|
|
|
111 |
import torch
|
112 |
-
from
|
|
|
113 |
from transformers import AutoProcessor, AutoModelForVision2Seq
|
114 |
from transformers.image_utils import load_image
|
115 |
|
116 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
117 |
|
118 |
# Load images
|
119 |
-
page_1 = load_image("https://
|
120 |
-
page_2 = load_image("https://
|
121 |
|
122 |
# Initialize processor and model
|
123 |
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
@@ -153,11 +163,12 @@ doctags = processor.batch_decode(
|
|
153 |
skip_special_tokens=False,
|
154 |
)[0].lstrip()
|
155 |
|
|
|
|
|
|
|
156 |
# create a docling document
|
157 |
doc = DoclingDocument(name="Document")
|
158 |
-
|
159 |
-
# populate it
|
160 |
-
doc.load_from_document_tokens([doctags], [page_1, page_2])
|
161 |
|
162 |
# export as any format
|
163 |
# HTML
|
@@ -165,7 +176,7 @@ doc.load_from_document_tokens([doctags], [page_1, page_2])
|
|
165 |
# with open(output_file, "w", encoding="utf-8") as f:
|
166 |
# f.write(doc.export_to_html())
|
167 |
# MD
|
168 |
-
|
169 |
``````
|
170 |
</details>
|
171 |
|
|
|
44 |
<summary>Single image inference using Transformers</summary>
|
45 |
|
46 |
```python
|
47 |
+
# Prerequisites:
|
48 |
+
# pip install torch
|
49 |
+
# pip install docling_core
|
50 |
+
|
51 |
import torch
|
52 |
+
from docling_core.types.doc import DoclingDocument
|
53 |
+
from docling_core.types.doc.document import DocTagsDocument
|
54 |
from transformers import AutoProcessor, AutoModelForVision2Seq
|
55 |
from transformers.image_utils import load_image
|
56 |
|
57 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
58 |
|
59 |
# Load images
|
60 |
+
image = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")
|
61 |
|
62 |
# Initialize processor and model
|
63 |
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
|
|
93 |
)[0].lstrip()
|
94 |
|
95 |
# Populate document
|
96 |
+
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
|
97 |
+
# create a docling document
|
98 |
+
doc = DoclingDocument(name="Document")
|
99 |
doc.load_from_doctags(doctags_doc)
|
100 |
|
101 |
# export as any format
|
|
|
104 |
# with open(output_file, "w", encoding="utf-8") as f:
|
105 |
# f.write(doc.export_to_html())
|
106 |
# MD
|
107 |
+
print(doc.export_to_markdown())
|
|
|
108 |
```
|
109 |
</details>
|
110 |
|
|
|
113 |
<summary>Multi-page image inference using Transformers</summary>
|
114 |
|
115 |
```python
|
116 |
+
# Prerequisites:
|
117 |
+
# pip install torch
|
118 |
+
# pip install docling_core
|
119 |
+
|
120 |
import torch
|
121 |
+
from docling_core.types.doc import DoclingDocument
|
122 |
+
from docling_core.types.doc.document import DocTagsDocument
|
123 |
from transformers import AutoProcessor, AutoModelForVision2Seq
|
124 |
from transformers.image_utils import load_image
|
125 |
|
126 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
127 |
|
128 |
# Load images
|
129 |
+
page_1 = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")
|
130 |
+
page_2 = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")
|
131 |
|
132 |
# Initialize processor and model
|
133 |
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
|
|
163 |
skip_special_tokens=False,
|
164 |
)[0].lstrip()
|
165 |
|
166 |
+
# populate it
|
167 |
+
doctags_split = doctags.split("<page_break>")
|
168 |
+
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(doctags_split, [page_1, page_2])
|
169 |
# create a docling document
|
170 |
doc = DoclingDocument(name="Document")
|
171 |
+
doc.load_from_doctags(doctags_doc)
|
|
|
|
|
172 |
|
173 |
# export as any format
|
174 |
# HTML
|
|
|
176 |
# with open(output_file, "w", encoding="utf-8") as f:
|
177 |
# f.write(doc.export_to_html())
|
178 |
# MD
|
179 |
+
print(doc.export_to_markdown())
|
180 |
``````
|
181 |
</details>
|
182 |
|