Update README.md
README.md CHANGED
@@ -38,7 +38,7 @@ pipeline_tag: image-text-to-text
 - 📊 **Better chart recognition 🛠️**
 - 📚 **One shot multi-page inference ⏱️**

-##
+## ⌨️ Get started (code examples)

 You can use transformers or docling to perform inference:

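The hunks below only touch the vLLM batch path, while the line above also mentions transformers. As a rough sketch only (not part of this diff), a transformers version for the same checkpoint could look like the following; the Idefics3-style processor API, the prompt text, and the `page.png` file name are assumptions:

```python
# Illustrative sketch, not taken from the README diff.
# Assumes the Idefics3-style processor/chat-template API used by SmolDocling-class models.
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

MODEL_PATH = "ds4sd/SmolDocling-256M-preview"
processor = AutoProcessor.from_pretrained(MODEL_PATH)
model = AutoModelForVision2Seq.from_pretrained(MODEL_PATH)

image = Image.open("page.png")  # hypothetical input page
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Convert page to Docling."},
    ],
}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=[image], return_tensors="pt")

generated = model.generate(**inputs, max_new_tokens=8192)
# Drop the prompt tokens and keep only the generated DocTags string.
doctags = processor.batch_decode(
    generated[:, inputs["input_ids"].shape[1]:], skip_special_tokens=False
)[0]
print(doctags)
```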
@@ -115,17 +115,23 @@ print(doc.export_to_markdown())
 <summary> 🚀 Fast Batch Inference Using VLLM</summary>

 ```python
-
+# Prerequisites:
+# pip install vllm
+# pip install docling_core
+# place page images you want to convert into img/ dir

 import time
 import os
 from vllm import LLM, SamplingParams
 from PIL import Image
+from docling_core.types.doc import DoclingDocument
+from docling_core.types.doc.document import DocTagsDocument

 # Configuration
 MODEL_PATH = "ds4sd/SmolDocling-256M-preview"
-IMAGE_DIR = "images_dir"
-
+# IMAGE_DIR = "images_dir"
+IMAGE_DIR = "img/"
+OUTPUT_DIR = "out/"
 PROMPT_TEXT = "Convert page to Docling."

 # Ensure output directory exists
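The next hunk edits the body of a per-image loop whose setup (`llm`, `sampling_params`, `image_files`, `chat_template`, `start_time`) lives in unchanged parts of the README that this diff does not show. A minimal sketch of that setup, with every value an assumption rather than a copy of the README (the `chat_template` string is likewise defined elsewhere in the README and not reproduced here), might be:

```python
# Hedged sketch of the surrounding setup assumed by the loop below; values are illustrative.
import os
import time

from vllm import LLM, SamplingParams

MODEL_PATH = "ds4sd/SmolDocling-256M-preview"
IMAGE_DIR = "img/"
OUTPUT_DIR = "out/"

# Ensure output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

llm = LLM(model=MODEL_PATH)
sampling_params = SamplingParams(temperature=0.0, max_tokens=8192)

image_files = sorted(
    f for f in os.listdir(IMAGE_DIR)
    if f.lower().endswith((".png", ".jpg", ".jpeg"))
)
start_time = time.time()
```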
@@ -152,12 +158,29 @@ for idx, img_file in enumerate(image_files, 1):
     llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
     output = llm.generate([llm_input], sampling_params=sampling_params)[0]

-
-
+    doctags = output.outputs[0].text
+    img_fn = os.path.splitext(img_file)[0]
+    output_filename = img_fn + ".dt"
     output_path = os.path.join(OUTPUT_DIR, output_filename)

     with open(output_path, "w", encoding="utf-8") as f:
-        f.write(
+        f.write(doctags)
+
+    # To convert to Docling Document, MD, HTML, etc.:
+    doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
+    doc = DoclingDocument(name="Document")
+    doc.load_from_doctags(doctags_doc)
+    # export as any format
+    # HTML
+    # print(doc.export_to_html())
+    # with open(output_file, "w", encoding="utf-8") as f:
+    #     f.write(doc.export_to_html())
+    # MD
+    output_filename_md = img_fn + ".md"
+    output_path_md = os.path.join(OUTPUT_DIR, output_filename_md)
+    markdown = doc.export_to_markdown()
+    with open(output_path_md, "w", encoding="utf-8") as f:
+        f.write(markdown)

 print(f"Total time: {time.time() - start_time:.2f} sec")
 ```
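Since the loop above writes the raw DocTags to `.dt` files, the same `docling_core` calls added in the hunk above can also be used afterwards to reload a saved page and export it; a short follow-up sketch (file names are illustrative):

```python
# Reload a previously saved DocTags file and export it, reusing the
# docling_core API shown in the diff above. File names are illustrative.
from PIL import Image
from docling_core.types.doc import DoclingDocument
from docling_core.types.doc.document import DocTagsDocument

with open("out/page_1.dt", encoding="utf-8") as f:
    doctags = f.read()
image = Image.open("img/page_1.png")

doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
doc = DoclingDocument(name="Document")
doc.load_from_doctags(doctags_doc)

print(doc.export_to_markdown())
# HTML works the same way:
# with open("out/page_1.html", "w", encoding="utf-8") as f:
#     f.write(doc.export_to_html())
```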
@@ -223,5 +246,7 @@ DocTags are integrated with Docling, which allows export to HTML, Markdown, and
 - **Finetuned from model:** Based on [Idefics3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) (see technical summary)

 **Repository:** [Docling](https://github.com/docling-project/docling)
-
-**
+
+**Paper:** [Coming soon]
+
+**Demo:** [Coming soon]