MaxMnemonic committed on
Commit
a3dd76f
·
verified ·
1 Parent(s): 904b2d3

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -9
README.md CHANGED
@@ -38,7 +38,7 @@ pipeline_tag: image-text-to-text
38
  - 📊 **Better chart recognition 🛠️**
39
  - 📚 **One shot multi-page inference ⏱️**
40
 
41
- ## How to get started
42
 
43
  You can use transformers or docling to perform inference:
44
 
@@ -115,17 +115,23 @@ print(doc.export_to_markdown())
115
  <summary> 🚀 Fast Batch Inference Using VLLM</summary>
116
 
117
  ```python
118
- !pip install vllm
 
 
 
119
 
120
  import time
121
  import os
122
  from vllm import LLM, SamplingParams
123
  from PIL import Image
 
 
124
 
125
  # Configuration
126
  MODEL_PATH = "ds4sd/SmolDocling-256M-preview"
127
- IMAGE_DIR = "images_dir"
128
- OUTPUT_DIR = "output_pred_dir"
 
129
  PROMPT_TEXT = "Convert page to Docling."
130
 
131
  # Ensure output directory exists
@@ -152,12 +158,29 @@ for idx, img_file in enumerate(image_files, 1):
152
  llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
153
  output = llm.generate([llm_input], sampling_params=sampling_params)[0]
154
 
155
- output_text = output.outputs[0].text
156
- output_filename = os.path.splitext(img_file)[0] + ".dt"
 
157
  output_path = os.path.join(OUTPUT_DIR, output_filename)
158
 
159
  with open(output_path, "w", encoding="utf-8") as f:
160
- f.write(output_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  print(f"Total time: {time.time() - start_time:.2f} sec")
163
  ```
@@ -223,5 +246,7 @@ DocTags are integrated with Docling, which allows export to HTML, Markdown, and
223
  - **Finetuned from model:** Based on [Idefics3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) (see technical summary)
224
 
225
  **Repository:** [Docling](https://github.com/docling-project/docling)
226
- **Paper [optional]:** [Coming soon]
227
- **Demo [optional]:** [Coming soon]
 
 
 
38
  - 📊 **Better chart recognition 🛠️**
39
  - 📚 **One shot multi-page inference ⏱️**
40
 
41
+ ## ⌨️ Get started (code examples)
42
 
43
  You can use transformers or docling to perform inference:
44
 
 
115
  <summary> 🚀 Fast Batch Inference Using VLLM</summary>
116
 
117
  ```python
118
+ # Prerequisites:
119
+ # pip install vllm
120
+ # pip install docling_core
121
+ # place page images you want to convert into img/ dir
122
 
123
  import time
124
  import os
125
  from vllm import LLM, SamplingParams
126
  from PIL import Image
127
+ from docling_core.types.doc import DoclingDocument
128
+ from docling_core.types.doc.document import DocTagsDocument
129
 
130
  # Configuration
131
  MODEL_PATH = "ds4sd/SmolDocling-256M-preview"
132
+ # IMAGE_DIR = "images_dir"
133
+ IMAGE_DIR = "img/"
134
+ OUTPUT_DIR = "out/"
135
  PROMPT_TEXT = "Convert page to Docling."
136
 
137
  # Ensure output directory exists
 
158
  llm_input = {"prompt": chat_template, "multi_modal_data": {"image": image}}
159
  output = llm.generate([llm_input], sampling_params=sampling_params)[0]
160
 
161
+ doctags = output.outputs[0].text
162
+ img_fn = os.path.splitext(img_file)[0]
163
+ output_filename = img_fn + ".dt"
164
  output_path = os.path.join(OUTPUT_DIR, output_filename)
165
 
166
  with open(output_path, "w", encoding="utf-8") as f:
167
+ f.write(doctags)
168
+
169
+ # To convert to Docling Document, MD, HTML, etc.:
170
+ doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
171
+ doc = DoclingDocument(name="Document")
172
+ doc.load_from_doctags(doctags_doc)
173
+ # export as any format
174
+ # HTML
175
+ # print(doc.export_to_html())
176
+ # with open(output_file, "w", encoding="utf-8") as f:
177
+ # f.write(doc.export_to_html())
178
+ # MD
179
+ output_filename_md = img_fn + ".md"
180
+ output_path_md = os.path.join(OUTPUT_DIR, output_filename_md)
181
+ markdown = doc.export_to_markdown()
182
+ with open(output_path_md, "w", encoding="utf-8") as f:
183
+ f.write(markdown)
184
 
185
  print(f"Total time: {time.time() - start_time:.2f} sec")
186
  ```
 
246
  - **Finetuned from model:** Based on [Idefics3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) (see technical summary)
247
 
248
  **Repository:** [Docling](https://github.com/docling-project/docling)
249
+
250
+ **Paper:** [Coming soon]
251
+
252
+ **Demo:** [Coming soon]