MaxMnemonic committed on
Commit
60e3cdc
·
verified ·
1 Parent(s): a3dd76f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +23 -22
README.md CHANGED
@@ -31,12 +31,11 @@ pipeline_tag: image-text-to-text
31
  - 🔲 **OCR with Bounding Boxes** – OCR regions using a bounding box.
32
  - 📂 **General Document Processing** – Trained for both scientific and non-scientific documents.
33
  - 🔄 **Seamless Docling Integration** – Import into **Docling** and export in multiple formats.
34
- - 📚 **Multi-Page & Full Document Conversion** – Coming Soon.
35
- - 🧪 **Chemical Recognition** – Coming Soon.
36
 
37
  ### 🚧 *Coming soon!*
38
  - 📊 **Better chart recognition 🛠️**
39
  - 📚 **One shot multi-page inference ⏱️**
 
40
 
41
  ## ⌨️ Get started (code examples)
42
 
@@ -49,6 +48,7 @@ You can use transformers or docling to perform inference:
49
  # Prerequisites:
50
  # pip install torch
51
  # pip install docling_core
 
52
 
53
  import torch
54
  from docling_core.types.doc import DoclingDocument
@@ -96,15 +96,14 @@ doctags = processor.batch_decode(
96
 
97
  # Populate document
98
  doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
 
99
  # create a docling document
100
  doc = DoclingDocument(name="Document")
101
  doc.load_from_doctags(doctags_doc)
102
 
103
  # export as any format
104
  # HTML
105
- # print(doc.export_to_html())
106
- # with open(output_file, "w", encoding="utf-8") as f:
107
- # f.write(doc.export_to_html())
108
  # MD
109
  print(doc.export_to_markdown())
110
  ```
@@ -118,7 +117,7 @@ print(doc.export_to_markdown())
118
  # Prerequisites:
119
  # pip install vllm
120
  # pip install docling_core
121
- # place page images you want to convert into img/ dir
122
 
123
  import time
124
  import os
@@ -129,8 +128,7 @@ from docling_core.types.doc.document import DocTagsDocument
129
 
130
  # Configuration
131
  MODEL_PATH = "ds4sd/SmolDocling-256M-preview"
132
- # IMAGE_DIR = "images_dir"
133
- IMAGE_DIR = "img/"
134
  OUTPUT_DIR = "out/"
135
  PROMPT_TEXT = "Convert page to Docling."
136
 
@@ -172,15 +170,11 @@ for idx, img_file in enumerate(image_files, 1):
172
  doc.load_from_doctags(doctags_doc)
173
  # export as any format
174
  # HTML
175
- # print(doc.export_to_html())
176
- # with open(output_file, "w", encoding="utf-8") as f:
177
- # f.write(doc.export_to_html())
178
  # MD
179
  output_filename_md = img_fn + ".md"
180
  output_path_md = os.path.join(OUTPUT_DIR, output_filename_md)
181
- markdown = doc.export_to_markdown()
182
- with open(output_path_md, "w", encoding="utf-8") as f:
183
- f.write(markdown)
184
 
185
  print(f"Total time: {time.time() - start_time:.2f} sec")
186
  ```
@@ -198,42 +192,49 @@ DocTags are integrated with Docling, which allows export to HTML, Markdown, and
198
  <tr>
199
  <td><b>Description</b></td>
200
  <td><b>Instruction</b></td>
 
201
  </tr>
202
  <tr>
203
  <td>Full conversion</td>
204
  <td>Convert this page to docling.</td>
 
205
  </tr>
206
  <tr>
207
  <td>Chart</td>
208
- <td>Convert chart to table (e.g., &lt;chart&gt;).</td>
 
209
  </tr>
210
  <tr>
211
  <td>Formula</td>
212
- <td>Convert formula to LaTeX (e.g., &lt;formula&gt;).</td>
 
213
  </tr>
214
  <tr>
215
  <td>Code</td>
216
- <td>Convert code to text (e.g., &lt;code&gt;).</td>
 
217
  </tr>
218
  <tr>
219
  <td>Table</td>
220
- <td>Convert table to OTSL (e.g., &lt;otsl&gt;). OTSL: <a href="https://arxiv.org/pdf/2305.03393">Lysak et al., 2023</a></td>
 
221
  </tr>
222
  <tr>
223
- <td>No-Code Actions/Pipelines</td>
224
  <td>OCR the text in a specific location: &lt;loc_155&gt;&lt;loc_233&gt;&lt;loc_206&gt;&lt;loc_237&gt;</td>
 
225
  </tr>
226
  <tr>
227
- <td></td>
228
  <td>Identify element at: &lt;loc_247&gt;&lt;loc_482&gt;&lt;loc_252&gt;&lt;loc_486&gt;</td>
 
229
  </tr>
230
  <tr>
231
- <td></td>
232
  <td>Find all 'text' elements on the page, retrieve all section headers.</td>
 
233
  </tr>
234
  <tr>
235
- <td></td>
236
  <td>Detect footer elements on the page.</td>
 
237
  </tr>
238
  </table>
239
 
 
31
  - 🔲 **OCR with Bounding Boxes** – OCR regions using a bounding box.
32
  - 📂 **General Document Processing** – Trained for both scientific and non-scientific documents.
33
  - 🔄 **Seamless Docling Integration** – Import into **Docling** and export in multiple formats.
 
 
34
 
35
  ### 🚧 *Coming soon!*
36
  - 📊 **Better chart recognition 🛠️**
37
  - 📚 **One shot multi-page inference ⏱️**
38
+ - 🧪 **Chemical Recognition**
39
 
40
  ## ⌨️ Get started (code examples)
41
 
 
48
  # Prerequisites:
49
  # pip install torch
50
  # pip install docling_core
51
+ # pip install transformers
52
 
53
  import torch
54
  from docling_core.types.doc import DoclingDocument
 
96
 
97
  # Populate document
98
  doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
99
+ print(doctags)
100
  # create a docling document
101
  doc = DoclingDocument(name="Document")
102
  doc.load_from_doctags(doctags_doc)
103
 
104
  # export as any format
105
  # HTML
106
+ # doc.save_as_html(output_file)
 
 
107
  # MD
108
  print(doc.export_to_markdown())
109
  ```
 
117
  # Prerequisites:
118
  # pip install vllm
119
  # pip install docling_core
120
+ # place page images you want to convert into "img/" dir
121
 
122
  import time
123
  import os
 
128
 
129
  # Configuration
130
  MODEL_PATH = "ds4sd/SmolDocling-256M-preview"
131
+ IMAGE_DIR = "img/" # Place your page images here
 
132
  OUTPUT_DIR = "out/"
133
  PROMPT_TEXT = "Convert page to Docling."
134
 
 
170
  doc.load_from_doctags(doctags_doc)
171
  # export as any format
172
  # HTML
173
+ # doc.save_as_html(output_file)
 
 
174
  # MD
175
  output_filename_md = img_fn + ".md"
176
  output_path_md = os.path.join(OUTPUT_DIR, output_filename_md)
177
+ doc.save_as_markdown(output_path_md)
 
 
178
 
179
  print(f"Total time: {time.time() - start_time:.2f} sec")
180
  ```
 
192
  <tr>
193
  <td><b>Description</b></td>
194
  <td><b>Instruction</b></td>
195
+ <td><b>Comment</b></td>
196
  </tr>
197
  <tr>
198
  <td>Full conversion</td>
199
  <td>Convert this page to docling.</td>
200
+ <td></td>
201
  </tr>
202
  <tr>
203
  <td>Chart</td>
204
+ <td>Convert chart to table.</td>
205
+ <td>(e.g., &lt;chart&gt;)</td>
206
  </tr>
207
  <tr>
208
  <td>Formula</td>
209
+ <td>Convert formula to LaTeX.</td>
210
+ <td>(e.g., &lt;formula&gt;)</td>
211
  </tr>
212
  <tr>
213
  <td>Code</td>
214
+ <td>Convert code to text.</td>
215
+ <td>(e.g., &lt;code&gt;)</td>
216
  </tr>
217
  <tr>
218
  <td>Table</td>
219
+ <td>Convert table to OTSL.</td>
220
+ <td>(e.g., &lt;otsl&gt;) OTSL: <a href="https://arxiv.org/pdf/2305.03393">Lysak et al., 2023</a></td>
221
  </tr>
222
  <tr>
223
+ <td rowspan="4">No-Code Actions/Pipelines</td>
224
  <td>OCR the text in a specific location: &lt;loc_155&gt;&lt;loc_233&gt;&lt;loc_206&gt;&lt;loc_237&gt;</td>
225
+ <td></td>
226
  </tr>
227
  <tr>
 
228
  <td>Identify element at: &lt;loc_247&gt;&lt;loc_482&gt;&lt;loc_252&gt;&lt;loc_486&gt;</td>
229
+ <td></td>
230
  </tr>
231
  <tr>
 
232
  <td>Find all 'text' elements on the page, retrieve all section headers.</td>
233
+ <td></td>
234
  </tr>
235
  <tr>
 
236
  <td>Detect footer elements on the page.</td>
237
+ <td></td>
238
  </tr>
239
  </table>
240