MaxMnemonic committed · Commit a4c943f · verified · 1 Parent(s): e9cfa1f

Update README.md


Updated single-page and multi-page examples

Files changed (1)
  1. README.md +24 -13
README.md CHANGED
@@ -44,15 +44,20 @@ You can use transformers or docling to perform inference:
<summary>Single image inference using Tranformers</summary>

```python
+ # Prerequisites:
+ # pip install torch
+ # pip install docling_core
+
import torch
- from PIL import Image
+ from docling_core.types.doc import DoclingDocument
+ from docling_core.types.doc.document import DocTagsDocument
from transformers import AutoProcessor, AutoModelForVision2Seq
from transformers.image_utils import load_image

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load images
- image = load_image("https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg")
+ image = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")

# Initialize processor and model
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
@@ -88,8 +93,9 @@ doctags = processor.batch_decode(
)[0].lstrip()

# Populate document
- doctags_doc = DoclingDocument.from_doctags_and_image_pairs([doctags], images)
- doc = DoclingDocument() # Initialize doc if needed
+ doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
+ # create a docling document
+ doc = DoclingDocument(name="Document")
doc.load_from_doctags(doctags_doc)

# export as any format
@@ -98,8 +104,7 @@ doc.load_from_doctags(doctags_doc)
# with open(output_file, "w", encoding="utf-8") as f:
# f.write(doc.export_to_html())
# MD
- # print(doc.export_to_markdown())
-
+ print(doc.export_to_markdown())
```
</details>

@@ -108,16 +113,21 @@ doc.load_from_doctags(doctags_doc)
<summary>Multi-page image inference using Tranformers</summary>

```python
+ # Prerequisites:
+ # pip install torch
+ # pip install docling_core
+
import torch
- from PIL import Image
+ from docling_core.types.doc import DoclingDocument
+ from docling_core.types.doc.document import DocTagsDocument
from transformers import AutoProcessor, AutoModelForVision2Seq
from transformers.image_utils import load_image

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load images
- page_1 = load_image("https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg")
- page_2 = load_image("https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg")
+ page_1 = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")
+ page_2 = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")

# Initialize processor and model
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
@@ -153,11 +163,12 @@ doctags = processor.batch_decode(
skip_special_tokens=False,
)[0].lstrip()

+ # populate it
+ doctags_split = doctags.split("<page_break>")
+ doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(doctags_split, [page_1, page_2])
# create a docling document
doc = DoclingDocument(name="Document")
-
- # populate it
- doc.load_from_document_tokens([doctags], [page_1, page_2])
+ doc.load_from_doctags(doctags_doc)

# export as any format
# HTML
@@ -165,7 +176,7 @@ doc.load_from_document_tokens([doctags], [page_1, page_2])
# with open(output_file, "w", encoding="utf-8") as f:
# f.write(doc.export_to_html())
# MD
- # print(doc.export_to_markdown())
+ print(doc.export_to_markdown())
``````
</details>
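Read end to end, the updated single-page example assembles into a script along the lines of the sketch below. Only the head and tail of the example appear in the hunks above; the model loading, prompt, and generation step in between is not part of this diff, so the chat message, the `max_new_tokens` value, and the prompt-trimming slice here are assumptions based on typical transformers vision-to-sequence usage rather than the README's exact code.

```python
# Sketch of the updated single-page flow. Everything between "Initialize
# processor and model" and the batch_decode tail is NOT part of this diff;
# the prompt and generation settings below are illustrative assumptions.
import torch
from docling_core.types.doc import DoclingDocument
from docling_core.types.doc.document import DocTagsDocument
from transformers import AutoProcessor, AutoModelForVision2Seq
from transformers.image_utils import load_image

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the sample page used by the updated README
image = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")

# Initialize processor and model
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
model = AutoModelForVision2Seq.from_pretrained("ds4sd/SmolDocling-256M-preview").to(DEVICE)

# Assumed prompting/generation step (not shown in this diff)
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Convert this page to docling."},
    ]}
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=[image], return_tensors="pt").to(DEVICE)
generated_ids = model.generate(**inputs, max_new_tokens=8192)

# Decode only the newly generated tokens into DocTags
doctags = processor.batch_decode(
    generated_ids[:, inputs["input_ids"].shape[1]:],
    skip_special_tokens=False,
)[0].lstrip()

# Populate the document (the new DocTagsDocument -> DoclingDocument flow)
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
doc = DoclingDocument(name="Document")
doc.load_from_doctags(doctags_doc)

# Export as Markdown
print(doc.export_to_markdown())
```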
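In the multi-page example, the step that changes shape is document population: the generated tags are split on `<page_break>` and each chunk is paired with its page image before the document is built. The sketch below wraps that step in a small helper; the function name `doctags_to_document` and the `output.html` path are hypothetical, and the export calls mirror the ones the README shows (Markdown printed, HTML written to a file).

```python
from pathlib import Path

from docling_core.types.doc import DoclingDocument
from docling_core.types.doc.document import DocTagsDocument
from PIL import Image


def doctags_to_document(doctags: str, pages: list[Image.Image]) -> DoclingDocument:
    """Pair <page_break>-separated DocTags with page images and build a DoclingDocument.

    Hypothetical helper wrapping the population step from the updated README.
    """
    chunks = doctags.split("<page_break>")  # one DocTags chunk per page, in page order
    doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(chunks, pages)
    doc = DoclingDocument(name="Document")
    doc.load_from_doctags(doctags_doc)
    return doc


# Usage, assuming doctags, page_1 and page_2 come from a generation step like the one above:
# doc = doctags_to_document(doctags, [page_1, page_2])
# print(doc.export_to_markdown())
# Path("output.html").write_text(doc.export_to_html(), encoding="utf-8")
```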