MaxMnemonic committed
Commit 3e8f36f · verified · 1 Parent(s): a4c943f

Update README.md

Files changed (1)
  1. README.md +0 -71
README.md CHANGED
@@ -109,77 +109,6 @@ print(doc.export_to_markdown())
  </details>
 
 
- <details>
- <summary>Multi-page image inference using Transformers</summary>
-
- ```python
- # Prerequisites:
- # pip install torch
- # pip install docling_core
-
- import torch
- from docling_core.types.doc import DoclingDocument
- from docling_core.types.doc.document import DocTagsDocument
- from transformers import AutoProcessor, AutoModelForVision2Seq
- from transformers.image_utils import load_image
-
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
- # Load images
- page_1 = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")
- page_2 = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")
-
- # Initialize processor and model
- processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
- model = AutoModelForVision2Seq.from_pretrained(
-     "ds4sd/SmolDocling-256M-preview",
-     torch_dtype=torch.bfloat16,
-     _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
- ).to(DEVICE)
-
- # Create input messages
- messages = [
-     {
-         "role": "user",
-         "content": [
-             {"type": "image"},
-             {"type": "image"},
-             {"type": "text", "text": "Convert this document to docling."}
-         ]
-     },
- ]
-
- # Prepare inputs
- prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
- inputs = processor(text=prompt, images=[page_1, page_2], return_tensors="pt")
- inputs = inputs.to(DEVICE)
-
- # Generate outputs
- generated_ids = model.generate(**inputs, max_new_tokens=8192)
- prompt_length = inputs.input_ids.shape[1]
- trimmed_generated_ids = generated_ids[:, prompt_length:]
- doctags = processor.batch_decode(
-     trimmed_generated_ids,
-     skip_special_tokens=False,
- )[0].lstrip()
-
- # Split the DocTags output on page breaks and pair each segment with its page image
- doctags_split = doctags.split("<page_break>")
- doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(doctags_split, [page_1, page_2])
- # Create a Docling document
- doc = DoclingDocument(name="Document")
- doc.load_from_doctags(doctags_doc)
-
- # Export to any supported format
- # HTML
- # print(doc.export_to_html())
- # with open(output_file, "w", encoding="utf-8") as f:
- #     f.write(doc.export_to_html())
- # Markdown
- print(doc.export_to_markdown())
- ```
- </details>
-
  <details>
  <summary> 🚀 Fast Batch Inference Using VLLM</summary>
 