Update README.md
README.md
CHANGED
@@ -109,77 +109,6 @@ print(doc.export_to_markdown())
 </details>
 
 
-<details>
-<summary>Multi-page image inference using Transformers</summary>
-
-```python
-# Prerequisites:
-# pip install torch
-# pip install docling_core
-
-import torch
-from docling_core.types.doc import DoclingDocument
-from docling_core.types.doc.document import DocTagsDocument
-from transformers import AutoProcessor, AutoModelForVision2Seq
-from transformers.image_utils import load_image
-
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
-# Load images (the same page is used twice here as a stand-in for two pages)
-page_1 = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")
-page_2 = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")
-
-# Initialize processor and model
-processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
-model = AutoModelForVision2Seq.from_pretrained(
-    "ds4sd/SmolDocling-256M-preview",
-    torch_dtype=torch.bfloat16,
-    _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
-).to(DEVICE)
-
-# Create input messages: one image placeholder per page
-messages = [
-    {
-        "role": "user",
-        "content": [
-            {"type": "image"},
-            {"type": "image"},
-            {"type": "text", "text": "Convert this document to docling."}
-        ]
-    },
-]
-
-# Prepare inputs
-prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
-inputs = processor(text=prompt, images=[page_1, page_2], return_tensors="pt")
-inputs = inputs.to(DEVICE)
-
-# Generate outputs
-generated_ids = model.generate(**inputs, max_new_tokens=8192)
-prompt_length = inputs.input_ids.shape[1]
-trimmed_generated_ids = generated_ids[:, prompt_length:]
-doctags = processor.batch_decode(
-    trimmed_generated_ids,
-    skip_special_tokens=False,
-)[0].lstrip()
-
-# Split the output into per-page doctags and pair each page with its image
-doctags_split = doctags.split("<page_break>")
-doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(doctags_split, [page_1, page_2])
-# Create a DoclingDocument from the doctags
-doc = DoclingDocument(name="Document")
-doc.load_from_doctags(doctags_doc)
-
-# Export to any supported format
-# HTML
-# print(doc.export_to_html())
-# with open(output_file, "w", encoding="utf-8") as f:
-#     f.write(doc.export_to_html())
-# Markdown
-print(doc.export_to_markdown())
-```
-</details>
-
 <details>
 <summary> 🚀 Fast Batch Inference Using VLLM</summary>
 
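For readers who relied on the removed example: the HTML export it left commented out never defined `output_file`. A minimal completion, assuming a hypothetical output path and the `doc` object built in the example above:

```python
# Hypothetical completion of the commented-out HTML export above.
# "gazette.html" is an assumed path, and doc is the DoclingDocument
# constructed in the removed example.
output_file = "gazette.html"
with open(output_file, "w", encoding="utf-8") as f:
    f.write(doc.export_to_html())
```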
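The removed example fed the same Wikimedia scan in twice as stand-in pages. In practice, the per-page images would come from a real multi-page source; here is a sketch using pdf2image, where the PDF path is an assumption for illustration:

```python
# Sketch: rasterize a multi-page PDF into PIL images for the pipeline above.
# "sample.pdf" is a hypothetical input path; convert_from_path (which requires
# poppler to be installed) returns one PIL.Image per page, which the processor
# accepts in place of page_1/page_2.
from pdf2image import convert_from_path

pages = convert_from_path("sample.pdf", dpi=144)
# The chat messages would need one {"type": "image"} entry per page.
inputs = processor(text=prompt, images=pages, return_tensors="pt").to(DEVICE)
```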
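The hunk ends at the top of the vLLM section, whose body is unchanged and therefore not shown. For orientation only, a minimal offline-inference sketch with vLLM for this checkpoint follows; the prompt string, sampling parameters, and image handling are assumptions, not the README's actual code:

```python
# Hedged sketch of single-page DocTags generation with vLLM offline inference.
# The chat-template string below is an assumption (SmolVLM-style formatting),
# not taken from the README.
import requests
from PIL import Image
from vllm import LLM, SamplingParams

llm = LLM(model="ds4sd/SmolDocling-256M-preview", limit_mm_per_prompt={"image": 1})
sampling_params = SamplingParams(temperature=0.0, max_tokens=8192)

prompt = "<|im_start|>User:<image>Convert this document to docling.<end_of_utterance>\nAssistant:"
image = Image.open(requests.get(
    "https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg",
    stream=True,
).raw)

# vLLM accepts a dict pairing the text prompt with multimodal inputs.
outputs = llm.generate(
    {"prompt": prompt, "multi_modal_data": {"image": image}},
    sampling_params=sampling_params,
)
print(outputs[0].outputs[0].text)  # raw doctags for the page
```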