openvino-ci committed on
Commit
296dd8c
·
verified ·
1 Parent(s): e8a2c35

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "_name_or_path": "distil-whisper/distil-large-v3",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
@@ -39,7 +40,7 @@
39
  "num_mel_bins": 128,
40
  "pad_token_id": 50256,
41
  "scale_embedding": false,
42
- "transformers_version": "4.45.2",
43
  "use_cache": true,
44
  "use_weighted_layer_sum": false,
45
  "vocab_size": 51866
 
1
  {
2
+ "_attn_implementation_autoset": true,
3
  "_name_or_path": "distil-whisper/distil-large-v3",
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
 
40
  "num_mel_bins": 128,
41
  "pad_token_id": 50256,
42
  "scale_embedding": false,
43
+ "transformers_version": "4.46.3",
44
  "use_cache": true,
45
  "use_weighted_layer_sum": false,
46
  "vocab_size": 51866
generation_config.json CHANGED
@@ -303,5 +303,5 @@
303
  "transcribe": 50360,
304
  "translate": 50359
305
  },
306
- "transformers_version": "4.45.2"
307
  }
 
303
  "transcribe": 50360,
304
  "translate": 50359
305
  },
306
+ "transformers_version": "4.46.3"
307
  }
openvino_decoder_model.xml CHANGED
@@ -10,7 +10,7 @@
10
  </port>
11
  </output>
12
  </layer>
13
- <layer id="0" name="Parameter_25934" type="Parameter" version="opset1">
14
  <data shape="?,?,?" element_type="f32" />
15
  <output>
16
  <port id="0" precision="FP32" names="encoder_hidden_states">
@@ -29,7 +29,7 @@
29
  </port>
30
  </output>
31
  </layer>
32
- <layer id="3" name="Convert_42265" type="Convert" version="opset1">
33
  <data destination_type="f16" />
34
  <input>
35
  <port id="0" precision="U8">
@@ -53,7 +53,7 @@
53
  </port>
54
  </output>
55
  </layer>
56
- <layer id="5" name="Convert_42268" type="Convert" version="opset1">
57
  <data destination_type="f16" />
58
  <input>
59
  <port id="0" precision="U8">
@@ -130,7 +130,7 @@
130
  </port>
131
  </output>
132
  </layer>
133
- <layer id="10" name="Constant_31368" type="Const" version="opset1">
134
  <data element_type="i64" shape="2" offset="66544078" size="16" />
135
  <output>
136
  <port id="0" precision="I64">
@@ -207,7 +207,7 @@
207
  </port>
208
  </output>
209
  </layer>
210
- <layer id="16" name="Convert_42496" type="Convert" version="opset1">
211
  <data destination_type="f16" />
212
  <input>
213
  <port id="0" precision="U8">
@@ -231,7 +231,7 @@
231
  </port>
232
  </output>
233
  </layer>
234
- <layer id="18" name="Convert_42499" type="Convert" version="opset1">
235
  <data destination_type="f16" />
236
  <input>
237
  <port id="0" precision="U8">
@@ -314,7 +314,7 @@
314
  <port id="0" precision="I64" names="23" />
315
  </output>
316
  </layer>
317
- <layer id="24" name="ShapeOf_31341" type="ShapeOf" version="opset3">
318
  <data output_type="i64" />
319
  <input>
320
  <port id="0" precision="I64">
@@ -328,19 +328,19 @@
328
  </port>
329
  </output>
330
  </layer>
331
- <layer id="25" name="Constant_31342" type="Const" version="opset1">
332
  <data element_type="i64" shape="" offset="67118890" size="8" />
333
  <output>
334
  <port id="0" precision="I64" />
335
  </output>
336
  </layer>
337
- <layer id="26" name="Constant_31343" type="Const" version="opset1">
338
  <data element_type="i64" shape="" offset="67118882" size="8" />
339
  <output>
340
  <port id="0" precision="I64" />
341
  </output>
342
  </layer>
343
- <layer id="27" name="Gather_31344" type="Gather" version="opset8">
344
  <data batch_dims="0" />
345
  <input>
346
  <port id="0" precision="I64">
@@ -401,7 +401,7 @@
401
  </port>
402
  </output>
403
  </layer>
404
- <layer id="32" name="Constant_25931" type="Const" version="opset1">
405
  <data element_type="i32" shape="" offset="66544094" size="4" />
406
  <output>
407
  <port id="0" precision="I32" />
@@ -478,7 +478,7 @@
478
  </port>
479
  </output>
480
  </layer>
481
- <layer id="37" name="Constant_31109" type="Const" version="opset1">
482
  <data element_type="f32" shape="1, 1, 1280" offset="67118906" size="5120" />
483
  <output>
484
  <port id="0" precision="FP32">
@@ -510,7 +510,7 @@
510
  </port>
511
  </output>
512
  </layer>
513
- <layer id="39" name="Constant_31110" type="Const" version="opset1">
514
  <data element_type="f32" shape="1, 1, 1280" offset="67124026" size="5120" />
515
  <output>
516
  <port id="0" precision="FP32">
@@ -542,7 +542,7 @@
542
  </port>
543
  </output>
544
  </layer>
545
- <layer id="41" name="Constant_31012" type="Const" version="opset1">
546
  <data element_type="u8" shape="1280, 1280" offset="67129146" size="1638400" />
547
  <output>
548
  <port id="0" precision="U8">
@@ -551,7 +551,7 @@
551
  </port>
552
  </output>
553
  </layer>
554
- <layer id="42" name="Convert_42386" type="Convert" version="opset1">
555
  <data destination_type="f16" />
556
  <input>
557
  <port id="0" precision="U8">
@@ -566,7 +566,7 @@
566
  </port>
567
  </output>
568
  </layer>
569
- <layer id="43" name="Constant_31012/zero_point" type="Const" version="opset1">
570
  <data element_type="u8" shape="1280, 1" offset="68767546" size="1280" />
571
  <output>
572
  <port id="0" precision="U8">
@@ -575,7 +575,7 @@
575
  </port>
576
  </output>
577
  </layer>
578
- <layer id="44" name="Convert_42389" type="Convert" version="opset1">
579
  <data destination_type="f16" />
580
  <input>
581
  <port id="0" precision="U8">
@@ -590,7 +590,7 @@
590
  </port>
591
  </output>
592
  </layer>
593
- <layer id="45" name="Constant_31012/zero_point/subtract" type="Subtract" version="opset1">
594
  <data auto_broadcast="numpy" />
595
  <input>
596
  <port id="0" precision="FP16">
@@ -609,7 +609,7 @@
609
  </port>
610
  </output>
611
  </layer>
612
- <layer id="46" name="Constant_31012/scale" type="Const" version="opset1">
613
  <data element_type="f16" shape="1280, 1" offset="68768826" size="2560" />
614
  <output>
615
  <port id="0" precision="FP16">
@@ -618,7 +618,7 @@
618
  </port>
619
  </output>
620
  </layer>
621
- <layer id="47" name="Constant_31012/fq_weights_1" type="Multiply" version="opset1">
622
  <data auto_broadcast="numpy" />
623
  <input>
624
  <port id="0" precision="FP16">
@@ -637,7 +637,7 @@
637
  </port>
638
  </output>
639
  </layer>
640
- <layer id="48" name="Constant_31012/fq_weights_1/convert" type="Convert" version="opset1">
641
  <data destination_type="f32" />
642
  <input>
643
  <port id="0" precision="FP16">
@@ -652,7 +652,7 @@
652
  </port>
653
  </output>
654
  </layer>
655
- <layer id="49" name="Multiply_30995" type="MatMul" version="opset1">
656
  <data transpose_a="false" transpose_b="true" />
657
  <input>
658
  <port id="0" precision="FP32">
@@ -673,7 +673,7 @@
673
  </port>
674
  </output>
675
  </layer>
676
- <layer id="50" name="Constant_31111" type="Const" version="opset1">
677
  <data element_type="f32" shape="1, 1, 1280" offset="68771386" size="5120" />
678
  <output>
679
  <port id="0" precision="FP32">
@@ -705,7 +705,7 @@
705
  </port>
706
  </output>
707
  </layer>
708
- <layer id="52" name="Constant_31369" type="Const" version="opset1">
709
  <data element_type="i64" shape="4" offset="68776506" size="32" />
710
  <output>
711
  <port id="0" precision="I64">
@@ -772,7 +772,7 @@
772
  </port>
773
  </output>
774
  </layer>
775
- <layer id="57" name="Convert_42375" type="Convert" version="opset1">
776
  <data destination_type="f16" />
777
  <input>
778
  <port id="0" precision="U8">
@@ -796,7 +796,7 @@
796
  </port>
797
  </output>
798
  </layer>
799
- <layer id="59" name="Convert_42378" type="Convert" version="opset1">
800
  <data destination_type="f16" />
801
  <input>
802
  <port id="0" precision="U8">
@@ -894,7 +894,7 @@
894
  </port>
895
  </output>
896
  </layer>
897
- <layer id="65" name="Constant_31370" type="Const" version="opset1">
898
  <data element_type="i64" shape="4" offset="68776506" size="32" />
899
  <output>
900
  <port id="0" precision="I64">
@@ -983,13 +983,13 @@
983
  <port id="0" precision="I32" />
984
  </output>
985
  </layer>
986
- <layer id="71" name="__module.model.model.decoder/aten::full/Convert" type="Const" version="opset1">
987
  <data element_type="f32" shape="" offset="70418794" size="4" />
988
  <output>
989
- <port id="0" precision="FP32" />
990
  </output>
991
  </layer>
992
- <layer id="72" name="Constant_31351" type="Const" version="opset1">
993
  <data element_type="i64" shape="1" offset="67118890" size="8" />
994
  <output>
995
  <port id="0" precision="I64">
@@ -997,7 +997,7 @@
997
  </port>
998
  </output>
999
  </layer>
1000
- <layer id="73" name="Reshape_31352" type="Reshape" version="opset1">
1001
  <data special_zero="false" />
1002
  <input>
1003
  <port id="0" precision="I64" />
@@ -1027,13 +1027,13 @@
1027
  <port id="2" precision="I64" names="62,64" />
1028
  </output>
1029
  </layer>
1030
- <layer id="76" name="Constant_23811" type="Const" version="opset1">
1031
  <data element_type="i32" shape="" offset="66544094" size="4" />
1032
  <output>
1033
  <port id="0" precision="I32" />
1034
  </output>
1035
  </layer>
1036
- <layer id="77" name="Unsqueeze_23812" type="Unsqueeze" version="opset1">
1037
  <input>
1038
  <port id="0" precision="I64" />
1039
  <port id="1" precision="I32" />
@@ -1075,7 +1075,7 @@
1075
  </port>
1076
  </output>
1077
  </layer>
1078
- <layer id="80" name="ShapeOf_31353" type="ShapeOf" version="opset3">
1079
  <data output_type="i32" />
1080
  <input>
1081
  <port id="0" precision="FP32">
@@ -1089,19 +1089,19 @@
1089
  </port>
1090
  </output>
1091
  </layer>
1092
- <layer id="81" name="Constant_31354" type="Const" version="opset1">
1093
  <data element_type="i64" shape="" offset="67118890" size="8" />
1094
  <output>
1095
  <port id="0" precision="I64" />
1096
  </output>
1097
  </layer>
1098
- <layer id="82" name="Constant_31355" type="Const" version="opset1">
1099
  <data element_type="i64" shape="" offset="67118882" size="8" />
1100
  <output>
1101
  <port id="0" precision="I64" />
1102
  </output>
1103
  </layer>
1104
- <layer id="83" name="Gather_31356" type="Gather" version="opset8">
1105
  <data batch_dims="0" />
1106
  <input>
1107
  <port id="0" precision="I32">
@@ -1153,7 +1153,7 @@
1153
  <port id="0" precision="I32" />
1154
  </output>
1155
  </layer>
1156
- <layer id="88" name="Convert_31359" type="Convert" version="opset1">
1157
  <data destination_type="i32" />
1158
  <input>
1159
  <port id="0" precision="I64" />
@@ -1295,7 +1295,7 @@
1295
  </port>
1296
  </output>
1297
  </layer>
1298
- <layer id="100" name="Constant_23914" type="Const" version="opset1">
1299
  <data element_type="i64" shape="2" offset="70418798" size="16" />
1300
  <output>
1301
  <port id="0" precision="I64" names="70">
@@ -1412,7 +1412,7 @@
1412
  </port>
1413
  </output>
1414
  </layer>
1415
- <layer id="108" name="Constant_31348" type="Const" version="opset1">
1416
  <data element_type="i64" shape="1" offset="67118882" size="8" />
1417
  <output>
1418
  <port id="0" precision="I64">
@@ -1420,13 +1420,13 @@
1420
  </port>
1421
  </output>
1422
  </layer>
1423
- <layer id="109" name="Constant_31349" type="Const" version="opset1">
1424
  <data element_type="i64" shape="" offset="67118882" size="8" />
1425
  <output>
1426
  <port id="0" precision="I64" />
1427
  </output>
1428
  </layer>
1429
- <layer id="110" name="Gather_31350" type="Gather" version="opset8">
1430
  <data batch_dims="0" />
1431
  <input>
1432
  <port id="0" precision="I64">
@@ -1443,7 +1443,7 @@
1443
  </port>
1444
  </output>
1445
  </layer>
1446
- <layer id="111" name="Constant_30340" type="Const" version="opset1">
1447
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1448
  <output>
1449
  <port id="0" precision="I64">
@@ -1451,7 +1451,7 @@
1451
  </port>
1452
  </output>
1453
  </layer>
1454
- <layer id="112" name="Constant_30342" type="Const" version="opset1">
1455
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1456
  <output>
1457
  <port id="0" precision="I64">
@@ -1459,7 +1459,7 @@
1459
  </port>
1460
  </output>
1461
  </layer>
1462
- <layer id="113" name="Constant_30344" type="Const" version="opset1">
1463
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1464
  <output>
1465
  <port id="0" precision="I64">
@@ -1511,7 +1511,7 @@
1511
  </port>
1512
  </output>
1513
  </layer>
1514
- <layer id="116" name="Constant_24348" type="Const" version="opset1">
1515
  <data element_type="i64" shape="1" offset="67118882" size="8" />
1516
  <output>
1517
  <port id="0" precision="I64">
@@ -1519,7 +1519,7 @@
1519
  </port>
1520
  </output>
1521
  </layer>
1522
- <layer id="117" name="ShapeOf_31364" type="ShapeOf" version="opset3">
1523
  <data output_type="i64" />
1524
  <input>
1525
  <port id="0" precision="FP32">
@@ -1534,7 +1534,7 @@
1534
  </port>
1535
  </output>
1536
  </layer>
1537
- <layer id="118" name="Constant_31365" type="Const" version="opset1">
1538
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1539
  <output>
1540
  <port id="0" precision="I64">
@@ -1542,13 +1542,13 @@
1542
  </port>
1543
  </output>
1544
  </layer>
1545
- <layer id="119" name="Constant_31366" type="Const" version="opset1">
1546
  <data element_type="i64" shape="" offset="67118882" size="8" />
1547
  <output>
1548
  <port id="0" precision="I64" />
1549
  </output>
1550
  </layer>
1551
- <layer id="120" name="Gather_31367" type="Gather" version="opset8">
1552
  <data batch_dims="0" />
1553
  <input>
1554
  <port id="0" precision="I64">
@@ -1565,7 +1565,7 @@
1565
  </port>
1566
  </output>
1567
  </layer>
1568
- <layer id="121" name="Constant_24351" type="Const" version="opset1">
1569
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1570
  <output>
1571
  <port id="0" precision="I64">
@@ -1664,7 +1664,7 @@
1664
  </port>
1665
  </output>
1666
  </layer>
1667
- <layer id="127" name="Convert_42364" type="Convert" version="opset1">
1668
  <data destination_type="f16" />
1669
  <input>
1670
  <port id="0" precision="U8">
@@ -1688,7 +1688,7 @@
1688
  </port>
1689
  </output>
1690
  </layer>
1691
- <layer id="129" name="Convert_42367" type="Convert" version="opset1">
1692
  <data destination_type="f16" />
1693
  <input>
1694
  <port id="0" precision="U8">
@@ -1786,7 +1786,7 @@
1786
  </port>
1787
  </output>
1788
  </layer>
1789
- <layer id="135" name="Constant_31112" type="Const" version="opset1">
1790
  <data element_type="f32" shape="1, 1, 1280" offset="72061062" size="5120" />
1791
  <output>
1792
  <port id="0" precision="FP32">
@@ -1818,7 +1818,7 @@
1818
  </port>
1819
  </output>
1820
  </layer>
1821
- <layer id="137" name="Constant_31371" type="Const" version="opset1">
1822
  <data element_type="i64" shape="4" offset="68776506" size="32" />
1823
  <output>
1824
  <port id="0" precision="I64">
@@ -1930,7 +1930,7 @@
1930
  </port>
1931
  </output>
1932
  </layer>
1933
- <layer id="144" name="Constant_31372" type="Const" version="opset1">
1934
  <data element_type="i64" shape="3" offset="72066182" size="24" />
1935
  <output>
1936
  <port id="0" precision="I64">
@@ -1968,7 +1968,7 @@
1968
  </port>
1969
  </output>
1970
  </layer>
1971
- <layer id="147" name="Convert_42397" type="Convert" version="opset1">
1972
  <data destination_type="f16" />
1973
  <input>
1974
  <port id="0" precision="U8">
@@ -1992,7 +1992,7 @@
1992
  </port>
1993
  </output>
1994
  </layer>
1995
- <layer id="149" name="Convert_42400" type="Convert" version="opset1">
1996
  <data destination_type="f16" />
1997
  <input>
1998
  <port id="0" precision="U8">
@@ -2090,7 +2090,7 @@
2090
  </port>
2091
  </output>
2092
  </layer>
2093
- <layer id="155" name="Constant_31113" type="Const" version="opset1">
2094
  <data element_type="f32" shape="1, 1, 1280" offset="73708446" size="5120" />
2095
  <output>
2096
  <port id="0" precision="FP32">
@@ -2172,7 +2172,7 @@
2172
  </port>
2173
  </output>
2174
  </layer>
2175
- <layer id="160" name="Constant_31114" type="Const" version="opset1">
2176
  <data element_type="f32" shape="1, 1, 1280" offset="73713566" size="5120" />
2177
  <output>
2178
  <port id="0" precision="FP32">
@@ -2204,7 +2204,7 @@
2204
  </port>
2205
  </output>
2206
  </layer>
2207
- <layer id="162" name="Constant_31115" type="Const" version="opset1">
2208
  <data element_type="f32" shape="1, 1, 1280" offset="73718686" size="5120" />
2209
  <output>
2210
  <port id="0" precision="FP32">
@@ -2236,7 +2236,7 @@
2236
  </port>
2237
  </output>
2238
  </layer>
2239
- <layer id="164" name="Constant_31015" type="Const" version="opset1">
2240
  <data element_type="u8" shape="1280, 1280" offset="73723806" size="1638400" />
2241
  <output>
2242
  <port id="0" precision="U8">
@@ -2245,7 +2245,7 @@
2245
  </port>
2246
  </output>
2247
  </layer>
2248
- <layer id="165" name="Convert_42408" type="Convert" version="opset1">
2249
  <data destination_type="f16" />
2250
  <input>
2251
  <port id="0" precision="U8">
@@ -2260,7 +2260,7 @@
2260
  </port>
2261
  </output>
2262
  </layer>
2263
- <layer id="166" name="Constant_31015/zero_point" type="Const" version="opset1">
2264
  <data element_type="u8" shape="1280, 1" offset="75362206" size="1280" />
2265
  <output>
2266
  <port id="0" precision="U8">
@@ -2269,7 +2269,7 @@
2269
  </port>
2270
  </output>
2271
  </layer>
2272
- <layer id="167" name="Convert_42411" type="Convert" version="opset1">
2273
  <data destination_type="f16" />
2274
  <input>
2275
  <port id="0" precision="U8">
@@ -2284,7 +2284,7 @@
2284
  </port>
2285
  </output>
2286
  </layer>
2287
- <layer id="168" name="Constant_31015/zero_point/subtract" type="Subtract" version="opset1">
2288
  <data auto_broadcast="numpy" />
2289
  <input>
2290
  <port id="0" precision="FP16">
@@ -2303,7 +2303,7 @@
2303
  </port>
2304
  </output>
2305
  </layer>
2306
- <layer id="169" name="Constant_31015/scale" type="Const" version="opset1">
2307
  <data element_type="f16" shape="1280, 1" offset="75363486" size="2560" />
2308
  <output>
2309
  <port id="0" precision="FP16">
@@ -2312,7 +2312,7 @@
2312
  </port>
2313
  </output>
2314
  </layer>
2315
- <layer id="170" name="Constant_31015/fq_weights_1" type="Multiply" version="opset1">
2316
  <data auto_broadcast="numpy" />
2317
  <input>
2318
  <port id="0" precision="FP16">
@@ -2331,7 +2331,7 @@
2331
  </port>
2332
  </output>
2333
  </layer>
2334
- <layer id="171" name="Constant_31015/fq_weights_1/convert" type="Convert" version="opset1">
2335
  <data destination_type="f32" />
2336
  <input>
2337
  <port id="0" precision="FP16">
@@ -2346,7 +2346,7 @@
2346
  </port>
2347
  </output>
2348
  </layer>
2349
- <layer id="172" name="Multiply_30999" type="MatMul" version="opset1">
2350
  <data transpose_a="false" transpose_b="true" />
2351
  <input>
2352
  <port id="0" precision="FP32">
@@ -2367,7 +2367,7 @@
2367
  </port>
2368
  </output>
2369
  </layer>
2370
- <layer id="173" name="Constant_31116" type="Const" version="opset1">
2371
  <data element_type="f32" shape="1, 1, 1280" offset="75366046" size="5120" />
2372
  <output>
2373
  <port id="0" precision="FP32">
@@ -2399,7 +2399,7 @@
2399
  </port>
2400
  </output>
2401
  </layer>
2402
- <layer id="175" name="Constant_31373" type="Const" version="opset1">
2403
  <data element_type="i64" shape="4" offset="68776506" size="32" />
2404
  <output>
2405
  <port id="0" precision="I64">
@@ -2466,7 +2466,7 @@
2466
  </port>
2467
  </output>
2468
  </layer>
2469
- <layer id="180" name="Convert_42353" type="Convert" version="opset1">
2470
  <data destination_type="f16" />
2471
  <input>
2472
  <port id="0" precision="U8">
@@ -2490,7 +2490,7 @@
2490
  </port>
2491
  </output>
2492
  </layer>
2493
- <layer id="182" name="Convert_42356" type="Convert" version="opset1">
2494
  <data destination_type="f16" />
2495
  <input>
2496
  <port id="0" precision="U8">
@@ -2588,7 +2588,7 @@
2588
  </port>
2589
  </output>
2590
  </layer>
2591
- <layer id="188" name="Constant_24572" type="Const" version="opset1">
2592
  <data element_type="i64" shape="1" offset="77013406" size="8" />
2593
  <output>
2594
  <port id="0" precision="I64">
@@ -2596,7 +2596,7 @@
2596
  </port>
2597
  </output>
2598
  </layer>
2599
- <layer id="189" name="Constant_24573" type="Const" version="opset1">
2600
  <data element_type="i64" shape="1" offset="77013414" size="8" />
2601
  <output>
2602
  <port id="0" precision="I64">
@@ -2604,7 +2604,7 @@
2604
  </port>
2605
  </output>
2606
  </layer>
2607
- <layer id="190" name="Constant_24574" type="Const" version="opset1">
2608
  <data element_type="i64" shape="1" offset="77013422" size="8" />
2609
  <output>
2610
  <port id="0" precision="I64">
@@ -2612,7 +2612,7 @@
2612
  </port>
2613
  </output>
2614
  </layer>
2615
- <layer id="191" name="__module.model.model.decoder.layers.0.encoder_attn/prim::ListConstruct_1" type="Concat" version="opset1">
2616
  <data axis="0" />
2617
  <input>
2618
  <port id="0" precision="I64">
@@ -2737,7 +2737,7 @@
2737
  </port>
2738
  </output>
2739
  </layer>
2740
- <layer id="198" name="Convert_42342" type="Convert" version="opset1">
2741
  <data destination_type="f16" />
2742
  <input>
2743
  <port id="0" precision="U8">
@@ -2761,7 +2761,7 @@
2761
  </port>
2762
  </output>
2763
  </layer>
2764
- <layer id="200" name="Convert_42345" type="Convert" version="opset1">
2765
  <data destination_type="f16" />
2766
  <input>
2767
  <port id="0" precision="U8">
@@ -2859,7 +2859,7 @@
2859
  </port>
2860
  </output>
2861
  </layer>
2862
- <layer id="206" name="Constant_31117" type="Const" version="opset1">
2863
  <data element_type="f32" shape="1, 1, 1280" offset="78655670" size="5120" />
2864
  <output>
2865
  <port id="0" precision="FP32">
@@ -2995,7 +2995,7 @@
2995
  </port>
2996
  </output>
2997
  </layer>
2998
- <layer id="214" name="Constant_31374" type="Const" version="opset1">
2999
  <data element_type="i64" shape="3" offset="78660790" size="24" />
3000
  <output>
3001
  <port id="0" precision="I64">
@@ -3033,7 +3033,7 @@
3033
  </port>
3034
  </output>
3035
  </layer>
3036
- <layer id="217" name="Convert_42419" type="Convert" version="opset1">
3037
  <data destination_type="f16" />
3038
  <input>
3039
  <port id="0" precision="U8">
@@ -3057,7 +3057,7 @@
3057
  </port>
3058
  </output>
3059
  </layer>
3060
- <layer id="219" name="Convert_42422" type="Convert" version="opset1">
3061
  <data destination_type="f16" />
3062
  <input>
3063
  <port id="0" precision="U8">
@@ -3155,7 +3155,7 @@
3155
  </port>
3156
  </output>
3157
  </layer>
3158
- <layer id="225" name="Constant_31118" type="Const" version="opset1">
3159
  <data element_type="f32" shape="1, 1, 1280" offset="80303054" size="5120" />
3160
  <output>
3161
  <port id="0" precision="FP32">
@@ -3237,7 +3237,7 @@
3237
  </port>
3238
  </output>
3239
  </layer>
3240
- <layer id="230" name="Constant_31119" type="Const" version="opset1">
3241
  <data element_type="f32" shape="1, 1, 1280" offset="80308174" size="5120" />
3242
  <output>
3243
  <port id="0" precision="FP32">
@@ -3269,7 +3269,7 @@
3269
  </port>
3270
  </output>
3271
  </layer>
3272
- <layer id="232" name="Constant_31120" type="Const" version="opset1">
3273
  <data element_type="f32" shape="1, 1, 1280" offset="80313294" size="5120" />
3274
  <output>
3275
  <port id="0" precision="FP32">
@@ -3310,7 +3310,7 @@
3310
  </port>
3311
  </output>
3312
  </layer>
3313
- <layer id="235" name="Convert_42276" type="Convert" version="opset1">
3314
  <data destination_type="f16" />
3315
  <input>
3316
  <port id="0" precision="U8">
@@ -3334,7 +3334,7 @@
3334
  </port>
3335
  </output>
3336
  </layer>
3337
- <layer id="237" name="Convert_42279" type="Convert" version="opset1">
3338
  <data destination_type="f16" />
3339
  <input>
3340
  <port id="0" precision="U8">
@@ -3432,7 +3432,7 @@
3432
  </port>
3433
  </output>
3434
  </layer>
3435
- <layer id="243" name="Constant_31121" type="Const" version="opset1">
3436
  <data element_type="f32" shape="1, 1, 5120" offset="86887374" size="20480" />
3437
  <output>
3438
  <port id="0" precision="FP32">
@@ -3490,7 +3490,7 @@
3490
  </port>
3491
  </output>
3492
  </layer>
3493
- <layer id="247" name="Convert_42287" type="Convert" version="opset1">
3494
  <data destination_type="f16" />
3495
  <input>
3496
  <port id="0" precision="U8">
@@ -3514,7 +3514,7 @@
3514
  </port>
3515
  </output>
3516
  </layer>
3517
- <layer id="249" name="Convert_42290" type="Convert" version="opset1">
3518
  <data destination_type="f16" />
3519
  <input>
3520
  <port id="0" precision="U8">
@@ -3612,7 +3612,7 @@
3612
  </port>
3613
  </output>
3614
  </layer>
3615
- <layer id="255" name="Constant_31122" type="Const" version="opset1">
3616
  <data element_type="f32" shape="1, 1, 1280" offset="93465294" size="5120" />
3617
  <output>
3618
  <port id="0" precision="FP32">
@@ -3694,7 +3694,7 @@
3694
  </port>
3695
  </output>
3696
  </layer>
3697
- <layer id="260" name="Constant_31123" type="Const" version="opset1">
3698
  <data element_type="f32" shape="1, 1, 1280" offset="93470414" size="5120" />
3699
  <output>
3700
  <port id="0" precision="FP32">
@@ -3726,7 +3726,7 @@
3726
  </port>
3727
  </output>
3728
  </layer>
3729
- <layer id="262" name="Constant_31124" type="Const" version="opset1">
3730
  <data element_type="f32" shape="1, 1, 1280" offset="93475534" size="5120" />
3731
  <output>
3732
  <port id="0" precision="FP32">
@@ -3758,7 +3758,7 @@
3758
  </port>
3759
  </output>
3760
  </layer>
3761
- <layer id="264" name="Constant_31018" type="Const" version="opset1">
3762
  <data element_type="u8" shape="1280, 1280" offset="93480654" size="1638400" />
3763
  <output>
3764
  <port id="0" precision="U8">
@@ -3767,7 +3767,7 @@
3767
  </port>
3768
  </output>
3769
  </layer>
3770
- <layer id="265" name="Convert_42452" type="Convert" version="opset1">
3771
  <data destination_type="f16" />
3772
  <input>
3773
  <port id="0" precision="U8">
@@ -3782,7 +3782,7 @@
3782
  </port>
3783
  </output>
3784
  </layer>
3785
- <layer id="266" name="Constant_31018/zero_point" type="Const" version="opset1">
3786
  <data element_type="u8" shape="1280, 1" offset="95119054" size="1280" />
3787
  <output>
3788
  <port id="0" precision="U8">
@@ -3791,7 +3791,7 @@
3791
  </port>
3792
  </output>
3793
  </layer>
3794
- <layer id="267" name="Convert_42455" type="Convert" version="opset1">
3795
  <data destination_type="f16" />
3796
  <input>
3797
  <port id="0" precision="U8">
@@ -3806,7 +3806,7 @@
3806
  </port>
3807
  </output>
3808
  </layer>
3809
- <layer id="268" name="Constant_31018/zero_point/subtract" type="Subtract" version="opset1">
3810
  <data auto_broadcast="numpy" />
3811
  <input>
3812
  <port id="0" precision="FP16">
@@ -3825,7 +3825,7 @@
3825
  </port>
3826
  </output>
3827
  </layer>
3828
- <layer id="269" name="Constant_31018/scale" type="Const" version="opset1">
3829
  <data element_type="f16" shape="1280, 1" offset="95120334" size="2560" />
3830
  <output>
3831
  <port id="0" precision="FP16">
@@ -3834,7 +3834,7 @@
3834
  </port>
3835
  </output>
3836
  </layer>
3837
- <layer id="270" name="Constant_31018/fq_weights_1" type="Multiply" version="opset1">
3838
  <data auto_broadcast="numpy" />
3839
  <input>
3840
  <port id="0" precision="FP16">
@@ -3853,7 +3853,7 @@
3853
  </port>
3854
  </output>
3855
  </layer>
3856
- <layer id="271" name="Constant_31018/fq_weights_1/convert" type="Convert" version="opset1">
3857
  <data destination_type="f32" />
3858
  <input>
3859
  <port id="0" precision="FP16">
@@ -3868,7 +3868,7 @@
3868
  </port>
3869
  </output>
3870
  </layer>
3871
- <layer id="272" name="Multiply_31003" type="MatMul" version="opset1">
3872
  <data transpose_a="false" transpose_b="true" />
3873
  <input>
3874
  <port id="0" precision="FP32">
@@ -3889,7 +3889,7 @@
3889
  </port>
3890
  </output>
3891
  </layer>
3892
- <layer id="273" name="Constant_31125" type="Const" version="opset1">
3893
  <data element_type="f32" shape="1, 1, 1280" offset="95122894" size="5120" />
3894
  <output>
3895
  <port id="0" precision="FP32">
@@ -3921,7 +3921,7 @@
3921
  </port>
3922
  </output>
3923
  </layer>
3924
- <layer id="275" name="Constant_31375" type="Const" version="opset1">
3925
  <data element_type="i64" shape="4" offset="68776506" size="32" />
3926
  <output>
3927
  <port id="0" precision="I64">
@@ -3988,7 +3988,7 @@
3988
  </port>
3989
  </output>
3990
  </layer>
3991
- <layer id="280" name="Convert_42441" type="Convert" version="opset1">
3992
  <data destination_type="f16" />
3993
  <input>
3994
  <port id="0" precision="U8">
@@ -4012,7 +4012,7 @@
4012
  </port>
4013
  </output>
4014
  </layer>
4015
- <layer id="282" name="Convert_42444" type="Convert" version="opset1">
4016
  <data destination_type="f16" />
4017
  <input>
4018
  <port id="0" precision="U8">
@@ -4110,7 +4110,7 @@
4110
  </port>
4111
  </output>
4112
  </layer>
4113
- <layer id="288" name="Constant_31376" type="Const" version="opset1">
4114
  <data element_type="i64" shape="4" offset="68776506" size="32" />
4115
  <output>
4116
  <port id="0" precision="I64">
@@ -4246,7 +4246,7 @@
4246
  </port>
4247
  </output>
4248
  </layer>
4249
- <layer id="296" name="Convert_42430" type="Convert" version="opset1">
4250
  <data destination_type="f16" />
4251
  <input>
4252
  <port id="0" precision="U8">
@@ -4270,7 +4270,7 @@
4270
  </port>
4271
  </output>
4272
  </layer>
4273
- <layer id="298" name="Convert_42433" type="Convert" version="opset1">
4274
  <data destination_type="f16" />
4275
  <input>
4276
  <port id="0" precision="U8">
@@ -4368,7 +4368,7 @@
4368
  </port>
4369
  </output>
4370
  </layer>
4371
- <layer id="304" name="Constant_31126" type="Const" version="opset1">
4372
  <data element_type="f32" shape="1, 1, 1280" offset="98412494" size="5120" />
4373
  <output>
4374
  <port id="0" precision="FP32">
@@ -4400,7 +4400,7 @@
4400
  </port>
4401
  </output>
4402
  </layer>
4403
- <layer id="306" name="Constant_31377" type="Const" version="opset1">
4404
  <data element_type="i64" shape="4" offset="68776506" size="32" />
4405
  <output>
4406
  <port id="0" precision="I64">
@@ -4512,7 +4512,7 @@
4512
  </port>
4513
  </output>
4514
  </layer>
4515
- <layer id="313" name="Constant_31378" type="Const" version="opset1">
4516
  <data element_type="i64" shape="3" offset="72066182" size="24" />
4517
  <output>
4518
  <port id="0" precision="I64">
@@ -4550,7 +4550,7 @@
4550
  </port>
4551
  </output>
4552
  </layer>
4553
- <layer id="316" name="Convert_42463" type="Convert" version="opset1">
4554
  <data destination_type="f16" />
4555
  <input>
4556
  <port id="0" precision="U8">
@@ -4574,7 +4574,7 @@
4574
  </port>
4575
  </output>
4576
  </layer>
4577
- <layer id="318" name="Convert_42466" type="Convert" version="opset1">
4578
  <data destination_type="f16" />
4579
  <input>
4580
  <port id="0" precision="U8">
@@ -4672,7 +4672,7 @@
4672
  </port>
4673
  </output>
4674
  </layer>
4675
- <layer id="324" name="Constant_31127" type="Const" version="opset1">
4676
  <data element_type="f32" shape="1, 1, 1280" offset="100059854" size="5120" />
4677
  <output>
4678
  <port id="0" precision="FP32">
@@ -4754,7 +4754,7 @@
4754
  </port>
4755
  </output>
4756
  </layer>
4757
- <layer id="329" name="Constant_31128" type="Const" version="opset1">
4758
  <data element_type="f32" shape="1, 1, 1280" offset="100064974" size="5120" />
4759
  <output>
4760
  <port id="0" precision="FP32">
@@ -4786,7 +4786,7 @@
4786
  </port>
4787
  </output>
4788
  </layer>
4789
- <layer id="331" name="Constant_31129" type="Const" version="opset1">
4790
  <data element_type="f32" shape="1, 1, 1280" offset="100070094" size="5120" />
4791
  <output>
4792
  <port id="0" precision="FP32">
@@ -4818,7 +4818,7 @@
4818
  </port>
4819
  </output>
4820
  </layer>
4821
- <layer id="333" name="Constant_31021" type="Const" version="opset1">
4822
  <data element_type="u8" shape="1280, 1280" offset="100075214" size="1638400" />
4823
  <output>
4824
  <port id="0" precision="U8">
@@ -4827,7 +4827,7 @@
4827
  </port>
4828
  </output>
4829
  </layer>
4830
- <layer id="334" name="Convert_42474" type="Convert" version="opset1">
4831
  <data destination_type="f16" />
4832
  <input>
4833
  <port id="0" precision="U8">
@@ -4842,7 +4842,7 @@
4842
  </port>
4843
  </output>
4844
  </layer>
4845
- <layer id="335" name="Constant_31021/zero_point" type="Const" version="opset1">
4846
  <data element_type="u8" shape="1280, 1" offset="101713614" size="1280" />
4847
  <output>
4848
  <port id="0" precision="U8">
@@ -4851,7 +4851,7 @@
4851
  </port>
4852
  </output>
4853
  </layer>
4854
- <layer id="336" name="Convert_42477" type="Convert" version="opset1">
4855
  <data destination_type="f16" />
4856
  <input>
4857
  <port id="0" precision="U8">
@@ -4866,7 +4866,7 @@
4866
  </port>
4867
  </output>
4868
  </layer>
4869
- <layer id="337" name="Constant_31021/zero_point/subtract" type="Subtract" version="opset1">
4870
  <data auto_broadcast="numpy" />
4871
  <input>
4872
  <port id="0" precision="FP16">
@@ -4885,7 +4885,7 @@
4885
  </port>
4886
  </output>
4887
  </layer>
4888
- <layer id="338" name="Constant_31021/scale" type="Const" version="opset1">
4889
  <data element_type="f16" shape="1280, 1" offset="101714894" size="2560" />
4890
  <output>
4891
  <port id="0" precision="FP16">
@@ -4894,7 +4894,7 @@
4894
  </port>
4895
  </output>
4896
  </layer>
4897
- <layer id="339" name="Constant_31021/fq_weights_1" type="Multiply" version="opset1">
4898
  <data auto_broadcast="numpy" />
4899
  <input>
4900
  <port id="0" precision="FP16">
@@ -4913,7 +4913,7 @@
4913
  </port>
4914
  </output>
4915
  </layer>
4916
- <layer id="340" name="Constant_31021/fq_weights_1/convert" type="Convert" version="opset1">
4917
  <data destination_type="f32" />
4918
  <input>
4919
  <port id="0" precision="FP16">
@@ -4928,7 +4928,7 @@
4928
  </port>
4929
  </output>
4930
  </layer>
4931
- <layer id="341" name="Multiply_31007" type="MatMul" version="opset1">
4932
  <data transpose_a="false" transpose_b="true" />
4933
  <input>
4934
  <port id="0" precision="FP32">
@@ -4949,7 +4949,7 @@
4949
  </port>
4950
  </output>
4951
  </layer>
4952
- <layer id="342" name="Constant_31130" type="Const" version="opset1">
4953
  <data element_type="f32" shape="1, 1, 1280" offset="101717454" size="5120" />
4954
  <output>
4955
  <port id="0" precision="FP32">
@@ -4981,7 +4981,7 @@
4981
  </port>
4982
  </output>
4983
  </layer>
4984
- <layer id="344" name="Constant_31379" type="Const" version="opset1">
4985
  <data element_type="i64" shape="4" offset="68776506" size="32" />
4986
  <output>
4987
  <port id="0" precision="I64">
@@ -5048,7 +5048,7 @@
5048
  </port>
5049
  </output>
5050
  </layer>
5051
- <layer id="349" name="Convert_42331" type="Convert" version="opset1">
5052
  <data destination_type="f16" />
5053
  <input>
5054
  <port id="0" precision="U8">
@@ -5072,7 +5072,7 @@
5072
  </port>
5073
  </output>
5074
  </layer>
5075
- <layer id="351" name="Convert_42334" type="Convert" version="opset1">
5076
  <data destination_type="f16" />
5077
  <input>
5078
  <port id="0" precision="U8">
@@ -5273,7 +5273,7 @@
5273
  </port>
5274
  </output>
5275
  </layer>
5276
- <layer id="363" name="Convert_42320" type="Convert" version="opset1">
5277
  <data destination_type="f16" />
5278
  <input>
5279
  <port id="0" precision="U8">
@@ -5297,7 +5297,7 @@
5297
  </port>
5298
  </output>
5299
  </layer>
5300
- <layer id="365" name="Convert_42323" type="Convert" version="opset1">
5301
  <data destination_type="f16" />
5302
  <input>
5303
  <port id="0" precision="U8">
@@ -5395,7 +5395,7 @@
5395
  </port>
5396
  </output>
5397
  </layer>
5398
- <layer id="371" name="Constant_31131" type="Const" version="opset1">
5399
  <data element_type="f32" shape="1, 1, 1280" offset="105007054" size="5120" />
5400
  <output>
5401
  <port id="0" precision="FP32">
@@ -5531,7 +5531,7 @@
5531
  </port>
5532
  </output>
5533
  </layer>
5534
- <layer id="379" name="Constant_31380" type="Const" version="opset1">
5535
  <data element_type="i64" shape="3" offset="78660790" size="24" />
5536
  <output>
5537
  <port id="0" precision="I64">
@@ -5569,7 +5569,7 @@
5569
  </port>
5570
  </output>
5571
  </layer>
5572
- <layer id="382" name="Convert_42485" type="Convert" version="opset1">
5573
  <data destination_type="f16" />
5574
  <input>
5575
  <port id="0" precision="U8">
@@ -5593,7 +5593,7 @@
5593
  </port>
5594
  </output>
5595
  </layer>
5596
- <layer id="384" name="Convert_42488" type="Convert" version="opset1">
5597
  <data destination_type="f16" />
5598
  <input>
5599
  <port id="0" precision="U8">
@@ -5691,7 +5691,7 @@
5691
  </port>
5692
  </output>
5693
  </layer>
5694
- <layer id="390" name="Constant_31132" type="Const" version="opset1">
5695
  <data element_type="f32" shape="1, 1, 1280" offset="106654414" size="5120" />
5696
  <output>
5697
  <port id="0" precision="FP32">
@@ -5773,7 +5773,7 @@
5773
  </port>
5774
  </output>
5775
  </layer>
5776
- <layer id="395" name="Constant_31133" type="Const" version="opset1">
5777
  <data element_type="f32" shape="1, 1, 1280" offset="106659534" size="5120" />
5778
  <output>
5779
  <port id="0" precision="FP32">
@@ -5805,7 +5805,7 @@
5805
  </port>
5806
  </output>
5807
  </layer>
5808
- <layer id="397" name="Constant_31134" type="Const" version="opset1">
5809
  <data element_type="f32" shape="1, 1, 1280" offset="106664654" size="5120" />
5810
  <output>
5811
  <port id="0" precision="FP32">
@@ -5846,7 +5846,7 @@
5846
  </port>
5847
  </output>
5848
  </layer>
5849
- <layer id="400" name="Convert_42298" type="Convert" version="opset1">
5850
  <data destination_type="f16" />
5851
  <input>
5852
  <port id="0" precision="U8">
@@ -5870,7 +5870,7 @@
5870
  </port>
5871
  </output>
5872
  </layer>
5873
- <layer id="402" name="Convert_42301" type="Convert" version="opset1">
5874
  <data destination_type="f16" />
5875
  <input>
5876
  <port id="0" precision="U8">
@@ -5968,7 +5968,7 @@
5968
  </port>
5969
  </output>
5970
  </layer>
5971
- <layer id="408" name="Constant_31135" type="Const" version="opset1">
5972
  <data element_type="f32" shape="1, 1, 5120" offset="113238734" size="20480" />
5973
  <output>
5974
  <port id="0" precision="FP32">
@@ -6026,7 +6026,7 @@
6026
  </port>
6027
  </output>
6028
  </layer>
6029
- <layer id="412" name="Convert_42309" type="Convert" version="opset1">
6030
  <data destination_type="f16" />
6031
  <input>
6032
  <port id="0" precision="U8">
@@ -6050,7 +6050,7 @@
6050
  </port>
6051
  </output>
6052
  </layer>
6053
- <layer id="414" name="Convert_42312" type="Convert" version="opset1">
6054
  <data destination_type="f16" />
6055
  <input>
6056
  <port id="0" precision="U8">
@@ -6148,7 +6148,7 @@
6148
  </port>
6149
  </output>
6150
  </layer>
6151
- <layer id="420" name="Constant_31136" type="Const" version="opset1">
6152
  <data element_type="f32" shape="1, 1, 1280" offset="119816654" size="5120" />
6153
  <output>
6154
  <port id="0" precision="FP32">
@@ -6230,7 +6230,7 @@
6230
  </port>
6231
  </output>
6232
  </layer>
6233
- <layer id="425" name="Constant_31137" type="Const" version="opset1">
6234
  <data element_type="f32" shape="1, 1, 1280" offset="119821774" size="5120" />
6235
  <output>
6236
  <port id="0" precision="FP32">
@@ -6262,7 +6262,7 @@
6262
  </port>
6263
  </output>
6264
  </layer>
6265
- <layer id="427" name="Constant_31138" type="Const" version="opset1">
6266
  <data element_type="f32" shape="1, 1, 1280" offset="119826894" size="5120" />
6267
  <output>
6268
  <port id="0" precision="FP32">
@@ -6315,7 +6315,7 @@
6315
  </port>
6316
  </output>
6317
  </layer>
6318
- <layer id="430" name="Result_25933" type="Result" version="opset1">
6319
  <input>
6320
  <port id="0" precision="FP32">
6321
  <dim>-1</dim>
@@ -6792,7 +6792,7 @@
6792
  <edge from-layer="429" from-port="2" to-layer="430" to-port="0" />
6793
  </edges>
6794
  <rt_info>
6795
- <Runtime_version value="2024.5.0-16901-32aaa2fbd96" />
6796
  <conversion_parameters>
6797
  <framework value="pytorch" />
6798
  <is_python_object value="True" />
@@ -6800,9 +6800,10 @@
6800
  <nncf>
6801
  <friendly_names_were_updated value="True" />
6802
  <weight_compression>
6803
- <advanced_parameters value="{'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}}" />
6804
  <all_layers value="False" />
6805
  <awq value="False" />
 
6806
  <gptq value="False" />
6807
  <group_size value="-1" />
6808
  <ignored_scope value="[]" />
@@ -6814,10 +6815,10 @@
6814
  </weight_compression>
6815
  </nncf>
6816
  <optimum>
6817
- <optimum_intel_version value="1.20.0.dev0+2559620" />
6818
- <optimum_version value="1.23.1" />
6819
- <pytorch_version value="2.5.0" />
6820
- <transformers_version value="4.45.2" />
6821
  </optimum>
6822
  </rt_info>
6823
  </net>
 
10
  </port>
11
  </output>
12
  </layer>
13
+ <layer id="0" name="Parameter_26207" type="Parameter" version="opset1">
14
  <data shape="?,?,?" element_type="f32" />
15
  <output>
16
  <port id="0" precision="FP32" names="encoder_hidden_states">
 
29
  </port>
30
  </output>
31
  </layer>
32
+ <layer id="3" name="Convert_42605" type="Convert" version="opset1">
33
  <data destination_type="f16" />
34
  <input>
35
  <port id="0" precision="U8">
 
53
  </port>
54
  </output>
55
  </layer>
56
+ <layer id="5" name="Convert_42608" type="Convert" version="opset1">
57
  <data destination_type="f16" />
58
  <input>
59
  <port id="0" precision="U8">
 
130
  </port>
131
  </output>
132
  </layer>
133
+ <layer id="10" name="Constant_31708" type="Const" version="opset1">
134
  <data element_type="i64" shape="2" offset="66544078" size="16" />
135
  <output>
136
  <port id="0" precision="I64">
 
207
  </port>
208
  </output>
209
  </layer>
210
+ <layer id="16" name="Convert_42836" type="Convert" version="opset1">
211
  <data destination_type="f16" />
212
  <input>
213
  <port id="0" precision="U8">
 
231
  </port>
232
  </output>
233
  </layer>
234
+ <layer id="18" name="Convert_42839" type="Convert" version="opset1">
235
  <data destination_type="f16" />
236
  <input>
237
  <port id="0" precision="U8">
 
314
  <port id="0" precision="I64" names="23" />
315
  </output>
316
  </layer>
317
+ <layer id="24" name="ShapeOf_31624" type="ShapeOf" version="opset3">
318
  <data output_type="i64" />
319
  <input>
320
  <port id="0" precision="I64">
 
328
  </port>
329
  </output>
330
  </layer>
331
+ <layer id="25" name="Constant_31625" type="Const" version="opset1">
332
  <data element_type="i64" shape="" offset="67118890" size="8" />
333
  <output>
334
  <port id="0" precision="I64" />
335
  </output>
336
  </layer>
337
+ <layer id="26" name="Constant_31626" type="Const" version="opset1">
338
  <data element_type="i64" shape="" offset="67118882" size="8" />
339
  <output>
340
  <port id="0" precision="I64" />
341
  </output>
342
  </layer>
343
+ <layer id="27" name="Gather_31627" type="Gather" version="opset8">
344
  <data batch_dims="0" />
345
  <input>
346
  <port id="0" precision="I64">
 
401
  </port>
402
  </output>
403
  </layer>
404
+ <layer id="32" name="__module.model.model.decoder.embed_positions/aten::index/Constant" type="Const" version="opset1">
405
  <data element_type="i32" shape="" offset="66544094" size="4" />
406
  <output>
407
  <port id="0" precision="I32" />
 
478
  </port>
479
  </output>
480
  </layer>
481
+ <layer id="37" name="Constant_31390" type="Const" version="opset1">
482
  <data element_type="f32" shape="1, 1, 1280" offset="67118906" size="5120" />
483
  <output>
484
  <port id="0" precision="FP32">
 
510
  </port>
511
  </output>
512
  </layer>
513
+ <layer id="39" name="Constant_31391" type="Const" version="opset1">
514
  <data element_type="f32" shape="1, 1, 1280" offset="67124026" size="5120" />
515
  <output>
516
  <port id="0" precision="FP32">
 
542
  </port>
543
  </output>
544
  </layer>
545
+ <layer id="41" name="Constant_31293" type="Const" version="opset1">
546
  <data element_type="u8" shape="1280, 1280" offset="67129146" size="1638400" />
547
  <output>
548
  <port id="0" precision="U8">
 
551
  </port>
552
  </output>
553
  </layer>
554
+ <layer id="42" name="Convert_42726" type="Convert" version="opset1">
555
  <data destination_type="f16" />
556
  <input>
557
  <port id="0" precision="U8">
 
566
  </port>
567
  </output>
568
  </layer>
569
+ <layer id="43" name="Constant_31293/zero_point" type="Const" version="opset1">
570
  <data element_type="u8" shape="1280, 1" offset="68767546" size="1280" />
571
  <output>
572
  <port id="0" precision="U8">
 
575
  </port>
576
  </output>
577
  </layer>
578
+ <layer id="44" name="Convert_42729" type="Convert" version="opset1">
579
  <data destination_type="f16" />
580
  <input>
581
  <port id="0" precision="U8">
 
590
  </port>
591
  </output>
592
  </layer>
593
+ <layer id="45" name="Constant_31293/zero_point/subtract" type="Subtract" version="opset1">
594
  <data auto_broadcast="numpy" />
595
  <input>
596
  <port id="0" precision="FP16">
 
609
  </port>
610
  </output>
611
  </layer>
612
+ <layer id="46" name="Constant_31293/scale" type="Const" version="opset1">
613
  <data element_type="f16" shape="1280, 1" offset="68768826" size="2560" />
614
  <output>
615
  <port id="0" precision="FP16">
 
618
  </port>
619
  </output>
620
  </layer>
621
+ <layer id="47" name="Constant_31293/fq_weights_1" type="Multiply" version="opset1">
622
  <data auto_broadcast="numpy" />
623
  <input>
624
  <port id="0" precision="FP16">
 
637
  </port>
638
  </output>
639
  </layer>
640
+ <layer id="48" name="Constant_31293/fq_weights_1/convert" type="Convert" version="opset1">
641
  <data destination_type="f32" />
642
  <input>
643
  <port id="0" precision="FP16">
 
652
  </port>
653
  </output>
654
  </layer>
655
+ <layer id="49" name="Multiply_31276" type="MatMul" version="opset1">
656
  <data transpose_a="false" transpose_b="true" />
657
  <input>
658
  <port id="0" precision="FP32">
 
673
  </port>
674
  </output>
675
  </layer>
676
+ <layer id="50" name="Constant_31392" type="Const" version="opset1">
677
  <data element_type="f32" shape="1, 1, 1280" offset="68771386" size="5120" />
678
  <output>
679
  <port id="0" precision="FP32">
 
705
  </port>
706
  </output>
707
  </layer>
708
+ <layer id="52" name="Constant_31709" type="Const" version="opset1">
709
  <data element_type="i64" shape="4" offset="68776506" size="32" />
710
  <output>
711
  <port id="0" precision="I64">
 
772
  </port>
773
  </output>
774
  </layer>
775
+ <layer id="57" name="Convert_42715" type="Convert" version="opset1">
776
  <data destination_type="f16" />
777
  <input>
778
  <port id="0" precision="U8">
 
796
  </port>
797
  </output>
798
  </layer>
799
+ <layer id="59" name="Convert_42718" type="Convert" version="opset1">
800
  <data destination_type="f16" />
801
  <input>
802
  <port id="0" precision="U8">
 
894
  </port>
895
  </output>
896
  </layer>
897
+ <layer id="65" name="Constant_31710" type="Const" version="opset1">
898
  <data element_type="i64" shape="4" offset="68776506" size="32" />
899
  <output>
900
  <port id="0" precision="I64">
 
983
  <port id="0" precision="I32" />
984
  </output>
985
  </layer>
986
+ <layer id="71" name="17" type="Const" version="opset1">
987
  <data element_type="f32" shape="" offset="70418794" size="4" />
988
  <output>
989
+ <port id="0" precision="FP32" names="17" />
990
  </output>
991
  </layer>
992
+ <layer id="72" name="Constant_31648" type="Const" version="opset1">
993
  <data element_type="i64" shape="1" offset="67118890" size="8" />
994
  <output>
995
  <port id="0" precision="I64">
 
997
  </port>
998
  </output>
999
  </layer>
1000
+ <layer id="73" name="Reshape_31649" type="Reshape" version="opset1">
1001
  <data special_zero="false" />
1002
  <input>
1003
  <port id="0" precision="I64" />
 
1027
  <port id="2" precision="I64" names="62,64" />
1028
  </output>
1029
  </layer>
1030
+ <layer id="76" name="Constant_24084" type="Const" version="opset1">
1031
  <data element_type="i32" shape="" offset="66544094" size="4" />
1032
  <output>
1033
  <port id="0" precision="I32" />
1034
  </output>
1035
  </layer>
1036
+ <layer id="77" name="Unsqueeze_24085" type="Unsqueeze" version="opset1">
1037
  <input>
1038
  <port id="0" precision="I64" />
1039
  <port id="1" precision="I32" />
 
1075
  </port>
1076
  </output>
1077
  </layer>
1078
+ <layer id="80" name="ShapeOf_31655" type="ShapeOf" version="opset3">
1079
  <data output_type="i32" />
1080
  <input>
1081
  <port id="0" precision="FP32">
 
1089
  </port>
1090
  </output>
1091
  </layer>
1092
+ <layer id="81" name="Constant_31656" type="Const" version="opset1">
1093
  <data element_type="i64" shape="" offset="67118890" size="8" />
1094
  <output>
1095
  <port id="0" precision="I64" />
1096
  </output>
1097
  </layer>
1098
+ <layer id="82" name="Constant_31657" type="Const" version="opset1">
1099
  <data element_type="i64" shape="" offset="67118882" size="8" />
1100
  <output>
1101
  <port id="0" precision="I64" />
1102
  </output>
1103
  </layer>
1104
+ <layer id="83" name="Gather_31658" type="Gather" version="opset8">
1105
  <data batch_dims="0" />
1106
  <input>
1107
  <port id="0" precision="I32">
 
1153
  <port id="0" precision="I32" />
1154
  </output>
1155
  </layer>
1156
+ <layer id="88" name="Convert_31661" type="Convert" version="opset1">
1157
  <data destination_type="i32" />
1158
  <input>
1159
  <port id="0" precision="I64" />
 
1295
  </port>
1296
  </output>
1297
  </layer>
1298
+ <layer id="100" name="Constant_24187" type="Const" version="opset1">
1299
  <data element_type="i64" shape="2" offset="70418798" size="16" />
1300
  <output>
1301
  <port id="0" precision="I64" names="70">
 
1412
  </port>
1413
  </output>
1414
  </layer>
1415
+ <layer id="108" name="Constant_31639" type="Const" version="opset1">
1416
  <data element_type="i64" shape="1" offset="67118882" size="8" />
1417
  <output>
1418
  <port id="0" precision="I64">
 
1420
  </port>
1421
  </output>
1422
  </layer>
1423
+ <layer id="109" name="Constant_31640" type="Const" version="opset1">
1424
  <data element_type="i64" shape="" offset="67118882" size="8" />
1425
  <output>
1426
  <port id="0" precision="I64" />
1427
  </output>
1428
  </layer>
1429
+ <layer id="110" name="Gather_31641" type="Gather" version="opset8">
1430
  <data batch_dims="0" />
1431
  <input>
1432
  <port id="0" precision="I64">
 
1443
  </port>
1444
  </output>
1445
  </layer>
1446
+ <layer id="111" name="Constant_30625" type="Const" version="opset1">
1447
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1448
  <output>
1449
  <port id="0" precision="I64">
 
1451
  </port>
1452
  </output>
1453
  </layer>
1454
+ <layer id="112" name="Constant_30627" type="Const" version="opset1">
1455
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1456
  <output>
1457
  <port id="0" precision="I64">
 
1459
  </port>
1460
  </output>
1461
  </layer>
1462
+ <layer id="113" name="Constant_30629" type="Const" version="opset1">
1463
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1464
  <output>
1465
  <port id="0" precision="I64">
 
1511
  </port>
1512
  </output>
1513
  </layer>
1514
+ <layer id="116" name="Constant_24621" type="Const" version="opset1">
1515
  <data element_type="i64" shape="1" offset="67118882" size="8" />
1516
  <output>
1517
  <port id="0" precision="I64">
 
1519
  </port>
1520
  </output>
1521
  </layer>
1522
+ <layer id="117" name="ShapeOf_31670" type="ShapeOf" version="opset3">
1523
  <data output_type="i64" />
1524
  <input>
1525
  <port id="0" precision="FP32">
 
1534
  </port>
1535
  </output>
1536
  </layer>
1537
+ <layer id="118" name="Constant_31671" type="Const" version="opset1">
1538
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1539
  <output>
1540
  <port id="0" precision="I64">
 
1542
  </port>
1543
  </output>
1544
  </layer>
1545
+ <layer id="119" name="Constant_31672" type="Const" version="opset1">
1546
  <data element_type="i64" shape="" offset="67118882" size="8" />
1547
  <output>
1548
  <port id="0" precision="I64" />
1549
  </output>
1550
  </layer>
1551
+ <layer id="120" name="Gather_31673" type="Gather" version="opset8">
1552
  <data batch_dims="0" />
1553
  <input>
1554
  <port id="0" precision="I64">
 
1565
  </port>
1566
  </output>
1567
  </layer>
1568
+ <layer id="121" name="Constant_24624" type="Const" version="opset1">
1569
  <data element_type="i64" shape="1" offset="67118890" size="8" />
1570
  <output>
1571
  <port id="0" precision="I64">
 
1664
  </port>
1665
  </output>
1666
  </layer>
1667
+ <layer id="127" name="Convert_42704" type="Convert" version="opset1">
1668
  <data destination_type="f16" />
1669
  <input>
1670
  <port id="0" precision="U8">
 
1688
  </port>
1689
  </output>
1690
  </layer>
1691
+ <layer id="129" name="Convert_42707" type="Convert" version="opset1">
1692
  <data destination_type="f16" />
1693
  <input>
1694
  <port id="0" precision="U8">
 
1786
  </port>
1787
  </output>
1788
  </layer>
1789
+ <layer id="135" name="Constant_31393" type="Const" version="opset1">
1790
  <data element_type="f32" shape="1, 1, 1280" offset="72061062" size="5120" />
1791
  <output>
1792
  <port id="0" precision="FP32">
 
1818
  </port>
1819
  </output>
1820
  </layer>
1821
+ <layer id="137" name="Constant_31711" type="Const" version="opset1">
1822
  <data element_type="i64" shape="4" offset="68776506" size="32" />
1823
  <output>
1824
  <port id="0" precision="I64">
 
1930
  </port>
1931
  </output>
1932
  </layer>
1933
+ <layer id="144" name="Constant_31712" type="Const" version="opset1">
1934
  <data element_type="i64" shape="3" offset="72066182" size="24" />
1935
  <output>
1936
  <port id="0" precision="I64">
 
1968
  </port>
1969
  </output>
1970
  </layer>
1971
+ <layer id="147" name="Convert_42737" type="Convert" version="opset1">
1972
  <data destination_type="f16" />
1973
  <input>
1974
  <port id="0" precision="U8">
 
1992
  </port>
1993
  </output>
1994
  </layer>
1995
+ <layer id="149" name="Convert_42740" type="Convert" version="opset1">
1996
  <data destination_type="f16" />
1997
  <input>
1998
  <port id="0" precision="U8">
 
2090
  </port>
2091
  </output>
2092
  </layer>
2093
+ <layer id="155" name="Constant_31394" type="Const" version="opset1">
2094
  <data element_type="f32" shape="1, 1, 1280" offset="73708446" size="5120" />
2095
  <output>
2096
  <port id="0" precision="FP32">
 
2172
  </port>
2173
  </output>
2174
  </layer>
2175
+ <layer id="160" name="Constant_31395" type="Const" version="opset1">
2176
  <data element_type="f32" shape="1, 1, 1280" offset="73713566" size="5120" />
2177
  <output>
2178
  <port id="0" precision="FP32">
 
2204
  </port>
2205
  </output>
2206
  </layer>
2207
+ <layer id="162" name="Constant_31396" type="Const" version="opset1">
2208
  <data element_type="f32" shape="1, 1, 1280" offset="73718686" size="5120" />
2209
  <output>
2210
  <port id="0" precision="FP32">
 
2236
  </port>
2237
  </output>
2238
  </layer>
2239
+ <layer id="164" name="Constant_31296" type="Const" version="opset1">
2240
  <data element_type="u8" shape="1280, 1280" offset="73723806" size="1638400" />
2241
  <output>
2242
  <port id="0" precision="U8">
 
2245
  </port>
2246
  </output>
2247
  </layer>
2248
+ <layer id="165" name="Convert_42748" type="Convert" version="opset1">
2249
  <data destination_type="f16" />
2250
  <input>
2251
  <port id="0" precision="U8">
 
2260
  </port>
2261
  </output>
2262
  </layer>
2263
+ <layer id="166" name="Constant_31296/zero_point" type="Const" version="opset1">
2264
  <data element_type="u8" shape="1280, 1" offset="75362206" size="1280" />
2265
  <output>
2266
  <port id="0" precision="U8">
 
2269
  </port>
2270
  </output>
2271
  </layer>
2272
+ <layer id="167" name="Convert_42751" type="Convert" version="opset1">
2273
  <data destination_type="f16" />
2274
  <input>
2275
  <port id="0" precision="U8">
 
2284
  </port>
2285
  </output>
2286
  </layer>
2287
+ <layer id="168" name="Constant_31296/zero_point/subtract" type="Subtract" version="opset1">
2288
  <data auto_broadcast="numpy" />
2289
  <input>
2290
  <port id="0" precision="FP16">
 
2303
  </port>
2304
  </output>
2305
  </layer>
2306
+ <layer id="169" name="Constant_31296/scale" type="Const" version="opset1">
2307
  <data element_type="f16" shape="1280, 1" offset="75363486" size="2560" />
2308
  <output>
2309
  <port id="0" precision="FP16">
 
2312
  </port>
2313
  </output>
2314
  </layer>
2315
+ <layer id="170" name="Constant_31296/fq_weights_1" type="Multiply" version="opset1">
2316
  <data auto_broadcast="numpy" />
2317
  <input>
2318
  <port id="0" precision="FP16">
 
2331
  </port>
2332
  </output>
2333
  </layer>
2334
+ <layer id="171" name="Constant_31296/fq_weights_1/convert" type="Convert" version="opset1">
2335
  <data destination_type="f32" />
2336
  <input>
2337
  <port id="0" precision="FP16">
 
2346
  </port>
2347
  </output>
2348
  </layer>
2349
+ <layer id="172" name="Multiply_31280" type="MatMul" version="opset1">
2350
  <data transpose_a="false" transpose_b="true" />
2351
  <input>
2352
  <port id="0" precision="FP32">
 
2367
  </port>
2368
  </output>
2369
  </layer>
2370
+ <layer id="173" name="Constant_31397" type="Const" version="opset1">
2371
  <data element_type="f32" shape="1, 1, 1280" offset="75366046" size="5120" />
2372
  <output>
2373
  <port id="0" precision="FP32">
 
2399
  </port>
2400
  </output>
2401
  </layer>
2402
+ <layer id="175" name="Constant_31713" type="Const" version="opset1">
2403
  <data element_type="i64" shape="4" offset="68776506" size="32" />
2404
  <output>
2405
  <port id="0" precision="I64">
 
2466
  </port>
2467
  </output>
2468
  </layer>
2469
+ <layer id="180" name="Convert_42693" type="Convert" version="opset1">
2470
  <data destination_type="f16" />
2471
  <input>
2472
  <port id="0" precision="U8">
 
2490
  </port>
2491
  </output>
2492
  </layer>
2493
+ <layer id="182" name="Convert_42696" type="Convert" version="opset1">
2494
  <data destination_type="f16" />
2495
  <input>
2496
  <port id="0" precision="U8">
 
2588
  </port>
2589
  </output>
2590
  </layer>
2591
+ <layer id="188" name="Constant_24369" type="Const" version="opset1">
2592
  <data element_type="i64" shape="1" offset="77013406" size="8" />
2593
  <output>
2594
  <port id="0" precision="I64">
 
2596
  </port>
2597
  </output>
2598
  </layer>
2599
+ <layer id="189" name="Constant_24370" type="Const" version="opset1">
2600
  <data element_type="i64" shape="1" offset="77013414" size="8" />
2601
  <output>
2602
  <port id="0" precision="I64">
 
2604
  </port>
2605
  </output>
2606
  </layer>
2607
+ <layer id="190" name="Constant_24371" type="Const" version="opset1">
2608
  <data element_type="i64" shape="1" offset="77013422" size="8" />
2609
  <output>
2610
  <port id="0" precision="I64">
 
2612
  </port>
2613
  </output>
2614
  </layer>
2615
+ <layer id="191" name="__module.model.model.decoder.layers.0.self_attn/prim::ListConstruct_1" type="Concat" version="opset1">
2616
  <data axis="0" />
2617
  <input>
2618
  <port id="0" precision="I64">
 
2737
  </port>
2738
  </output>
2739
  </layer>
2740
+ <layer id="198" name="Convert_42682" type="Convert" version="opset1">
2741
  <data destination_type="f16" />
2742
  <input>
2743
  <port id="0" precision="U8">
 
2761
  </port>
2762
  </output>
2763
  </layer>
2764
+ <layer id="200" name="Convert_42685" type="Convert" version="opset1">
2765
  <data destination_type="f16" />
2766
  <input>
2767
  <port id="0" precision="U8">
 
2859
  </port>
2860
  </output>
2861
  </layer>
2862
+ <layer id="206" name="Constant_31398" type="Const" version="opset1">
2863
  <data element_type="f32" shape="1, 1, 1280" offset="78655670" size="5120" />
2864
  <output>
2865
  <port id="0" precision="FP32">
 
2995
  </port>
2996
  </output>
2997
  </layer>
2998
+ <layer id="214" name="Constant_31714" type="Const" version="opset1">
2999
  <data element_type="i64" shape="3" offset="78660790" size="24" />
3000
  <output>
3001
  <port id="0" precision="I64">
 
3033
  </port>
3034
  </output>
3035
  </layer>
3036
+ <layer id="217" name="Convert_42759" type="Convert" version="opset1">
3037
  <data destination_type="f16" />
3038
  <input>
3039
  <port id="0" precision="U8">
 
3057
  </port>
3058
  </output>
3059
  </layer>
3060
+ <layer id="219" name="Convert_42762" type="Convert" version="opset1">
3061
  <data destination_type="f16" />
3062
  <input>
3063
  <port id="0" precision="U8">
 
3155
  </port>
3156
  </output>
3157
  </layer>
3158
+ <layer id="225" name="Constant_31399" type="Const" version="opset1">
3159
  <data element_type="f32" shape="1, 1, 1280" offset="80303054" size="5120" />
3160
  <output>
3161
  <port id="0" precision="FP32">
 
3237
  </port>
3238
  </output>
3239
  </layer>
3240
+ <layer id="230" name="Constant_31400" type="Const" version="opset1">
3241
  <data element_type="f32" shape="1, 1, 1280" offset="80308174" size="5120" />
3242
  <output>
3243
  <port id="0" precision="FP32">
 
3269
  </port>
3270
  </output>
3271
  </layer>
3272
+ <layer id="232" name="Constant_31401" type="Const" version="opset1">
3273
  <data element_type="f32" shape="1, 1, 1280" offset="80313294" size="5120" />
3274
  <output>
3275
  <port id="0" precision="FP32">
 
3310
  </port>
3311
  </output>
3312
  </layer>
3313
+ <layer id="235" name="Convert_42616" type="Convert" version="opset1">
3314
  <data destination_type="f16" />
3315
  <input>
3316
  <port id="0" precision="U8">
 
3334
  </port>
3335
  </output>
3336
  </layer>
3337
+ <layer id="237" name="Convert_42619" type="Convert" version="opset1">
3338
  <data destination_type="f16" />
3339
  <input>
3340
  <port id="0" precision="U8">
 
3432
  </port>
3433
  </output>
3434
  </layer>
3435
+ <layer id="243" name="Constant_31402" type="Const" version="opset1">
3436
  <data element_type="f32" shape="1, 1, 5120" offset="86887374" size="20480" />
3437
  <output>
3438
  <port id="0" precision="FP32">
 
3490
  </port>
3491
  </output>
3492
  </layer>
3493
+ <layer id="247" name="Convert_42627" type="Convert" version="opset1">
3494
  <data destination_type="f16" />
3495
  <input>
3496
  <port id="0" precision="U8">
 
3514
  </port>
3515
  </output>
3516
  </layer>
3517
+ <layer id="249" name="Convert_42630" type="Convert" version="opset1">
3518
  <data destination_type="f16" />
3519
  <input>
3520
  <port id="0" precision="U8">
 
3612
  </port>
3613
  </output>
3614
  </layer>
3615
+ <layer id="255" name="Constant_31403" type="Const" version="opset1">
3616
  <data element_type="f32" shape="1, 1, 1280" offset="93465294" size="5120" />
3617
  <output>
3618
  <port id="0" precision="FP32">
 
3694
  </port>
3695
  </output>
3696
  </layer>
3697
+ <layer id="260" name="Constant_31404" type="Const" version="opset1">
3698
  <data element_type="f32" shape="1, 1, 1280" offset="93470414" size="5120" />
3699
  <output>
3700
  <port id="0" precision="FP32">
 
3726
  </port>
3727
  </output>
3728
  </layer>
3729
+ <layer id="262" name="Constant_31405" type="Const" version="opset1">
3730
  <data element_type="f32" shape="1, 1, 1280" offset="93475534" size="5120" />
3731
  <output>
3732
  <port id="0" precision="FP32">
 
3758
  </port>
3759
  </output>
3760
  </layer>
3761
+ <layer id="264" name="Constant_31299" type="Const" version="opset1">
3762
  <data element_type="u8" shape="1280, 1280" offset="93480654" size="1638400" />
3763
  <output>
3764
  <port id="0" precision="U8">
 
3767
  </port>
3768
  </output>
3769
  </layer>
3770
+ <layer id="265" name="Convert_42792" type="Convert" version="opset1">
3771
  <data destination_type="f16" />
3772
  <input>
3773
  <port id="0" precision="U8">
 
3782
  </port>
3783
  </output>
3784
  </layer>
3785
+ <layer id="266" name="Constant_31299/zero_point" type="Const" version="opset1">
3786
  <data element_type="u8" shape="1280, 1" offset="95119054" size="1280" />
3787
  <output>
3788
  <port id="0" precision="U8">
 
3791
  </port>
3792
  </output>
3793
  </layer>
3794
+ <layer id="267" name="Convert_42795" type="Convert" version="opset1">
3795
  <data destination_type="f16" />
3796
  <input>
3797
  <port id="0" precision="U8">
 
3806
  </port>
3807
  </output>
3808
  </layer>
3809
+ <layer id="268" name="Constant_31299/zero_point/subtract" type="Subtract" version="opset1">
3810
  <data auto_broadcast="numpy" />
3811
  <input>
3812
  <port id="0" precision="FP16">
 
3825
  </port>
3826
  </output>
3827
  </layer>
3828
+ <layer id="269" name="Constant_31299/scale" type="Const" version="opset1">
3829
  <data element_type="f16" shape="1280, 1" offset="95120334" size="2560" />
3830
  <output>
3831
  <port id="0" precision="FP16">
 
3834
  </port>
3835
  </output>
3836
  </layer>
3837
+ <layer id="270" name="Constant_31299/fq_weights_1" type="Multiply" version="opset1">
3838
  <data auto_broadcast="numpy" />
3839
  <input>
3840
  <port id="0" precision="FP16">
 
3853
  </port>
3854
  </output>
3855
  </layer>
3856
+ <layer id="271" name="Constant_31299/fq_weights_1/convert" type="Convert" version="opset1">
3857
  <data destination_type="f32" />
3858
  <input>
3859
  <port id="0" precision="FP16">
 
3868
  </port>
3869
  </output>
3870
  </layer>
3871
+ <layer id="272" name="Multiply_31284" type="MatMul" version="opset1">
3872
  <data transpose_a="false" transpose_b="true" />
3873
  <input>
3874
  <port id="0" precision="FP32">
 
3889
  </port>
3890
  </output>
3891
  </layer>
3892
+ <layer id="273" name="Constant_31406" type="Const" version="opset1">
3893
  <data element_type="f32" shape="1, 1, 1280" offset="95122894" size="5120" />
3894
  <output>
3895
  <port id="0" precision="FP32">
 
3921
  </port>
3922
  </output>
3923
  </layer>
3924
+ <layer id="275" name="Constant_31715" type="Const" version="opset1">
3925
  <data element_type="i64" shape="4" offset="68776506" size="32" />
3926
  <output>
3927
  <port id="0" precision="I64">
 
3988
  </port>
3989
  </output>
3990
  </layer>
3991
+ <layer id="280" name="Convert_42781" type="Convert" version="opset1">
3992
  <data destination_type="f16" />
3993
  <input>
3994
  <port id="0" precision="U8">
 
4012
  </port>
4013
  </output>
4014
  </layer>
4015
+ <layer id="282" name="Convert_42784" type="Convert" version="opset1">
4016
  <data destination_type="f16" />
4017
  <input>
4018
  <port id="0" precision="U8">
 
4110
  </port>
4111
  </output>
4112
  </layer>
4113
+ <layer id="288" name="Constant_31716" type="Const" version="opset1">
4114
  <data element_type="i64" shape="4" offset="68776506" size="32" />
4115
  <output>
4116
  <port id="0" precision="I64">
 
4246
  </port>
4247
  </output>
4248
  </layer>
4249
+ <layer id="296" name="Convert_42770" type="Convert" version="opset1">
4250
  <data destination_type="f16" />
4251
  <input>
4252
  <port id="0" precision="U8">
 
4270
  </port>
4271
  </output>
4272
  </layer>
4273
+ <layer id="298" name="Convert_42773" type="Convert" version="opset1">
4274
  <data destination_type="f16" />
4275
  <input>
4276
  <port id="0" precision="U8">
 
4368
  </port>
4369
  </output>
4370
  </layer>
4371
+ <layer id="304" name="Constant_31407" type="Const" version="opset1">
4372
  <data element_type="f32" shape="1, 1, 1280" offset="98412494" size="5120" />
4373
  <output>
4374
  <port id="0" precision="FP32">
 
4400
  </port>
4401
  </output>
4402
  </layer>
4403
+ <layer id="306" name="Constant_31717" type="Const" version="opset1">
4404
  <data element_type="i64" shape="4" offset="68776506" size="32" />
4405
  <output>
4406
  <port id="0" precision="I64">
 
4512
  </port>
4513
  </output>
4514
  </layer>
4515
+ <layer id="313" name="Constant_31718" type="Const" version="opset1">
4516
  <data element_type="i64" shape="3" offset="72066182" size="24" />
4517
  <output>
4518
  <port id="0" precision="I64">
 
4550
  </port>
4551
  </output>
4552
  </layer>
4553
+ <layer id="316" name="Convert_42803" type="Convert" version="opset1">
4554
  <data destination_type="f16" />
4555
  <input>
4556
  <port id="0" precision="U8">
 
4574
  </port>
4575
  </output>
4576
  </layer>
4577
+ <layer id="318" name="Convert_42806" type="Convert" version="opset1">
4578
  <data destination_type="f16" />
4579
  <input>
4580
  <port id="0" precision="U8">
 
4672
  </port>
4673
  </output>
4674
  </layer>
4675
+ <layer id="324" name="Constant_31408" type="Const" version="opset1">
4676
  <data element_type="f32" shape="1, 1, 1280" offset="100059854" size="5120" />
4677
  <output>
4678
  <port id="0" precision="FP32">
 
4754
  </port>
4755
  </output>
4756
  </layer>
4757
+ <layer id="329" name="Constant_31409" type="Const" version="opset1">
4758
  <data element_type="f32" shape="1, 1, 1280" offset="100064974" size="5120" />
4759
  <output>
4760
  <port id="0" precision="FP32">
 
4786
  </port>
4787
  </output>
4788
  </layer>
4789
+ <layer id="331" name="Constant_31410" type="Const" version="opset1">
4790
  <data element_type="f32" shape="1, 1, 1280" offset="100070094" size="5120" />
4791
  <output>
4792
  <port id="0" precision="FP32">
 
4818
  </port>
4819
  </output>
4820
  </layer>
4821
+ <layer id="333" name="Constant_31302" type="Const" version="opset1">
4822
  <data element_type="u8" shape="1280, 1280" offset="100075214" size="1638400" />
4823
  <output>
4824
  <port id="0" precision="U8">
 
4827
  </port>
4828
  </output>
4829
  </layer>
4830
+ <layer id="334" name="Convert_42814" type="Convert" version="opset1">
4831
  <data destination_type="f16" />
4832
  <input>
4833
  <port id="0" precision="U8">
 
4842
  </port>
4843
  </output>
4844
  </layer>
4845
+ <layer id="335" name="Constant_31302/zero_point" type="Const" version="opset1">
4846
  <data element_type="u8" shape="1280, 1" offset="101713614" size="1280" />
4847
  <output>
4848
  <port id="0" precision="U8">
 
4851
  </port>
4852
  </output>
4853
  </layer>
4854
+ <layer id="336" name="Convert_42817" type="Convert" version="opset1">
4855
  <data destination_type="f16" />
4856
  <input>
4857
  <port id="0" precision="U8">
 
4866
  </port>
4867
  </output>
4868
  </layer>
4869
+ <layer id="337" name="Constant_31302/zero_point/subtract" type="Subtract" version="opset1">
4870
  <data auto_broadcast="numpy" />
4871
  <input>
4872
  <port id="0" precision="FP16">
 
4885
  </port>
4886
  </output>
4887
  </layer>
4888
+ <layer id="338" name="Constant_31302/scale" type="Const" version="opset1">
4889
  <data element_type="f16" shape="1280, 1" offset="101714894" size="2560" />
4890
  <output>
4891
  <port id="0" precision="FP16">
 
4894
  </port>
4895
  </output>
4896
  </layer>
4897
+ <layer id="339" name="Constant_31302/fq_weights_1" type="Multiply" version="opset1">
4898
  <data auto_broadcast="numpy" />
4899
  <input>
4900
  <port id="0" precision="FP16">
 
4913
  </port>
4914
  </output>
4915
  </layer>
4916
+ <layer id="340" name="Constant_31302/fq_weights_1/convert" type="Convert" version="opset1">
4917
  <data destination_type="f32" />
4918
  <input>
4919
  <port id="0" precision="FP16">
 
4928
  </port>
4929
  </output>
4930
  </layer>
4931
+ <layer id="341" name="Multiply_31288" type="MatMul" version="opset1">
4932
  <data transpose_a="false" transpose_b="true" />
4933
  <input>
4934
  <port id="0" precision="FP32">
 
4949
  </port>
4950
  </output>
4951
  </layer>
4952
+ <layer id="342" name="Constant_31411" type="Const" version="opset1">
4953
  <data element_type="f32" shape="1, 1, 1280" offset="101717454" size="5120" />
4954
  <output>
4955
  <port id="0" precision="FP32">
 
4981
  </port>
4982
  </output>
4983
  </layer>
4984
+ <layer id="344" name="Constant_31719" type="Const" version="opset1">
4985
  <data element_type="i64" shape="4" offset="68776506" size="32" />
4986
  <output>
4987
  <port id="0" precision="I64">
 
5048
  </port>
5049
  </output>
5050
  </layer>
5051
+ <layer id="349" name="Convert_42671" type="Convert" version="opset1">
5052
  <data destination_type="f16" />
5053
  <input>
5054
  <port id="0" precision="U8">
 
5072
  </port>
5073
  </output>
5074
  </layer>
5075
+ <layer id="351" name="Convert_42674" type="Convert" version="opset1">
5076
  <data destination_type="f16" />
5077
  <input>
5078
  <port id="0" precision="U8">
 
5273
  </port>
5274
  </output>
5275
  </layer>
5276
+ <layer id="363" name="Convert_42660" type="Convert" version="opset1">
5277
  <data destination_type="f16" />
5278
  <input>
5279
  <port id="0" precision="U8">
 
5297
  </port>
5298
  </output>
5299
  </layer>
5300
+ <layer id="365" name="Convert_42663" type="Convert" version="opset1">
5301
  <data destination_type="f16" />
5302
  <input>
5303
  <port id="0" precision="U8">
 
5395
  </port>
5396
  </output>
5397
  </layer>
5398
+ <layer id="371" name="Constant_31412" type="Const" version="opset1">
5399
  <data element_type="f32" shape="1, 1, 1280" offset="105007054" size="5120" />
5400
  <output>
5401
  <port id="0" precision="FP32">
 
5531
  </port>
5532
  </output>
5533
  </layer>
5534
+ <layer id="379" name="Constant_31720" type="Const" version="opset1">
5535
  <data element_type="i64" shape="3" offset="78660790" size="24" />
5536
  <output>
5537
  <port id="0" precision="I64">
 
5569
  </port>
5570
  </output>
5571
  </layer>
5572
+ <layer id="382" name="Convert_42825" type="Convert" version="opset1">
5573
  <data destination_type="f16" />
5574
  <input>
5575
  <port id="0" precision="U8">
 
5593
  </port>
5594
  </output>
5595
  </layer>
5596
+ <layer id="384" name="Convert_42828" type="Convert" version="opset1">
5597
  <data destination_type="f16" />
5598
  <input>
5599
  <port id="0" precision="U8">
 
5691
  </port>
5692
  </output>
5693
  </layer>
5694
+ <layer id="390" name="Constant_31413" type="Const" version="opset1">
5695
  <data element_type="f32" shape="1, 1, 1280" offset="106654414" size="5120" />
5696
  <output>
5697
  <port id="0" precision="FP32">
 
5773
  </port>
5774
  </output>
5775
  </layer>
5776
+ <layer id="395" name="Constant_31414" type="Const" version="opset1">
5777
  <data element_type="f32" shape="1, 1, 1280" offset="106659534" size="5120" />
5778
  <output>
5779
  <port id="0" precision="FP32">
 
5805
  </port>
5806
  </output>
5807
  </layer>
5808
+ <layer id="397" name="Constant_31415" type="Const" version="opset1">
5809
  <data element_type="f32" shape="1, 1, 1280" offset="106664654" size="5120" />
5810
  <output>
5811
  <port id="0" precision="FP32">
 
5846
  </port>
5847
  </output>
5848
  </layer>
5849
+ <layer id="400" name="Convert_42638" type="Convert" version="opset1">
5850
  <data destination_type="f16" />
5851
  <input>
5852
  <port id="0" precision="U8">
 
5870
  </port>
5871
  </output>
5872
  </layer>
5873
+ <layer id="402" name="Convert_42641" type="Convert" version="opset1">
5874
  <data destination_type="f16" />
5875
  <input>
5876
  <port id="0" precision="U8">
 
5968
  </port>
5969
  </output>
5970
  </layer>
5971
+ <layer id="408" name="Constant_31416" type="Const" version="opset1">
5972
  <data element_type="f32" shape="1, 1, 5120" offset="113238734" size="20480" />
5973
  <output>
5974
  <port id="0" precision="FP32">
 
6026
  </port>
6027
  </output>
6028
  </layer>
6029
+ <layer id="412" name="Convert_42649" type="Convert" version="opset1">
6030
  <data destination_type="f16" />
6031
  <input>
6032
  <port id="0" precision="U8">
 
6050
  </port>
6051
  </output>
6052
  </layer>
6053
+ <layer id="414" name="Convert_42652" type="Convert" version="opset1">
6054
  <data destination_type="f16" />
6055
  <input>
6056
  <port id="0" precision="U8">
 
6148
  </port>
6149
  </output>
6150
  </layer>
6151
+ <layer id="420" name="Constant_31417" type="Const" version="opset1">
6152
  <data element_type="f32" shape="1, 1, 1280" offset="119816654" size="5120" />
6153
  <output>
6154
  <port id="0" precision="FP32">
 
6230
  </port>
6231
  </output>
6232
  </layer>
6233
+ <layer id="425" name="Constant_31418" type="Const" version="opset1">
6234
  <data element_type="f32" shape="1, 1, 1280" offset="119821774" size="5120" />
6235
  <output>
6236
  <port id="0" precision="FP32">
 
6262
  </port>
6263
  </output>
6264
  </layer>
6265
+ <layer id="427" name="Constant_31419" type="Const" version="opset1">
6266
  <data element_type="f32" shape="1, 1, 1280" offset="119826894" size="5120" />
6267
  <output>
6268
  <port id="0" precision="FP32">
 
6315
  </port>
6316
  </output>
6317
  </layer>
6318
+ <layer id="430" name="Result_26206" type="Result" version="opset1">
6319
  <input>
6320
  <port id="0" precision="FP32">
6321
  <dim>-1</dim>
 
6792
  <edge from-layer="429" from-port="2" to-layer="430" to-port="0" />
6793
  </edges>
6794
  <rt_info>
6795
+ <Runtime_version value="2024.5.0-17285-ea5c1dcfdf9-releases/2024/5" />
6796
  <conversion_parameters>
6797
  <framework value="pytorch" />
6798
  <is_python_object value="True" />
 
6800
  <nncf>
6801
  <friendly_names_were_updated value="True" />
6802
  <weight_compression>
6803
+ <advanced_parameters value="{'statistics_path': None, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}}" />
6804
  <all_layers value="False" />
6805
  <awq value="False" />
6806
+ <backup_mode value="int8_asym" />
6807
  <gptq value="False" />
6808
  <group_size value="-1" />
6809
  <ignored_scope value="[]" />
 
6815
  </weight_compression>
6816
  </nncf>
6817
  <optimum>
6818
+ <optimum_intel_version value="1.21.0.dev0+d357376" />
6819
+ <optimum_version value="1.23.3" />
6820
+ <pytorch_version value="2.5.1" />
6821
+ <transformers_version value="4.46.3" />
6822
  </optimum>
6823
  </rt_info>
6824
  </net>
openvino_detokenizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b55ac04e90f19fede391281a9e5a90169fc646e1e302fe1208c782282b51ceaa
3
  size 528306
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c95aa7a4b53ecb3768d2a6608aa9e419059aef1c75b9b99e6b1c56328281c18
3
  size 528306
openvino_detokenizer.xml CHANGED
@@ -1,16 +1,16 @@
1
  <?xml version="1.0"?>
2
  <net name="detokenizer" version="11">
3
  <layers>
4
- <layer id="0" name="Parameter_32420" type="Parameter" version="opset1">
5
  <data shape="?,?" element_type="i64" />
6
  <output>
7
- <port id="0" precision="I64" names="Parameter_32420">
8
  <dim>-1</dim>
9
  <dim>-1</dim>
10
  </port>
11
  </output>
12
  </layer>
13
- <layer id="1" name="Convert_32436" type="Convert" version="opset1">
14
  <data destination_type="i32" />
15
  <input>
16
  <port id="0" precision="I64">
@@ -25,7 +25,7 @@
25
  </port>
26
  </output>
27
  </layer>
28
- <layer id="2" name="Constant_32381" type="Const" version="opset1">
29
  <data element_type="u8" shape="528257" offset="0" size="528257" />
30
  <output>
31
  <port id="0" precision="U8">
@@ -33,7 +33,7 @@
33
  </port>
34
  </output>
35
  </layer>
36
- <layer id="3" name="StringTensorUnpack_32382" type="StringTensorUnpack" version="extension">
37
  <data mode="begins_ends" />
38
  <input>
39
  <port id="0" precision="U8">
@@ -52,7 +52,7 @@
52
  </port>
53
  </output>
54
  </layer>
55
- <layer id="4" name="VocabDecoder_32421" type="VocabDecoder" version="extension">
56
  <data skip_tokens="50257, 50258, 50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269, 50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280, 50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291, 50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302, 50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313, 50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324, 50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335, 50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346, 50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357, 50358, 50359, 50360, 50361, 50362, 50363, 50364" />
57
  <input>
58
  <port id="0" precision="I32">
@@ -87,7 +87,7 @@
87
  </port>
88
  </output>
89
  </layer>
90
- <layer id="5" name="FuzeRagged_32422" type="FuzeRagged" version="extension">
91
  <input>
92
  <port id="0" precision="I32">
93
  <dim>-1</dim>
@@ -111,7 +111,7 @@
111
  </port>
112
  </output>
113
  </layer>
114
- <layer id="6" name="Constant_32424" type="Const" version="opset1">
115
  <data element_type="u8" shape="47" offset="528257" size="47" />
116
  <output>
117
  <port id="0" precision="U8">
@@ -119,7 +119,7 @@
119
  </port>
120
  </output>
121
  </layer>
122
- <layer id="7" name="Constant_32426" type="Const" version="opset1">
123
  <data element_type="u8" shape="2" offset="528304" size="2" />
124
  <output>
125
  <port id="0" precision="U8">
@@ -127,7 +127,7 @@
127
  </port>
128
  </output>
129
  </layer>
130
- <layer id="8" name="RegexNormalization_32427" type="RegexNormalization" version="extension">
131
  <data global_replace="true" />
132
  <input>
133
  <port id="0" precision="I32">
@@ -158,7 +158,7 @@
158
  </port>
159
  </output>
160
  </layer>
161
- <layer id="9" name="StringTensorPack_32428" type="StringTensorPack" version="extension">
162
  <data mode="begins_ends" />
163
  <input>
164
  <port id="0" precision="I32">
@@ -177,7 +177,7 @@
177
  </port>
178
  </output>
179
  </layer>
180
- <layer id="10" name="Result_32429" type="Result" version="opset1">
181
  <input>
182
  <port id="0" precision="STRING">
183
  <dim>-1</dim>
@@ -207,9 +207,29 @@
207
  <edge from-layer="9" from-port="3" to-layer="10" to-port="0" />
208
  </edges>
209
  <rt_info>
 
 
 
210
  <bos_token_id value="50257" />
 
 
211
  <eos_token_id value="50257" />
 
 
 
 
212
  <original_tokenizer_class value="&lt;class 'transformers.models.whisper.tokenization_whisper_fast.WhisperTokenizerFast'>" />
213
  <pad_token_id value="50257" />
 
 
 
 
 
 
 
 
 
 
 
214
  </rt_info>
215
  </net>
 
1
  <?xml version="1.0"?>
2
  <net name="detokenizer" version="11">
3
  <layers>
4
+ <layer id="0" name="Parameter_32760" type="Parameter" version="opset1">
5
  <data shape="?,?" element_type="i64" />
6
  <output>
7
+ <port id="0" precision="I64" names="Parameter_32760">
8
  <dim>-1</dim>
9
  <dim>-1</dim>
10
  </port>
11
  </output>
12
  </layer>
13
+ <layer id="1" name="Convert_32776" type="Convert" version="opset1">
14
  <data destination_type="i32" />
15
  <input>
16
  <port id="0" precision="I64">
 
25
  </port>
26
  </output>
27
  </layer>
28
+ <layer id="2" name="Constant_32721" type="Const" version="opset1">
29
  <data element_type="u8" shape="528257" offset="0" size="528257" />
30
  <output>
31
  <port id="0" precision="U8">
 
33
  </port>
34
  </output>
35
  </layer>
36
+ <layer id="3" name="StringTensorUnpack_32722" type="StringTensorUnpack" version="extension">
37
  <data mode="begins_ends" />
38
  <input>
39
  <port id="0" precision="U8">
 
52
  </port>
53
  </output>
54
  </layer>
55
+ <layer id="4" name="VocabDecoder_32761" type="VocabDecoder" version="extension">
56
  <data skip_tokens="50257, 50258, 50259, 50260, 50261, 50262, 50263, 50264, 50265, 50266, 50267, 50268, 50269, 50270, 50271, 50272, 50273, 50274, 50275, 50276, 50277, 50278, 50279, 50280, 50281, 50282, 50283, 50284, 50285, 50286, 50287, 50288, 50289, 50290, 50291, 50292, 50293, 50294, 50295, 50296, 50297, 50298, 50299, 50300, 50301, 50302, 50303, 50304, 50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313, 50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321, 50322, 50323, 50324, 50325, 50326, 50327, 50328, 50329, 50330, 50331, 50332, 50333, 50334, 50335, 50336, 50337, 50338, 50339, 50340, 50341, 50342, 50343, 50344, 50345, 50346, 50347, 50348, 50349, 50350, 50351, 50352, 50353, 50354, 50355, 50356, 50357, 50358, 50359, 50360, 50361, 50362, 50363, 50364" />
57
  <input>
58
  <port id="0" precision="I32">
 
87
  </port>
88
  </output>
89
  </layer>
90
+ <layer id="5" name="FuzeRagged_32762" type="FuzeRagged" version="extension">
91
  <input>
92
  <port id="0" precision="I32">
93
  <dim>-1</dim>
 
111
  </port>
112
  </output>
113
  </layer>
114
+ <layer id="6" name="Constant_32764" type="Const" version="opset1">
115
  <data element_type="u8" shape="47" offset="528257" size="47" />
116
  <output>
117
  <port id="0" precision="U8">
 
119
  </port>
120
  </output>
121
  </layer>
122
+ <layer id="7" name="Constant_32766" type="Const" version="opset1">
123
  <data element_type="u8" shape="2" offset="528304" size="2" />
124
  <output>
125
  <port id="0" precision="U8">
 
127
  </port>
128
  </output>
129
  </layer>
130
+ <layer id="8" name="RegexNormalization_32767" type="RegexNormalization" version="extension">
131
  <data global_replace="true" />
132
  <input>
133
  <port id="0" precision="I32">
 
158
  </port>
159
  </output>
160
  </layer>
161
+ <layer id="9" name="StringTensorPack_32768" type="StringTensorPack" version="extension">
162
  <data mode="begins_ends" />
163
  <input>
164
  <port id="0" precision="I32">
 
177
  </port>
178
  </output>
179
  </layer>
180
+ <layer id="10" name="Result_32769" type="Result" version="opset1">
181
  <input>
182
  <port id="0" precision="STRING">
183
  <dim>-1</dim>
 
207
  <edge from-layer="9" from-port="3" to-layer="10" to-port="0" />
208
  </edges>
209
  <rt_info>
210
+ <add_attention_mask value="True" />
211
+ <add_prefix_space />
212
+ <add_special_tokens value="True" />
213
  <bos_token_id value="50257" />
214
+ <clean_up_tokenization_spaces />
215
+ <detokenizer_input_type value="i64" />
216
  <eos_token_id value="50257" />
217
+ <handle_special_tokens_with_re />
218
+ <number_of_inputs value="1" />
219
+ <openvino_tokenizers_version value="2024.5.0.0" />
220
+ <openvino_version value="2024.5.0" />
221
  <original_tokenizer_class value="&lt;class 'transformers.models.whisper.tokenization_whisper_fast.WhisperTokenizerFast'>" />
222
  <pad_token_id value="50257" />
223
+ <sentencepiece_version value="0.2.0" />
224
+ <skip_special_tokens value="True" />
225
+ <streaming_detokenizer value="False" />
226
+ <tiktoken_version value="0.8.0" />
227
+ <tokenizer_output_type value="i64" />
228
+ <tokenizers_version value="0.20.3" />
229
+ <transformers_version value="4.46.3" />
230
+ <use_max_padding value="False" />
231
+ <use_sentencepiece_backend value="False" />
232
+ <utf8_replace_mode />
233
+ <with_detokenizer value="True" />
234
  </rt_info>
235
  </net>
openvino_encoder_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
openvino_tokenizer.xml CHANGED
@@ -1,27 +1,27 @@
1
  <?xml version="1.0"?>
2
  <net name="tokenizer" version="11">
3
  <layers>
4
- <layer id="0" name="Parameter_32300" type="Parameter" version="opset1">
5
  <data shape="?" element_type="string" />
6
  <output>
7
- <port id="0" precision="STRING" names="Parameter_32300">
8
  <dim>-1</dim>
9
  </port>
10
  </output>
11
  </layer>
12
- <layer id="1" name="Constant_32398" type="Const" version="opset1">
13
  <data element_type="i32" shape="" offset="0" size="4" />
14
  <output>
15
  <port id="0" precision="I32" />
16
  </output>
17
  </layer>
18
- <layer id="2" name="Constant_32399" type="Const" version="opset1">
19
  <data element_type="i32" shape="" offset="4" size="4" />
20
  <output>
21
  <port id="0" precision="I32" />
22
  </output>
23
  </layer>
24
- <layer id="3" name="Constant_32400" type="Const" version="opset1">
25
  <data element_type="i32" shape="1" offset="8" size="4" />
26
  <output>
27
  <port id="0" precision="I32">
@@ -29,19 +29,19 @@
29
  </port>
30
  </output>
31
  </layer>
32
- <layer id="4" name="Constant_32401" type="Const" version="opset1">
33
  <data element_type="i32" shape="" offset="0" size="4" />
34
  <output>
35
  <port id="0" precision="I32" />
36
  </output>
37
  </layer>
38
- <layer id="5" name="Constant_32402" type="Const" version="opset1">
39
  <data element_type="i32" shape="" offset="4" size="4" />
40
  <output>
41
  <port id="0" precision="I32" />
42
  </output>
43
  </layer>
44
- <layer id="6" name="Constant_32403" type="Const" version="opset1">
45
  <data element_type="i32" shape="1" offset="12" size="4" />
46
  <output>
47
  <port id="0" precision="I32">
@@ -49,13 +49,13 @@
49
  </port>
50
  </output>
51
  </layer>
52
- <layer id="7" name="Constant_32306" type="Const" version="opset1">
53
  <data element_type="i64" shape="" offset="16" size="8" />
54
  <output>
55
  <port id="0" precision="I64" />
56
  </output>
57
  </layer>
58
- <layer id="8" name="StringTensorUnpack_32301" type="StringTensorUnpack" version="extension">
59
  <data mode="begins_ends" />
60
  <input>
61
  <port id="0" precision="STRING">
@@ -74,7 +74,7 @@
74
  </port>
75
  </output>
76
  </layer>
77
- <layer id="9" name="ShapeOf_32302" type="ShapeOf" version="opset3">
78
  <data output_type="i64" />
79
  <input>
80
  <port id="0" precision="I32">
@@ -87,19 +87,19 @@
87
  </port>
88
  </output>
89
  </layer>
90
- <layer id="10" name="Constant_32303" type="Const" version="opset1">
91
  <data element_type="i64" shape="" offset="16" size="8" />
92
  <output>
93
  <port id="0" precision="I64" />
94
  </output>
95
  </layer>
96
- <layer id="11" name="Constant_32304" type="Const" version="opset1">
97
  <data element_type="i64" shape="" offset="16" size="8" />
98
  <output>
99
  <port id="0" precision="I64" />
100
  </output>
101
  </layer>
102
- <layer id="12" name="Gather_32305" type="Gather" version="opset8">
103
  <data batch_dims="0" />
104
  <input>
105
  <port id="0" precision="I64">
@@ -112,13 +112,13 @@
112
  <port id="3" precision="I64" />
113
  </output>
114
  </layer>
115
- <layer id="13" name="Constant_32307" type="Const" version="opset1">
116
  <data element_type="i64" shape="" offset="24" size="8" />
117
  <output>
118
  <port id="0" precision="I64" />
119
  </output>
120
  </layer>
121
- <layer id="14" name="Range_32308" type="Range" version="opset4">
122
  <data output_type="i32" />
123
  <input>
124
  <port id="0" precision="I64" />
@@ -131,19 +131,19 @@
131
  </port>
132
  </output>
133
  </layer>
134
- <layer id="15" name="Constant_32309" type="Const" version="opset1">
135
  <data element_type="i64" shape="" offset="24" size="8" />
136
  <output>
137
  <port id="0" precision="I64" />
138
  </output>
139
  </layer>
140
- <layer id="16" name="Constant_32310" type="Const" version="opset1">
141
  <data element_type="i64" shape="" offset="24" size="8" />
142
  <output>
143
  <port id="0" precision="I64" />
144
  </output>
145
  </layer>
146
- <layer id="17" name="Add_32311" type="Add" version="opset1">
147
  <data auto_broadcast="numpy" />
148
  <input>
149
  <port id="0" precision="I64" />
@@ -153,13 +153,13 @@
153
  <port id="2" precision="I64" />
154
  </output>
155
  </layer>
156
- <layer id="18" name="Constant_32312" type="Const" version="opset1">
157
  <data element_type="i64" shape="" offset="24" size="8" />
158
  <output>
159
  <port id="0" precision="I64" />
160
  </output>
161
  </layer>
162
- <layer id="19" name="Range_32313" type="Range" version="opset4">
163
  <data output_type="i32" />
164
  <input>
165
  <port id="0" precision="I64" />
@@ -172,7 +172,7 @@
172
  </port>
173
  </output>
174
  </layer>
175
- <layer id="20" name="Constant_32375" type="Const" version="opset1">
176
  <data element_type="u8" shape="26491" offset="32" size="26491" />
177
  <output>
178
  <port id="0" precision="U8">
@@ -180,7 +180,7 @@
180
  </port>
181
  </output>
182
  </layer>
183
- <layer id="21" name="SpecialTokensSplit_32376" type="SpecialTokensSplit" version="extension">
184
  <input>
185
  <port id="0" precision="I32">
186
  <dim>-1</dim>
@@ -222,7 +222,7 @@
222
  </port>
223
  </output>
224
  </layer>
225
- <layer id="22" name="Constant_32378" type="Const" version="opset1">
226
  <data element_type="u8" shape="64" offset="26523" size="64" />
227
  <output>
228
  <port id="0" precision="U8">
@@ -230,7 +230,7 @@
230
  </port>
231
  </output>
232
  </layer>
233
- <layer id="23" name="RegexSplit_32379" type="RegexSplit" version="extension">
234
  <data behaviour="isolate" invert="false" max_splits="-1" />
235
  <input>
236
  <port id="0" precision="I32">
@@ -276,7 +276,7 @@
276
  </port>
277
  </output>
278
  </layer>
279
- <layer id="24" name="Constant_32381" type="Const" version="opset1">
280
  <data element_type="u8" shape="528257" offset="26587" size="528257" />
281
  <output>
282
  <port id="0" precision="U8">
@@ -284,7 +284,7 @@
284
  </port>
285
  </output>
286
  </layer>
287
- <layer id="25" name="StringTensorUnpack_32382" type="StringTensorUnpack" version="extension">
288
  <data mode="begins_ends" />
289
  <input>
290
  <port id="0" precision="U8">
@@ -303,7 +303,7 @@
303
  </port>
304
  </output>
305
  </layer>
306
- <layer id="26" name="Constant_32387" type="Const" version="opset1">
307
  <data element_type="u8" shape="369958" offset="554844" size="369958" />
308
  <output>
309
  <port id="0" precision="U8">
@@ -311,7 +311,7 @@
311
  </port>
312
  </output>
313
  </layer>
314
- <layer id="27" name="StringTensorUnpack_32388" type="StringTensorUnpack" version="extension">
315
  <data mode="begins_ends" />
316
  <input>
317
  <port id="0" precision="U8">
@@ -330,7 +330,7 @@
330
  </port>
331
  </output>
332
  </layer>
333
- <layer id="28" name="Constant_32390" type="Const" version="opset1">
334
  <data element_type="u8" shape="336859" offset="924802" size="336859" />
335
  <output>
336
  <port id="0" precision="U8">
@@ -338,7 +338,7 @@
338
  </port>
339
  </output>
340
  </layer>
341
- <layer id="29" name="StringTensorUnpack_32391" type="StringTensorUnpack" version="extension">
342
  <data mode="begins_ends" />
343
  <input>
344
  <port id="0" precision="U8">
@@ -357,7 +357,7 @@
357
  </port>
358
  </output>
359
  </layer>
360
- <layer id="30" name="Constant_32384" type="Const" version="opset1">
361
  <data element_type="u8" shape="20172" offset="1261661" size="20172" />
362
  <output>
363
  <port id="0" precision="U8">
@@ -365,7 +365,7 @@
365
  </port>
366
  </output>
367
  </layer>
368
- <layer id="31" name="StringTensorUnpack_32385" type="StringTensorUnpack" version="extension">
369
  <data mode="begins_ends" />
370
  <input>
371
  <port id="0" precision="U8">
@@ -384,7 +384,7 @@
384
  </port>
385
  </output>
386
  </layer>
387
- <layer id="32" name="Constant_32392" type="Const" version="opset1">
388
  <data element_type="i32" shape="1609" offset="1281833" size="6436" />
389
  <output>
390
  <port id="0" precision="I32">
@@ -392,7 +392,7 @@
392
  </port>
393
  </output>
394
  </layer>
395
- <layer id="33" name="BPETokenizer_32393" type="BPETokenizer" version="extension">
396
  <data unk_token="" fuse_unk="false" suffix_indicator="" end_suffix="" byte_fallback="false" cache_capacity="20000" />
397
  <input>
398
  <port id="0" precision="I32">
@@ -462,7 +462,7 @@
462
  </port>
463
  </output>
464
  </layer>
465
- <layer id="34" name="Subtract_32394" type="Subtract" version="opset1">
466
  <data auto_broadcast="numpy" />
467
  <input>
468
  <port id="0" precision="I32">
@@ -478,13 +478,13 @@
478
  </port>
479
  </output>
480
  </layer>
481
- <layer id="35" name="Constant_32395" type="Const" version="opset1">
482
  <data element_type="i32" shape="" offset="1288269" size="4" />
483
  <output>
484
  <port id="0" precision="I32" />
485
  </output>
486
  </layer>
487
- <layer id="36" name="Minimum_32396" type="Minimum" version="opset1">
488
  <data auto_broadcast="numpy" />
489
  <input>
490
  <port id="0" precision="I32">
@@ -498,7 +498,7 @@
498
  </port>
499
  </output>
500
  </layer>
501
- <layer id="37" name="Add_32397" type="Add" version="opset1">
502
  <data auto_broadcast="numpy" />
503
  <input>
504
  <port id="0" precision="I32">
@@ -514,19 +514,19 @@
514
  </port>
515
  </output>
516
  </layer>
517
- <layer id="38" name="Constant_32404" type="Const" version="opset1">
518
  <data element_type="i32" shape="" offset="0" size="4" />
519
  <output>
520
  <port id="0" precision="I32" />
521
  </output>
522
  </layer>
523
- <layer id="39" name="Constant_32405" type="Const" version="opset1">
524
  <data element_type="i32" shape="" offset="4" size="4" />
525
  <output>
526
  <port id="0" precision="I32" />
527
  </output>
528
  </layer>
529
- <layer id="40" name="Constant_32406" type="Const" version="opset1">
530
  <data element_type="i32" shape="1" offset="1288273" size="4" />
531
  <output>
532
  <port id="0" precision="I32">
@@ -534,7 +534,7 @@
534
  </port>
535
  </output>
536
  </layer>
537
- <layer id="41" name="Constant_32407" type="Const" version="opset1">
538
  <data element_type="i32" shape="4" offset="1288277" size="16" />
539
  <output>
540
  <port id="0" precision="I32">
@@ -542,7 +542,7 @@
542
  </port>
543
  </output>
544
  </layer>
545
- <layer id="42" name="CombineSegments_32408" type="CombineSegments" version="extension">
546
  <input>
547
  <port id="0" precision="I32" />
548
  <port id="1" precision="I32" />
@@ -593,7 +593,7 @@
593
  </port>
594
  </output>
595
  </layer>
596
- <layer id="43" name="Subtract_32409" type="Subtract" version="opset1">
597
  <data auto_broadcast="numpy" />
598
  <input>
599
  <port id="0" precision="I32">
@@ -609,13 +609,13 @@
609
  </port>
610
  </output>
611
  </layer>
612
- <layer id="44" name="Constant_32410" type="Const" version="opset1">
613
  <data element_type="i32" shape="" offset="0" size="4" />
614
  <output>
615
  <port id="0" precision="I32" />
616
  </output>
617
  </layer>
618
- <layer id="45" name="ReduceMax_32411" type="ReduceMax" version="opset1">
619
  <data keep_dims="false" />
620
  <input>
621
  <port id="0" precision="I32">
@@ -627,13 +627,13 @@
627
  <port id="2" precision="I32" />
628
  </output>
629
  </layer>
630
- <layer id="46" name="Constant_32412" type="Const" version="opset1">
631
  <data element_type="i32" shape="" offset="1288273" size="4" />
632
  <output>
633
  <port id="0" precision="I32" />
634
  </output>
635
  </layer>
636
- <layer id="47" name="RaggedToDense_32413" type="RaggedToDense" version="extension">
637
  <data pad_right="true" />
638
  <input>
639
  <port id="0" precision="I32">
@@ -659,7 +659,7 @@
659
  </port>
660
  </output>
661
  </layer>
662
- <layer id="48" name="Convert_32414" type="Convert" version="opset1">
663
  <data destination_type="i32" />
664
  <input>
665
  <port id="0" precision="BOOL">
@@ -674,7 +674,7 @@
674
  </port>
675
  </output>
676
  </layer>
677
- <layer id="49" name="Convert_32414" type="Convert" version="opset1">
678
  <data destination_type="i64" />
679
  <input>
680
  <port id="0" precision="I32">
@@ -689,7 +689,7 @@
689
  </port>
690
  </output>
691
  </layer>
692
- <layer id="51" name="RaggedToDense_32413.0" type="Convert" version="opset1">
693
  <data destination_type="i64" />
694
  <input>
695
  <port id="0" precision="I32">
@@ -704,7 +704,7 @@
704
  </port>
705
  </output>
706
  </layer>
707
- <layer id="52" name="Result_32417" type="Result" version="opset1">
708
  <input>
709
  <port id="0" precision="I64">
710
  <dim>-1</dim>
@@ -712,7 +712,7 @@
712
  </port>
713
  </input>
714
  </layer>
715
- <layer id="50" name="Result_32419" type="Result" version="opset1">
716
  <input>
717
  <port id="0" precision="I64">
718
  <dim>-1</dim>
@@ -805,9 +805,29 @@
805
  <edge from-layer="51" from-port="1" to-layer="52" to-port="0" />
806
  </edges>
807
  <rt_info>
 
 
 
808
  <bos_token_id value="50257" />
 
 
809
  <eos_token_id value="50257" />
 
 
 
 
810
  <original_tokenizer_class value="&lt;class 'transformers.models.whisper.tokenization_whisper_fast.WhisperTokenizerFast'>" />
811
  <pad_token_id value="50257" />
 
 
 
 
 
 
 
 
 
 
 
812
  </rt_info>
813
  </net>
 
1
  <?xml version="1.0"?>
2
  <net name="tokenizer" version="11">
3
  <layers>
4
+ <layer id="0" name="Parameter_32640" type="Parameter" version="opset1">
5
  <data shape="?" element_type="string" />
6
  <output>
7
+ <port id="0" precision="STRING" names="Parameter_32640">
8
  <dim>-1</dim>
9
  </port>
10
  </output>
11
  </layer>
12
+ <layer id="1" name="Constant_32738" type="Const" version="opset1">
13
  <data element_type="i32" shape="" offset="0" size="4" />
14
  <output>
15
  <port id="0" precision="I32" />
16
  </output>
17
  </layer>
18
+ <layer id="2" name="Constant_32739" type="Const" version="opset1">
19
  <data element_type="i32" shape="" offset="4" size="4" />
20
  <output>
21
  <port id="0" precision="I32" />
22
  </output>
23
  </layer>
24
+ <layer id="3" name="Constant_32740" type="Const" version="opset1">
25
  <data element_type="i32" shape="1" offset="8" size="4" />
26
  <output>
27
  <port id="0" precision="I32">
 
29
  </port>
30
  </output>
31
  </layer>
32
+ <layer id="4" name="Constant_32741" type="Const" version="opset1">
33
  <data element_type="i32" shape="" offset="0" size="4" />
34
  <output>
35
  <port id="0" precision="I32" />
36
  </output>
37
  </layer>
38
+ <layer id="5" name="Constant_32742" type="Const" version="opset1">
39
  <data element_type="i32" shape="" offset="4" size="4" />
40
  <output>
41
  <port id="0" precision="I32" />
42
  </output>
43
  </layer>
44
+ <layer id="6" name="Constant_32743" type="Const" version="opset1">
45
  <data element_type="i32" shape="1" offset="12" size="4" />
46
  <output>
47
  <port id="0" precision="I32">
 
49
  </port>
50
  </output>
51
  </layer>
52
+ <layer id="7" name="Constant_32646" type="Const" version="opset1">
53
  <data element_type="i64" shape="" offset="16" size="8" />
54
  <output>
55
  <port id="0" precision="I64" />
56
  </output>
57
  </layer>
58
+ <layer id="8" name="StringTensorUnpack_32641" type="StringTensorUnpack" version="extension">
59
  <data mode="begins_ends" />
60
  <input>
61
  <port id="0" precision="STRING">
 
74
  </port>
75
  </output>
76
  </layer>
77
+ <layer id="9" name="ShapeOf_32642" type="ShapeOf" version="opset3">
78
  <data output_type="i64" />
79
  <input>
80
  <port id="0" precision="I32">
 
87
  </port>
88
  </output>
89
  </layer>
90
+ <layer id="10" name="Constant_32643" type="Const" version="opset1">
91
  <data element_type="i64" shape="" offset="16" size="8" />
92
  <output>
93
  <port id="0" precision="I64" />
94
  </output>
95
  </layer>
96
+ <layer id="11" name="Constant_32644" type="Const" version="opset1">
97
  <data element_type="i64" shape="" offset="16" size="8" />
98
  <output>
99
  <port id="0" precision="I64" />
100
  </output>
101
  </layer>
102
+ <layer id="12" name="Gather_32645" type="Gather" version="opset8">
103
  <data batch_dims="0" />
104
  <input>
105
  <port id="0" precision="I64">
 
112
  <port id="3" precision="I64" />
113
  </output>
114
  </layer>
115
+ <layer id="13" name="Constant_32647" type="Const" version="opset1">
116
  <data element_type="i64" shape="" offset="24" size="8" />
117
  <output>
118
  <port id="0" precision="I64" />
119
  </output>
120
  </layer>
121
+ <layer id="14" name="Range_32648" type="Range" version="opset4">
122
  <data output_type="i32" />
123
  <input>
124
  <port id="0" precision="I64" />
 
131
  </port>
132
  </output>
133
  </layer>
134
+ <layer id="15" name="Constant_32649" type="Const" version="opset1">
135
  <data element_type="i64" shape="" offset="24" size="8" />
136
  <output>
137
  <port id="0" precision="I64" />
138
  </output>
139
  </layer>
140
+ <layer id="16" name="Constant_32650" type="Const" version="opset1">
141
  <data element_type="i64" shape="" offset="24" size="8" />
142
  <output>
143
  <port id="0" precision="I64" />
144
  </output>
145
  </layer>
146
+ <layer id="17" name="Add_32651" type="Add" version="opset1">
147
  <data auto_broadcast="numpy" />
148
  <input>
149
  <port id="0" precision="I64" />
 
153
  <port id="2" precision="I64" />
154
  </output>
155
  </layer>
156
+ <layer id="18" name="Constant_32652" type="Const" version="opset1">
157
  <data element_type="i64" shape="" offset="24" size="8" />
158
  <output>
159
  <port id="0" precision="I64" />
160
  </output>
161
  </layer>
162
+ <layer id="19" name="Range_32653" type="Range" version="opset4">
163
  <data output_type="i32" />
164
  <input>
165
  <port id="0" precision="I64" />
 
172
  </port>
173
  </output>
174
  </layer>
175
+ <layer id="20" name="Constant_32715" type="Const" version="opset1">
176
  <data element_type="u8" shape="26491" offset="32" size="26491" />
177
  <output>
178
  <port id="0" precision="U8">
 
180
  </port>
181
  </output>
182
  </layer>
183
+ <layer id="21" name="SpecialTokensSplit_32716" type="SpecialTokensSplit" version="extension">
184
  <input>
185
  <port id="0" precision="I32">
186
  <dim>-1</dim>
 
222
  </port>
223
  </output>
224
  </layer>
225
+ <layer id="22" name="Constant_32718" type="Const" version="opset1">
226
  <data element_type="u8" shape="64" offset="26523" size="64" />
227
  <output>
228
  <port id="0" precision="U8">
 
230
  </port>
231
  </output>
232
  </layer>
233
+ <layer id="23" name="RegexSplit_32719" type="RegexSplit" version="extension">
234
  <data behaviour="isolate" invert="false" max_splits="-1" />
235
  <input>
236
  <port id="0" precision="I32">
 
276
  </port>
277
  </output>
278
  </layer>
279
+ <layer id="24" name="Constant_32721" type="Const" version="opset1">
280
  <data element_type="u8" shape="528257" offset="26587" size="528257" />
281
  <output>
282
  <port id="0" precision="U8">
 
284
  </port>
285
  </output>
286
  </layer>
287
+ <layer id="25" name="StringTensorUnpack_32722" type="StringTensorUnpack" version="extension">
288
  <data mode="begins_ends" />
289
  <input>
290
  <port id="0" precision="U8">
 
303
  </port>
304
  </output>
305
  </layer>
306
+ <layer id="26" name="Constant_32727" type="Const" version="opset1">
307
  <data element_type="u8" shape="369958" offset="554844" size="369958" />
308
  <output>
309
  <port id="0" precision="U8">
 
311
  </port>
312
  </output>
313
  </layer>
314
+ <layer id="27" name="StringTensorUnpack_32728" type="StringTensorUnpack" version="extension">
315
  <data mode="begins_ends" />
316
  <input>
317
  <port id="0" precision="U8">
 
330
  </port>
331
  </output>
332
  </layer>
333
+ <layer id="28" name="Constant_32730" type="Const" version="opset1">
334
  <data element_type="u8" shape="336859" offset="924802" size="336859" />
335
  <output>
336
  <port id="0" precision="U8">
 
338
  </port>
339
  </output>
340
  </layer>
341
+ <layer id="29" name="StringTensorUnpack_32731" type="StringTensorUnpack" version="extension">
342
  <data mode="begins_ends" />
343
  <input>
344
  <port id="0" precision="U8">
 
357
  </port>
358
  </output>
359
  </layer>
360
+ <layer id="30" name="Constant_32724" type="Const" version="opset1">
361
  <data element_type="u8" shape="20172" offset="1261661" size="20172" />
362
  <output>
363
  <port id="0" precision="U8">
 
365
  </port>
366
  </output>
367
  </layer>
368
+ <layer id="31" name="StringTensorUnpack_32725" type="StringTensorUnpack" version="extension">
369
  <data mode="begins_ends" />
370
  <input>
371
  <port id="0" precision="U8">
 
384
  </port>
385
  </output>
386
  </layer>
387
+ <layer id="32" name="Constant_32732" type="Const" version="opset1">
388
  <data element_type="i32" shape="1609" offset="1281833" size="6436" />
389
  <output>
390
  <port id="0" precision="I32">
 
392
  </port>
393
  </output>
394
  </layer>
395
+ <layer id="33" name="BPETokenizer_32733" type="BPETokenizer" version="extension">
396
  <data unk_token="" fuse_unk="false" suffix_indicator="" end_suffix="" byte_fallback="false" cache_capacity="20000" />
397
  <input>
398
  <port id="0" precision="I32">
 
462
  </port>
463
  </output>
464
  </layer>
465
+ <layer id="34" name="Subtract_32734" type="Subtract" version="opset1">
466
  <data auto_broadcast="numpy" />
467
  <input>
468
  <port id="0" precision="I32">
 
478
  </port>
479
  </output>
480
  </layer>
481
+ <layer id="35" name="Constant_32735" type="Const" version="opset1">
482
  <data element_type="i32" shape="" offset="1288269" size="4" />
483
  <output>
484
  <port id="0" precision="I32" />
485
  </output>
486
  </layer>
487
+ <layer id="36" name="Minimum_32736" type="Minimum" version="opset1">
488
  <data auto_broadcast="numpy" />
489
  <input>
490
  <port id="0" precision="I32">
 
498
  </port>
499
  </output>
500
  </layer>
501
+ <layer id="37" name="Add_32737" type="Add" version="opset1">
502
  <data auto_broadcast="numpy" />
503
  <input>
504
  <port id="0" precision="I32">
 
514
  </port>
515
  </output>
516
  </layer>
517
+ <layer id="38" name="Constant_32744" type="Const" version="opset1">
518
  <data element_type="i32" shape="" offset="0" size="4" />
519
  <output>
520
  <port id="0" precision="I32" />
521
  </output>
522
  </layer>
523
+ <layer id="39" name="Constant_32745" type="Const" version="opset1">
524
  <data element_type="i32" shape="" offset="4" size="4" />
525
  <output>
526
  <port id="0" precision="I32" />
527
  </output>
528
  </layer>
529
+ <layer id="40" name="Constant_32746" type="Const" version="opset1">
530
  <data element_type="i32" shape="1" offset="1288273" size="4" />
531
  <output>
532
  <port id="0" precision="I32">
 
534
  </port>
535
  </output>
536
  </layer>
537
+ <layer id="41" name="Constant_32747" type="Const" version="opset1">
538
  <data element_type="i32" shape="4" offset="1288277" size="16" />
539
  <output>
540
  <port id="0" precision="I32">
 
542
  </port>
543
  </output>
544
  </layer>
545
+ <layer id="42" name="CombineSegments_32748" type="CombineSegments" version="extension">
546
  <input>
547
  <port id="0" precision="I32" />
548
  <port id="1" precision="I32" />
 
593
  </port>
594
  </output>
595
  </layer>
596
+ <layer id="43" name="Subtract_32749" type="Subtract" version="opset1">
597
  <data auto_broadcast="numpy" />
598
  <input>
599
  <port id="0" precision="I32">
 
609
  </port>
610
  </output>
611
  </layer>
612
+ <layer id="44" name="Constant_32750" type="Const" version="opset1">
613
  <data element_type="i32" shape="" offset="0" size="4" />
614
  <output>
615
  <port id="0" precision="I32" />
616
  </output>
617
  </layer>
618
+ <layer id="45" name="ReduceMax_32751" type="ReduceMax" version="opset1">
619
  <data keep_dims="false" />
620
  <input>
621
  <port id="0" precision="I32">
 
627
  <port id="2" precision="I32" />
628
  </output>
629
  </layer>
630
+ <layer id="46" name="Constant_32752" type="Const" version="opset1">
631
  <data element_type="i32" shape="" offset="1288273" size="4" />
632
  <output>
633
  <port id="0" precision="I32" />
634
  </output>
635
  </layer>
636
+ <layer id="47" name="RaggedToDense_32753" type="RaggedToDense" version="extension">
637
  <data pad_right="true" />
638
  <input>
639
  <port id="0" precision="I32">
 
659
  </port>
660
  </output>
661
  </layer>
662
+ <layer id="48" name="Convert_32754" type="Convert" version="opset1">
663
  <data destination_type="i32" />
664
  <input>
665
  <port id="0" precision="BOOL">
 
674
  </port>
675
  </output>
676
  </layer>
677
+ <layer id="49" name="Convert_32754" type="Convert" version="opset1">
678
  <data destination_type="i64" />
679
  <input>
680
  <port id="0" precision="I32">
 
689
  </port>
690
  </output>
691
  </layer>
692
+ <layer id="51" name="RaggedToDense_32753.0" type="Convert" version="opset1">
693
  <data destination_type="i64" />
694
  <input>
695
  <port id="0" precision="I32">
 
704
  </port>
705
  </output>
706
  </layer>
707
+ <layer id="52" name="Result_32757" type="Result" version="opset1">
708
  <input>
709
  <port id="0" precision="I64">
710
  <dim>-1</dim>
 
712
  </port>
713
  </input>
714
  </layer>
715
+ <layer id="50" name="Result_32759" type="Result" version="opset1">
716
  <input>
717
  <port id="0" precision="I64">
718
  <dim>-1</dim>
 
805
  <edge from-layer="51" from-port="1" to-layer="52" to-port="0" />
806
  </edges>
807
  <rt_info>
808
+ <add_attention_mask value="True" />
809
+ <add_prefix_space />
810
+ <add_special_tokens value="True" />
811
  <bos_token_id value="50257" />
812
+ <clean_up_tokenization_spaces />
813
+ <detokenizer_input_type value="i64" />
814
  <eos_token_id value="50257" />
815
+ <handle_special_tokens_with_re />
816
+ <number_of_inputs value="1" />
817
+ <openvino_tokenizers_version value="2024.5.0.0" />
818
+ <openvino_version value="2024.5.0" />
819
  <original_tokenizer_class value="&lt;class 'transformers.models.whisper.tokenization_whisper_fast.WhisperTokenizerFast'>" />
820
  <pad_token_id value="50257" />
821
+ <sentencepiece_version value="0.2.0" />
822
+ <skip_special_tokens value="True" />
823
+ <streaming_detokenizer value="False" />
824
+ <tiktoken_version value="0.8.0" />
825
+ <tokenizer_output_type value="i64" />
826
+ <tokenizers_version value="0.20.3" />
827
+ <transformers_version value="4.46.3" />
828
+ <use_max_padding value="False" />
829
+ <use_sentencepiece_backend value="False" />
830
+ <utf8_replace_mode />
831
+ <with_detokenizer value="True" />
832
  </rt_info>
833
  </net>