hevok commited on
Commit
d4076ef
·
verified ·
1 Parent(s): 91b06be

Upload 2 files

Browse files
lm_eval/Qween__Qwen2.5-3B/Qwen2.5-3B_eng.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "batch_size": "auto:4 (32,64,64,64,64)",
4
+ "pretrained": "Qwen2.5-3B",
5
+ "dtype": "float",
6
+ "trust_remote_code": true,
7
+ "gen_kwargs": null,
8
+ "limit": null,
9
+ "num_fewshot": 0
10
+ },
11
+ "results": {
12
+ "arc_challenge": {
13
+ "version": 1.0,
14
+ "nshot": 0,
15
+ "alias": "arc_challenge",
16
+ "acc,none": 0.4497,
17
+ "stderr": 0.0146,
18
+ "acc_norm,none": 0.4744
19
+ },
20
+ "arc_easy": {
21
+ "version": 1.0,
22
+ "nshot": 0,
23
+ "alias": "arc_easy",
24
+ "acc,none": 0.7744,
25
+ "stderr": 0.0091,
26
+ "acc_norm,none": 0.7311
27
+ },
28
+ "cola": {
29
+ "version": 1.0,
30
+ "nshot": 0,
31
+ "alias": "- cola",
32
+ "mcc,none": 0.1568,
33
+ "stderr": 0.0331
34
+ },
35
+ "glue": {
36
+ "nshot": 0,
37
+ "alias": "glue",
38
+ "acc,none": 0.7514,
39
+ "stderr": 0.0331,
40
+ "f1,none": 0.8016,
41
+ "mcc,none": 0.1568
42
+ },
43
+ "hellaswag": {
44
+ "version": 1.0,
45
+ "nshot": 0,
46
+ "alias": "hellaswag",
47
+ "acc,none": 0.5492,
48
+ "stderr": 0.0044,
49
+ "acc_norm,none": 0.7353
50
+ },
51
+ "lambada_openai": {
52
+ "version": 1.0,
53
+ "nshot": 0,
54
+ "alias": "lambada_openai",
55
+ "acc,none": 0.6715,
56
+ "stderr": 0.1095,
57
+ "perplexity,none": 4.6136
58
+ },
59
+ "mnli": {
60
+ "version": 1.0,
61
+ "nshot": 0,
62
+ "alias": "- mnli",
63
+ "acc,none": 0.5515,
64
+ "stderr": 0.005
65
+ },
66
+ "mnli_mismatch": {
67
+ "version": 1.0,
68
+ "nshot": 0,
69
+ "alias": "- mnli_mismatch",
70
+ "acc,none": 0.5659,
71
+ "stderr": 0.005
72
+ },
73
+ "mrpc": {
74
+ "version": 1.0,
75
+ "nshot": 0,
76
+ "alias": "- mrpc",
77
+ "acc,none": 0.7672,
78
+ "stderr": 0.0168,
79
+ "f1,none": 0.8342
80
+ },
81
+ "piqa": {
82
+ "version": 1.0,
83
+ "nshot": 0,
84
+ "alias": "piqa",
85
+ "acc,none": 0.7856,
86
+ "stderr": 0.0095,
87
+ "acc_norm,none": 0.7884
88
+ },
89
+ "qnli": {
90
+ "version": 1.0,
91
+ "nshot": 0,
92
+ "alias": "- qnli",
93
+ "acc,none": 0.6723,
94
+ "stderr": 0.0064
95
+ },
96
+ "qqp": {
97
+ "version": 2.0,
98
+ "nshot": 0,
99
+ "alias": "- qqp",
100
+ "acc,none": 0.8527,
101
+ "stderr": 0.0025,
102
+ "f1,none": 0.8012
103
+ },
104
+ "rte": {
105
+ "version": 1.0,
106
+ "nshot": 0,
107
+ "alias": "- rte",
108
+ "acc,none": 0.7545,
109
+ "stderr": 0.0259
110
+ },
111
+ "sciq": {
112
+ "version": 1.0,
113
+ "nshot": 0,
114
+ "alias": "sciq",
115
+ "acc,none": 0.962,
116
+ "stderr": 0.0066,
117
+ "acc_norm,none": 0.954
118
+ },
119
+ "sst2": {
120
+ "version": 1.0,
121
+ "nshot": 0,
122
+ "alias": "- sst2",
123
+ "acc,none": 0.9014,
124
+ "stderr": 0.0101
125
+ },
126
+ "winogrande": {
127
+ "version": 1.0,
128
+ "nshot": 0,
129
+ "alias": "winogrande",
130
+ "acc,none": 0.6851,
131
+ "stderr": 0.0131
132
+ },
133
+ "wnli": {
134
+ "version": 2.0,
135
+ "nshot": 0,
136
+ "alias": "- wnli",
137
+ "acc,none": 0.5493,
138
+ "stderr": 0.0595
139
+ }
140
+ }
141
+ }
lm_eval/Qween__Qwen2.5-3B/Qwen2.5-3B_multilang.json ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "batch_size": "auto:4 (32,64,64,64,64)",
4
+ "pretrained": "Qwen2.5-3B",
5
+ "dtype": "float",
6
+ "trust_remote_code": true,
7
+ "gen_kwargs": null,
8
+ "limit": null,
9
+ "num_fewshot": 0
10
+ },
11
+ "results": {
12
+ "lambada_multilingual": {
13
+ "nshot": 0,
14
+ "alias": "lambada_multilingual",
15
+ "acc,none": 0.4347,
16
+ "stderr": 1.5222,
17
+ "perplexity,none": 51.4735
18
+ },
19
+ "lambada_openai_mt_de": {
20
+ "version": 1.0,
21
+ "nshot": 0,
22
+ "alias": "- lambada_openai_mt_de",
23
+ "acc,none": 0.3736,
24
+ "stderr": 3.6478,
25
+ "perplexity,none": 61.0501
26
+ },
27
+ "lambada_openai_mt_en": {
28
+ "version": 1.0,
29
+ "nshot": 0,
30
+ "alias": "- lambada_openai_mt_en",
31
+ "acc,none": 0.6715,
32
+ "stderr": 0.1095,
33
+ "perplexity,none": 4.6137
34
+ },
35
+ "lambada_openai_mt_es": {
36
+ "version": 1.0,
37
+ "nshot": 0,
38
+ "alias": "- lambada_openai_mt_es",
39
+ "acc,none": 0.2591,
40
+ "stderr": 5.4857,
41
+ "perplexity,none": 99.7598
42
+ },
43
+ "lambada_openai_mt_fr": {
44
+ "version": 1.0,
45
+ "nshot": 0,
46
+ "alias": "- lambada_openai_mt_fr",
47
+ "acc,none": 0.4388,
48
+ "stderr": 2.142,
49
+ "perplexity,none": 39.3861
50
+ },
51
+ "lambada_openai_mt_it": {
52
+ "version": 1.0,
53
+ "nshot": 0,
54
+ "alias": "- lambada_openai_mt_it",
55
+ "acc,none": 0.4308,
56
+ "stderr": 3.1508,
57
+ "perplexity,none": 52.5578
58
+ },
59
+ "paws_de": {
60
+ "version": 0.0,
61
+ "nshot": 0,
62
+ "alias": "- paws_de",
63
+ "acc,none": 0.453,
64
+ "stderr": 0.0111
65
+ },
66
+ "paws_en": {
67
+ "version": 0.0,
68
+ "nshot": 0,
69
+ "alias": "- paws_en",
70
+ "acc,none": 0.423,
71
+ "stderr": 0.011
72
+ },
73
+ "paws_es": {
74
+ "version": 0.0,
75
+ "nshot": 0,
76
+ "alias": "- paws_es",
77
+ "acc,none": 0.46,
78
+ "stderr": 0.0111
79
+ },
80
+ "paws_fr": {
81
+ "version": 0.0,
82
+ "nshot": 0,
83
+ "alias": "- paws_fr",
84
+ "acc,none": 0.52,
85
+ "stderr": 0.0112
86
+ },
87
+ "paws_ja": {
88
+ "version": 0.0,
89
+ "nshot": 0,
90
+ "alias": "- paws_ja",
91
+ "acc,none": 0.535,
92
+ "stderr": 0.0112
93
+ },
94
+ "paws_ko": {
95
+ "version": 0.0,
96
+ "nshot": 0,
97
+ "alias": "- paws_ko",
98
+ "acc,none": 0.519,
99
+ "stderr": 0.0112
100
+ },
101
+ "paws_zh": {
102
+ "version": 0.0,
103
+ "nshot": 0,
104
+ "alias": "- paws_zh",
105
+ "acc,none": 0.474,
106
+ "stderr": 0.0112
107
+ },
108
+ "pawsx": {
109
+ "nshot": 0,
110
+ "alias": "pawsx",
111
+ "acc,none": 0.4834,
112
+ "stderr": 0.0042
113
+ },
114
+ "xcopa": {
115
+ "nshot": 0,
116
+ "alias": "xcopa",
117
+ "acc,none": 0.59,
118
+ "stderr": 0.0065
119
+ },
120
+ "xcopa_et": {
121
+ "version": 1.0,
122
+ "nshot": 0,
123
+ "alias": "- xcopa_et",
124
+ "acc,none": 0.48,
125
+ "stderr": 0.0224
126
+ },
127
+ "xcopa_ht": {
128
+ "version": 1.0,
129
+ "nshot": 0,
130
+ "alias": "- xcopa_ht",
131
+ "acc,none": 0.504,
132
+ "stderr": 0.0224
133
+ },
134
+ "xcopa_id": {
135
+ "version": 1.0,
136
+ "nshot": 0,
137
+ "alias": "- xcopa_id",
138
+ "acc,none": 0.672,
139
+ "stderr": 0.021
140
+ },
141
+ "xcopa_it": {
142
+ "version": 1.0,
143
+ "nshot": 0,
144
+ "alias": "- xcopa_it",
145
+ "acc,none": 0.674,
146
+ "stderr": 0.021
147
+ },
148
+ "xcopa_qu": {
149
+ "version": 1.0,
150
+ "nshot": 0,
151
+ "alias": "- xcopa_qu",
152
+ "acc,none": 0.516,
153
+ "stderr": 0.0224
154
+ },
155
+ "xcopa_sw": {
156
+ "version": 1.0,
157
+ "nshot": 0,
158
+ "alias": "- xcopa_sw",
159
+ "acc,none": 0.536,
160
+ "stderr": 0.0223
161
+ },
162
+ "xcopa_ta": {
163
+ "version": 1.0,
164
+ "nshot": 0,
165
+ "alias": "- xcopa_ta",
166
+ "acc,none": 0.558,
167
+ "stderr": 0.0222
168
+ },
169
+ "xcopa_th": {
170
+ "version": 1.0,
171
+ "nshot": 0,
172
+ "alias": "- xcopa_th",
173
+ "acc,none": 0.562,
174
+ "stderr": 0.0222
175
+ },
176
+ "xcopa_tr": {
177
+ "version": 1.0,
178
+ "nshot": 0,
179
+ "alias": "- xcopa_tr",
180
+ "acc,none": 0.55,
181
+ "stderr": 0.0223
182
+ },
183
+ "xcopa_vi": {
184
+ "version": 1.0,
185
+ "nshot": 0,
186
+ "alias": "- xcopa_vi",
187
+ "acc,none": 0.714,
188
+ "stderr": 0.0202
189
+ },
190
+ "xcopa_zh": {
191
+ "version": 1.0,
192
+ "nshot": 0,
193
+ "alias": "- xcopa_zh",
194
+ "acc,none": 0.724,
195
+ "stderr": 0.02
196
+ },
197
+ "xnli": {
198
+ "nshot": 0,
199
+ "alias": "xnli",
200
+ "acc,none": 0.3811,
201
+ "stderr": 0.0024
202
+ },
203
+ "xnli_ar": {
204
+ "version": 1.0,
205
+ "nshot": 0,
206
+ "alias": "- xnli_ar",
207
+ "acc,none": 0.3361,
208
+ "stderr": 0.0095
209
+ },
210
+ "xnli_bg": {
211
+ "version": 1.0,
212
+ "nshot": 0,
213
+ "alias": "- xnli_bg",
214
+ "acc,none": 0.3787,
215
+ "stderr": 0.0097
216
+ },
217
+ "xnli_de": {
218
+ "version": 1.0,
219
+ "nshot": 0,
220
+ "alias": "- xnli_de",
221
+ "acc,none": 0.4309,
222
+ "stderr": 0.0099
223
+ },
224
+ "xnli_el": {
225
+ "version": 1.0,
226
+ "nshot": 0,
227
+ "alias": "- xnli_el",
228
+ "acc,none": 0.3281,
229
+ "stderr": 0.0094
230
+ },
231
+ "xnli_en": {
232
+ "version": 1.0,
233
+ "nshot": 0,
234
+ "alias": "- xnli_en",
235
+ "acc,none": 0.4719,
236
+ "stderr": 0.01
237
+ },
238
+ "xnli_es": {
239
+ "version": 1.0,
240
+ "nshot": 0,
241
+ "alias": "- xnli_es",
242
+ "acc,none": 0.3827,
243
+ "stderr": 0.0097
244
+ },
245
+ "xnli_eu": {
246
+ "version": 1.0,
247
+ "nshot": 0,
248
+ "alias": "- xnli_eu",
249
+ "acc,none": 0.3505,
250
+ "stderr": 0.0067
251
+ },
252
+ "xnli_fr": {
253
+ "version": 1.0,
254
+ "nshot": 0,
255
+ "alias": "- xnli_fr",
256
+ "acc,none": 0.4446,
257
+ "stderr": 0.01
258
+ },
259
+ "xnli_hi": {
260
+ "version": 1.0,
261
+ "nshot": 0,
262
+ "alias": "- xnli_hi",
263
+ "acc,none": 0.3482,
264
+ "stderr": 0.0095
265
+ },
266
+ "xnli_ru": {
267
+ "version": 1.0,
268
+ "nshot": 0,
269
+ "alias": "- xnli_ru",
270
+ "acc,none": 0.4201,
271
+ "stderr": 0.0099
272
+ },
273
+ "xnli_sw": {
274
+ "version": 1.0,
275
+ "nshot": 0,
276
+ "alias": "- xnli_sw",
277
+ "acc,none": 0.3655,
278
+ "stderr": 0.0097
279
+ },
280
+ "xnli_th": {
281
+ "version": 1.0,
282
+ "nshot": 0,
283
+ "alias": "- xnli_th",
284
+ "acc,none": 0.402,
285
+ "stderr": 0.0098
286
+ },
287
+ "xnli_tr": {
288
+ "version": 1.0,
289
+ "nshot": 0,
290
+ "alias": "- xnli_tr",
291
+ "acc,none": 0.3799,
292
+ "stderr": 0.0097
293
+ },
294
+ "xnli_ur": {
295
+ "version": 1.0,
296
+ "nshot": 0,
297
+ "alias": "- xnli_ur",
298
+ "acc,none": 0.3361,
299
+ "stderr": 0.0095
300
+ },
301
+ "xnli_vi": {
302
+ "version": 1.0,
303
+ "nshot": 0,
304
+ "alias": "- xnli_vi",
305
+ "acc,none": 0.3984,
306
+ "stderr": 0.0098
307
+ },
308
+ "xnli_zh": {
309
+ "version": 1.0,
310
+ "nshot": 0,
311
+ "alias": "- xnli_zh",
312
+ "acc,none": 0.355,
313
+ "stderr": 0.0096
314
+ },
315
+ "xstorycloze": {
316
+ "nshot": 0,
317
+ "alias": "xstorycloze",
318
+ "acc,none": 0.5961,
319
+ "stderr": 0.0038
320
+ },
321
+ "xstorycloze_ar": {
322
+ "version": 1.0,
323
+ "nshot": 0,
324
+ "alias": "- xstorycloze_ar",
325
+ "acc,none": 0.5837,
326
+ "stderr": 0.0127
327
+ },
328
+ "xstorycloze_en": {
329
+ "version": 1.0,
330
+ "nshot": 0,
331
+ "alias": "- xstorycloze_en",
332
+ "acc,none": 0.7551,
333
+ "stderr": 0.0111
334
+ },
335
+ "xstorycloze_es": {
336
+ "version": 1.0,
337
+ "nshot": 0,
338
+ "alias": "- xstorycloze_es",
339
+ "acc,none": 0.6724,
340
+ "stderr": 0.0121
341
+ },
342
+ "xstorycloze_eu": {
343
+ "version": 1.0,
344
+ "nshot": 0,
345
+ "alias": "- xstorycloze_eu",
346
+ "acc,none": 0.5129,
347
+ "stderr": 0.0129
348
+ },
349
+ "xstorycloze_hi": {
350
+ "version": 1.0,
351
+ "nshot": 0,
352
+ "alias": "- xstorycloze_hi",
353
+ "acc,none": 0.5387,
354
+ "stderr": 0.0128
355
+ },
356
+ "xstorycloze_id": {
357
+ "version": 1.0,
358
+ "nshot": 0,
359
+ "alias": "- xstorycloze_id",
360
+ "acc,none": 0.6367,
361
+ "stderr": 0.0124
362
+ },
363
+ "xstorycloze_my": {
364
+ "version": 1.0,
365
+ "nshot": 0,
366
+ "alias": "- xstorycloze_my",
367
+ "acc,none": 0.497,
368
+ "stderr": 0.0129
369
+ },
370
+ "xstorycloze_ru": {
371
+ "version": 1.0,
372
+ "nshot": 0,
373
+ "alias": "- xstorycloze_ru",
374
+ "acc,none": 0.6578,
375
+ "stderr": 0.0122
376
+ },
377
+ "xstorycloze_sw": {
378
+ "version": 1.0,
379
+ "nshot": 0,
380
+ "alias": "- xstorycloze_sw",
381
+ "acc,none": 0.5149,
382
+ "stderr": 0.0129
383
+ },
384
+ "xstorycloze_te": {
385
+ "version": 1.0,
386
+ "nshot": 0,
387
+ "alias": "- xstorycloze_te",
388
+ "acc,none": 0.5341,
389
+ "stderr": 0.0128
390
+ },
391
+ "xstorycloze_zh": {
392
+ "version": 1.0,
393
+ "nshot": 0,
394
+ "alias": "- xstorycloze_zh",
395
+ "acc,none": 0.6539,
396
+ "stderr": 0.0122
397
+ },
398
+ "xwinograd": {
399
+ "nshot": 0,
400
+ "alias": "xwinograd",
401
+ "acc,none": 0.7977,
402
+ "stderr": 0.0059
403
+ },
404
+ "xwinograd_en": {
405
+ "version": 1.0,
406
+ "nshot": 0,
407
+ "alias": "- xwinograd_en",
408
+ "acc,none": 0.8563,
409
+ "stderr": 0.0073
410
+ },
411
+ "xwinograd_fr": {
412
+ "version": 1.0,
413
+ "nshot": 0,
414
+ "alias": "- xwinograd_fr",
415
+ "acc,none": 0.7108,
416
+ "stderr": 0.0501
417
+ },
418
+ "xwinograd_jp": {
419
+ "version": 1.0,
420
+ "nshot": 0,
421
+ "alias": "- xwinograd_jp",
422
+ "acc,none": 0.7049,
423
+ "stderr": 0.0147
424
+ },
425
+ "xwinograd_pt": {
426
+ "version": 1.0,
427
+ "nshot": 0,
428
+ "alias": "- xwinograd_pt",
429
+ "acc,none": 0.7452,
430
+ "stderr": 0.0269
431
+ },
432
+ "xwinograd_ru": {
433
+ "version": 1.0,
434
+ "nshot": 0,
435
+ "alias": "- xwinograd_ru",
436
+ "acc,none": 0.6889,
437
+ "stderr": 0.0261
438
+ },
439
+ "xwinograd_zh": {
440
+ "version": 1.0,
441
+ "nshot": 0,
442
+ "alias": "- xwinograd_zh",
443
+ "acc,none": 0.8135,
444
+ "stderr": 0.0174
445
+ }
446
+ }
447
+ }