task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932573,0 anli_r2,acc,0.336,0.014944140233795025,0 anli_r3,acc,0.335,0.013630871843821476,0 arc_challenge,acc,0.27303754266211605,0.013019332762635734,0 arc_challenge,acc_norm,0.2815699658703072,0.013143376735009024,0 arc_easy,acc,0.5597643097643098,0.010186228624515651,0 arc_easy,acc_norm,0.4978956228956229,0.010259692651537049,0 boolq,acc,0.5892966360856269,0.008604460608471413,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.2593406593406593,,1 copa,acc,0.74,0.04408440022768077,0 hellaswag,acc,0.43616809400517825,0.004948952519517524,0 hellaswag,acc_norm,0.563931487751444,0.004948824501355473,0 piqa,acc,0.7366702937976061,0.010276185322196764,0 piqa,acc_norm,0.7383025027203483,0.010255630772708227,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.802,0.012607733934175315,0 sciq,acc_norm,0.713,0.014312087053809961,0 storycloze_2016,acc,0.6916087653661144,0.010679734445487801,0 winogrande,acc,0.5485398579321231,0.01398611030101776,0