{
"best_metric": 0.04577361047267914,
"best_model_checkpoint": "./phishing-email-detection/checkpoint-549",
"epoch": 1.0,
"eval_steps": 1,
"global_step": 549,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0018214936247723133,
"grad_norm": 3.5286669731140137,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.7387,
"step": 1
},
{
"epoch": 0.0036429872495446266,
"grad_norm": 5.887165069580078,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.7757,
"step": 2
},
{
"epoch": 0.00546448087431694,
"grad_norm": 4.956603050231934,
"learning_rate": 3e-06,
"loss": 0.7739,
"step": 3
},
{
"epoch": 0.007285974499089253,
"grad_norm": 1.441901683807373,
"learning_rate": 4.000000000000001e-06,
"loss": 0.6558,
"step": 4
},
{
"epoch": 0.009107468123861567,
"grad_norm": 2.219719648361206,
"learning_rate": 5e-06,
"loss": 0.7077,
"step": 5
},
{
"epoch": 0.01092896174863388,
"grad_norm": 3.6860222816467285,
"learning_rate": 6e-06,
"loss": 0.7193,
"step": 6
},
{
"epoch": 0.012750455373406194,
"grad_norm": 4.651106834411621,
"learning_rate": 7.000000000000001e-06,
"loss": 0.7194,
"step": 7
},
{
"epoch": 0.014571948998178506,
"grad_norm": 2.381688117980957,
"learning_rate": 8.000000000000001e-06,
"loss": 0.6798,
"step": 8
},
{
"epoch": 0.01639344262295082,
"grad_norm": 2.163804292678833,
"learning_rate": 9e-06,
"loss": 0.6845,
"step": 9
},
{
"epoch": 0.018214936247723135,
"grad_norm": 3.0246245861053467,
"learning_rate": 1e-05,
"loss": 0.6573,
"step": 10
},
{
"epoch": 0.020036429872495445,
"grad_norm": 7.085489273071289,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.7286,
"step": 11
},
{
"epoch": 0.02185792349726776,
"grad_norm": 3.3664584159851074,
"learning_rate": 1.2e-05,
"loss": 0.7074,
"step": 12
},
{
"epoch": 0.023679417122040074,
"grad_norm": 2.2570459842681885,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.64,
"step": 13
},
{
"epoch": 0.025500910746812388,
"grad_norm": 2.044220447540283,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.6568,
"step": 14
},
{
"epoch": 0.0273224043715847,
"grad_norm": 2.8037195205688477,
"learning_rate": 1.5e-05,
"loss": 0.6471,
"step": 15
},
{
"epoch": 0.029143897996357013,
"grad_norm": 1.8997001647949219,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.6499,
"step": 16
},
{
"epoch": 0.030965391621129327,
"grad_norm": 3.3398947715759277,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.5904,
"step": 17
},
{
"epoch": 0.03278688524590164,
"grad_norm": 2.688948631286621,
"learning_rate": 1.8e-05,
"loss": 0.6029,
"step": 18
},
{
"epoch": 0.03460837887067395,
"grad_norm": 2.952101469039917,
"learning_rate": 1.9e-05,
"loss": 0.6377,
"step": 19
},
{
"epoch": 0.03642987249544627,
"grad_norm": 3.182657480239868,
"learning_rate": 2e-05,
"loss": 0.6495,
"step": 20
},
{
"epoch": 0.03825136612021858,
"grad_norm": 5.169290542602539,
"learning_rate": 2.1e-05,
"loss": 0.6297,
"step": 21
},
{
"epoch": 0.04007285974499089,
"grad_norm": 4.624154090881348,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.637,
"step": 22
},
{
"epoch": 0.04189435336976321,
"grad_norm": 3.911428689956665,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.5452,
"step": 23
},
{
"epoch": 0.04371584699453552,
"grad_norm": 4.3677897453308105,
"learning_rate": 2.4e-05,
"loss": 0.5605,
"step": 24
},
{
"epoch": 0.04553734061930783,
"grad_norm": 2.0296688079833984,
"learning_rate": 2.5e-05,
"loss": 0.5526,
"step": 25
},
{
"epoch": 0.04735883424408015,
"grad_norm": 5.974031925201416,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.512,
"step": 26
},
{
"epoch": 0.04918032786885246,
"grad_norm": 3.5385329723358154,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.5212,
"step": 27
},
{
"epoch": 0.051001821493624776,
"grad_norm": 4.718818187713623,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.4359,
"step": 28
},
{
"epoch": 0.052823315118397086,
"grad_norm": 3.893428325653076,
"learning_rate": 2.9e-05,
"loss": 0.4226,
"step": 29
},
{
"epoch": 0.0546448087431694,
"grad_norm": 4.146342754364014,
"learning_rate": 3e-05,
"loss": 0.4003,
"step": 30
},
{
"epoch": 0.056466302367941715,
"grad_norm": 5.218969821929932,
"learning_rate": 3.1e-05,
"loss": 0.3837,
"step": 31
},
{
"epoch": 0.058287795992714025,
"grad_norm": 6.411149024963379,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.4135,
"step": 32
},
{
"epoch": 0.060109289617486336,
"grad_norm": 3.4569814205169678,
"learning_rate": 3.3e-05,
"loss": 0.2921,
"step": 33
},
{
"epoch": 0.061930783242258654,
"grad_norm": 4.673402309417725,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.3708,
"step": 34
},
{
"epoch": 0.06375227686703097,
"grad_norm": 4.357000827789307,
"learning_rate": 3.5e-05,
"loss": 0.3005,
"step": 35
},
{
"epoch": 0.06557377049180328,
"grad_norm": 2.4127838611602783,
"learning_rate": 3.6e-05,
"loss": 0.1953,
"step": 36
},
{
"epoch": 0.06739526411657559,
"grad_norm": 8.710689544677734,
"learning_rate": 3.7e-05,
"loss": 0.3076,
"step": 37
},
{
"epoch": 0.0692167577413479,
"grad_norm": 3.4053244590759277,
"learning_rate": 3.8e-05,
"loss": 0.1865,
"step": 38
},
{
"epoch": 0.07103825136612021,
"grad_norm": 7.373399257659912,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.244,
"step": 39
},
{
"epoch": 0.07285974499089254,
"grad_norm": 3.437110424041748,
"learning_rate": 4e-05,
"loss": 0.3219,
"step": 40
},
{
"epoch": 0.07468123861566485,
"grad_norm": 4.024507999420166,
"learning_rate": 4.1e-05,
"loss": 0.1873,
"step": 41
},
{
"epoch": 0.07650273224043716,
"grad_norm": 5.076328277587891,
"learning_rate": 4.2e-05,
"loss": 0.1642,
"step": 42
},
{
"epoch": 0.07832422586520947,
"grad_norm": 1.7894034385681152,
"learning_rate": 4.3e-05,
"loss": 0.0831,
"step": 43
},
{
"epoch": 0.08014571948998178,
"grad_norm": 2.9944159984588623,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.114,
"step": 44
},
{
"epoch": 0.08196721311475409,
"grad_norm": 2.9631662368774414,
"learning_rate": 4.5e-05,
"loss": 0.0638,
"step": 45
},
{
"epoch": 0.08378870673952642,
"grad_norm": 3.803802251815796,
"learning_rate": 4.600000000000001e-05,
"loss": 0.2082,
"step": 46
},
{
"epoch": 0.08561020036429873,
"grad_norm": 10.075150489807129,
"learning_rate": 4.7e-05,
"loss": 0.1128,
"step": 47
},
{
"epoch": 0.08743169398907104,
"grad_norm": 8.686117172241211,
"learning_rate": 4.8e-05,
"loss": 0.2981,
"step": 48
},
{
"epoch": 0.08925318761384335,
"grad_norm": 6.119321346282959,
"learning_rate": 4.9e-05,
"loss": 0.227,
"step": 49
},
{
"epoch": 0.09107468123861566,
"grad_norm": 1.3178825378417969,
"learning_rate": 5e-05,
"loss": 0.0379,
"step": 50
},
{
"epoch": 0.09289617486338798,
"grad_norm": 14.058587074279785,
"learning_rate": 4.996869129618034e-05,
"loss": 0.2593,
"step": 51
},
{
"epoch": 0.0947176684881603,
"grad_norm": 3.1873652935028076,
"learning_rate": 4.993738259236068e-05,
"loss": 0.0374,
"step": 52
},
{
"epoch": 0.0965391621129326,
"grad_norm": 14.453207015991211,
"learning_rate": 4.990607388854102e-05,
"loss": 0.3941,
"step": 53
},
{
"epoch": 0.09836065573770492,
"grad_norm": 1.3862441778182983,
"learning_rate": 4.9874765184721355e-05,
"loss": 0.1079,
"step": 54
},
{
"epoch": 0.10018214936247723,
"grad_norm": 16.57040023803711,
"learning_rate": 4.984345648090169e-05,
"loss": 0.5086,
"step": 55
},
{
"epoch": 0.10200364298724955,
"grad_norm": 19.026596069335938,
"learning_rate": 4.981214777708203e-05,
"loss": 0.3344,
"step": 56
},
{
"epoch": 0.10382513661202186,
"grad_norm": 10.873573303222656,
"learning_rate": 4.978083907326237e-05,
"loss": 0.145,
"step": 57
},
{
"epoch": 0.10564663023679417,
"grad_norm": 12.884831428527832,
"learning_rate": 4.974953036944271e-05,
"loss": 0.408,
"step": 58
},
{
"epoch": 0.10746812386156648,
"grad_norm": 13.81652545928955,
"learning_rate": 4.9718221665623046e-05,
"loss": 0.1871,
"step": 59
},
{
"epoch": 0.1092896174863388,
"grad_norm": 9.276785850524902,
"learning_rate": 4.9686912961803384e-05,
"loss": 0.1794,
"step": 60
},
{
"epoch": 0.1111111111111111,
"grad_norm": 3.9966280460357666,
"learning_rate": 4.965560425798372e-05,
"loss": 0.1224,
"step": 61
},
{
"epoch": 0.11293260473588343,
"grad_norm": 9.78342056274414,
"learning_rate": 4.962429555416406e-05,
"loss": 0.2712,
"step": 62
},
{
"epoch": 0.11475409836065574,
"grad_norm": 3.808826446533203,
"learning_rate": 4.95929868503444e-05,
"loss": 0.0539,
"step": 63
},
{
"epoch": 0.11657559198542805,
"grad_norm": 2.176527500152588,
"learning_rate": 4.9561678146524736e-05,
"loss": 0.0676,
"step": 64
},
{
"epoch": 0.11839708561020036,
"grad_norm": 1.8019678592681885,
"learning_rate": 4.9530369442705075e-05,
"loss": 0.1436,
"step": 65
},
{
"epoch": 0.12021857923497267,
"grad_norm": 2.831108570098877,
"learning_rate": 4.949906073888541e-05,
"loss": 0.0529,
"step": 66
},
{
"epoch": 0.122040072859745,
"grad_norm": 3.580284357070923,
"learning_rate": 4.946775203506575e-05,
"loss": 0.0522,
"step": 67
},
{
"epoch": 0.12386156648451731,
"grad_norm": 1.6722187995910645,
"learning_rate": 4.943644333124609e-05,
"loss": 0.0587,
"step": 68
},
{
"epoch": 0.12568306010928962,
"grad_norm": 2.407590866088867,
"learning_rate": 4.940513462742643e-05,
"loss": 0.1156,
"step": 69
},
{
"epoch": 0.12750455373406194,
"grad_norm": 9.305254936218262,
"learning_rate": 4.9373825923606765e-05,
"loss": 0.0965,
"step": 70
},
{
"epoch": 0.12932604735883424,
"grad_norm": 7.728682994842529,
"learning_rate": 4.93425172197871e-05,
"loss": 0.1213,
"step": 71
},
{
"epoch": 0.13114754098360656,
"grad_norm": 2.370173454284668,
"learning_rate": 4.931120851596744e-05,
"loss": 0.0458,
"step": 72
},
{
"epoch": 0.13296903460837886,
"grad_norm": 15.143721580505371,
"learning_rate": 4.927989981214778e-05,
"loss": 0.358,
"step": 73
},
{
"epoch": 0.13479052823315119,
"grad_norm": 7.329561233520508,
"learning_rate": 4.924859110832812e-05,
"loss": 0.0722,
"step": 74
},
{
"epoch": 0.1366120218579235,
"grad_norm": 4.661482810974121,
"learning_rate": 4.9217282404508456e-05,
"loss": 0.1194,
"step": 75
},
{
"epoch": 0.1384335154826958,
"grad_norm": 7.151434898376465,
"learning_rate": 4.9185973700688794e-05,
"loss": 0.0753,
"step": 76
},
{
"epoch": 0.14025500910746813,
"grad_norm": 8.931604385375977,
"learning_rate": 4.915466499686913e-05,
"loss": 0.1828,
"step": 77
},
{
"epoch": 0.14207650273224043,
"grad_norm": 9.410967826843262,
"learning_rate": 4.912335629304947e-05,
"loss": 0.0992,
"step": 78
},
{
"epoch": 0.14389799635701275,
"grad_norm": 5.235998153686523,
"learning_rate": 4.909204758922981e-05,
"loss": 0.2115,
"step": 79
},
{
"epoch": 0.14571948998178508,
"grad_norm": 5.031798839569092,
"learning_rate": 4.906073888541015e-05,
"loss": 0.0448,
"step": 80
},
{
"epoch": 0.14754098360655737,
"grad_norm": 1.3853205442428589,
"learning_rate": 4.9029430181590485e-05,
"loss": 0.0279,
"step": 81
},
{
"epoch": 0.1493624772313297,
"grad_norm": 7.063821792602539,
"learning_rate": 4.899812147777082e-05,
"loss": 0.1193,
"step": 82
},
{
"epoch": 0.151183970856102,
"grad_norm": 12.036198616027832,
"learning_rate": 4.896681277395116e-05,
"loss": 0.2752,
"step": 83
},
{
"epoch": 0.15300546448087432,
"grad_norm": 8.796091079711914,
"learning_rate": 4.89355040701315e-05,
"loss": 0.2198,
"step": 84
},
{
"epoch": 0.15482695810564662,
"grad_norm": 0.6930309534072876,
"learning_rate": 4.890419536631184e-05,
"loss": 0.0153,
"step": 85
},
{
"epoch": 0.15664845173041894,
"grad_norm": 6.166244029998779,
"learning_rate": 4.8872886662492175e-05,
"loss": 0.163,
"step": 86
},
{
"epoch": 0.15846994535519127,
"grad_norm": 4.07868766784668,
"learning_rate": 4.8841577958672514e-05,
"loss": 0.0629,
"step": 87
},
{
"epoch": 0.16029143897996356,
"grad_norm": 6.002202033996582,
"learning_rate": 4.881026925485285e-05,
"loss": 0.1418,
"step": 88
},
{
"epoch": 0.1621129326047359,
"grad_norm": 5.126954078674316,
"learning_rate": 4.877896055103319e-05,
"loss": 0.0691,
"step": 89
},
{
"epoch": 0.16393442622950818,
"grad_norm": 8.614744186401367,
"learning_rate": 4.874765184721353e-05,
"loss": 0.1183,
"step": 90
},
{
"epoch": 0.1657559198542805,
"grad_norm": 4.495233535766602,
"learning_rate": 4.8716343143393866e-05,
"loss": 0.1058,
"step": 91
},
{
"epoch": 0.16757741347905283,
"grad_norm": 2.912471294403076,
"learning_rate": 4.8685034439574204e-05,
"loss": 0.0322,
"step": 92
},
{
"epoch": 0.16939890710382513,
"grad_norm": 2.786748170852661,
"learning_rate": 4.865372573575454e-05,
"loss": 0.0289,
"step": 93
},
{
"epoch": 0.17122040072859745,
"grad_norm": 2.2987825870513916,
"learning_rate": 4.862241703193488e-05,
"loss": 0.0647,
"step": 94
},
{
"epoch": 0.17304189435336975,
"grad_norm": 2.2475061416625977,
"learning_rate": 4.859110832811522e-05,
"loss": 0.0384,
"step": 95
},
{
"epoch": 0.17486338797814208,
"grad_norm": 4.809598445892334,
"learning_rate": 4.855979962429556e-05,
"loss": 0.1032,
"step": 96
},
{
"epoch": 0.1766848816029144,
"grad_norm": 4.917390823364258,
"learning_rate": 4.8528490920475895e-05,
"loss": 0.1083,
"step": 97
},
{
"epoch": 0.1785063752276867,
"grad_norm": 1.0454902648925781,
"learning_rate": 4.849718221665623e-05,
"loss": 0.0186,
"step": 98
},
{
"epoch": 0.18032786885245902,
"grad_norm": 3.4447038173675537,
"learning_rate": 4.846587351283657e-05,
"loss": 0.0509,
"step": 99
},
{
"epoch": 0.18214936247723132,
"grad_norm": 6.711843490600586,
"learning_rate": 4.843456480901691e-05,
"loss": 0.0718,
"step": 100
},
{
"epoch": 0.18397085610200364,
"grad_norm": 3.19818377494812,
"learning_rate": 4.840325610519725e-05,
"loss": 0.0889,
"step": 101
},
{
"epoch": 0.18579234972677597,
"grad_norm": 10.022863388061523,
"learning_rate": 4.8371947401377586e-05,
"loss": 0.1713,
"step": 102
},
{
"epoch": 0.18761384335154827,
"grad_norm": 0.2765645682811737,
"learning_rate": 4.8340638697557924e-05,
"loss": 0.0049,
"step": 103
},
{
"epoch": 0.1894353369763206,
"grad_norm": 6.575446605682373,
"learning_rate": 4.830932999373826e-05,
"loss": 0.2178,
"step": 104
},
{
"epoch": 0.1912568306010929,
"grad_norm": 6.493257522583008,
"learning_rate": 4.82780212899186e-05,
"loss": 0.1347,
"step": 105
},
{
"epoch": 0.1930783242258652,
"grad_norm": 7.500410556793213,
"learning_rate": 4.824671258609894e-05,
"loss": 0.0779,
"step": 106
},
{
"epoch": 0.19489981785063754,
"grad_norm": 9.324666976928711,
"learning_rate": 4.8215403882279276e-05,
"loss": 0.3894,
"step": 107
},
{
"epoch": 0.19672131147540983,
"grad_norm": 2.570295810699463,
"learning_rate": 4.8184095178459615e-05,
"loss": 0.0145,
"step": 108
},
{
"epoch": 0.19854280510018216,
"grad_norm": 2.4578723907470703,
"learning_rate": 4.815278647463995e-05,
"loss": 0.0212,
"step": 109
},
{
"epoch": 0.20036429872495445,
"grad_norm": 4.467875957489014,
"learning_rate": 4.812147777082029e-05,
"loss": 0.0766,
"step": 110
},
{
"epoch": 0.20218579234972678,
"grad_norm": 8.017147064208984,
"learning_rate": 4.809016906700063e-05,
"loss": 0.2337,
"step": 111
},
{
"epoch": 0.2040072859744991,
"grad_norm": 7.299202919006348,
"learning_rate": 4.805886036318097e-05,
"loss": 0.2099,
"step": 112
},
{
"epoch": 0.2058287795992714,
"grad_norm": 1.8824843168258667,
"learning_rate": 4.8027551659361305e-05,
"loss": 0.0292,
"step": 113
},
{
"epoch": 0.20765027322404372,
"grad_norm": 0.450076699256897,
"learning_rate": 4.7996242955541643e-05,
"loss": 0.0103,
"step": 114
},
{
"epoch": 0.20947176684881602,
"grad_norm": 6.788011074066162,
"learning_rate": 4.796493425172198e-05,
"loss": 0.074,
"step": 115
},
{
"epoch": 0.21129326047358835,
"grad_norm": 6.855130195617676,
"learning_rate": 4.793362554790232e-05,
"loss": 0.0793,
"step": 116
},
{
"epoch": 0.21311475409836064,
"grad_norm": 6.564332962036133,
"learning_rate": 4.790231684408266e-05,
"loss": 0.061,
"step": 117
},
{
"epoch": 0.21493624772313297,
"grad_norm": 3.254970073699951,
"learning_rate": 4.7871008140262996e-05,
"loss": 0.0462,
"step": 118
},
{
"epoch": 0.2167577413479053,
"grad_norm": 0.28988149762153625,
"learning_rate": 4.7839699436443334e-05,
"loss": 0.0079,
"step": 119
},
{
"epoch": 0.2185792349726776,
"grad_norm": 2.863494634628296,
"learning_rate": 4.780839073262367e-05,
"loss": 0.0674,
"step": 120
},
{
"epoch": 0.2204007285974499,
"grad_norm": 2.85598087310791,
"learning_rate": 4.777708202880401e-05,
"loss": 0.1649,
"step": 121
},
{
"epoch": 0.2222222222222222,
"grad_norm": 1.740670919418335,
"learning_rate": 4.774577332498435e-05,
"loss": 0.0164,
"step": 122
},
{
"epoch": 0.22404371584699453,
"grad_norm": 1.7192822694778442,
"learning_rate": 4.771446462116469e-05,
"loss": 0.129,
"step": 123
},
{
"epoch": 0.22586520947176686,
"grad_norm": 5.544397830963135,
"learning_rate": 4.7683155917345025e-05,
"loss": 0.0954,
"step": 124
},
{
"epoch": 0.22768670309653916,
"grad_norm": 7.164045810699463,
"learning_rate": 4.765184721352536e-05,
"loss": 0.2136,
"step": 125
},
{
"epoch": 0.22950819672131148,
"grad_norm": 1.1967582702636719,
"learning_rate": 4.76205385097057e-05,
"loss": 0.0317,
"step": 126
},
{
"epoch": 0.23132969034608378,
"grad_norm": 0.9311371445655823,
"learning_rate": 4.758922980588604e-05,
"loss": 0.1102,
"step": 127
},
{
"epoch": 0.2331511839708561,
"grad_norm": 0.8293129205703735,
"learning_rate": 4.755792110206638e-05,
"loss": 0.0218,
"step": 128
},
{
"epoch": 0.23497267759562843,
"grad_norm": 1.2660574913024902,
"learning_rate": 4.7526612398246716e-05,
"loss": 0.0278,
"step": 129
},
{
"epoch": 0.23679417122040072,
"grad_norm": 0.48367011547088623,
"learning_rate": 4.7495303694427054e-05,
"loss": 0.0146,
"step": 130
},
{
"epoch": 0.23861566484517305,
"grad_norm": 0.7170718908309937,
"learning_rate": 4.746399499060739e-05,
"loss": 0.0107,
"step": 131
},
{
"epoch": 0.24043715846994534,
"grad_norm": 4.402022361755371,
"learning_rate": 4.743268628678773e-05,
"loss": 0.1032,
"step": 132
},
{
"epoch": 0.24225865209471767,
"grad_norm": 0.09203081578016281,
"learning_rate": 4.740137758296807e-05,
"loss": 0.0029,
"step": 133
},
{
"epoch": 0.24408014571949,
"grad_norm": 0.09475582838058472,
"learning_rate": 4.7370068879148406e-05,
"loss": 0.0029,
"step": 134
},
{
"epoch": 0.2459016393442623,
"grad_norm": 0.25659048557281494,
"learning_rate": 4.7338760175328744e-05,
"loss": 0.0037,
"step": 135
},
{
"epoch": 0.24772313296903462,
"grad_norm": 5.615557670593262,
"learning_rate": 4.730745147150908e-05,
"loss": 0.0604,
"step": 136
},
{
"epoch": 0.2495446265938069,
"grad_norm": 8.917096138000488,
"learning_rate": 4.727614276768942e-05,
"loss": 0.0719,
"step": 137
},
{
"epoch": 0.25136612021857924,
"grad_norm": 10.325088500976562,
"learning_rate": 4.724483406386976e-05,
"loss": 0.2705,
"step": 138
},
{
"epoch": 0.25318761384335153,
"grad_norm": 0.44606778025627136,
"learning_rate": 4.72135253600501e-05,
"loss": 0.0039,
"step": 139
},
{
"epoch": 0.2550091074681239,
"grad_norm": 5.040135383605957,
"learning_rate": 4.7182216656230435e-05,
"loss": 0.1328,
"step": 140
},
{
"epoch": 0.2568306010928962,
"grad_norm": 2.567768096923828,
"learning_rate": 4.715090795241077e-05,
"loss": 0.0166,
"step": 141
},
{
"epoch": 0.2586520947176685,
"grad_norm": 0.5684707164764404,
"learning_rate": 4.711959924859111e-05,
"loss": 0.0044,
"step": 142
},
{
"epoch": 0.2604735883424408,
"grad_norm": 5.067185878753662,
"learning_rate": 4.708829054477145e-05,
"loss": 0.0219,
"step": 143
},
{
"epoch": 0.26229508196721313,
"grad_norm": 10.094161987304688,
"learning_rate": 4.705698184095179e-05,
"loss": 0.0192,
"step": 144
},
{
"epoch": 0.2641165755919854,
"grad_norm": 3.966435194015503,
"learning_rate": 4.7025673137132126e-05,
"loss": 0.1723,
"step": 145
},
{
"epoch": 0.2659380692167577,
"grad_norm": 8.564971923828125,
"learning_rate": 4.6994364433312464e-05,
"loss": 0.0367,
"step": 146
},
{
"epoch": 0.2677595628415301,
"grad_norm": 1.8184422254562378,
"learning_rate": 4.69630557294928e-05,
"loss": 0.1991,
"step": 147
},
{
"epoch": 0.26958105646630237,
"grad_norm": 0.5665643811225891,
"learning_rate": 4.693174702567314e-05,
"loss": 0.005,
"step": 148
},
{
"epoch": 0.27140255009107467,
"grad_norm": 6.798255920410156,
"learning_rate": 4.690043832185348e-05,
"loss": 0.2519,
"step": 149
},
{
"epoch": 0.273224043715847,
"grad_norm": 0.10390115529298782,
"learning_rate": 4.6869129618033816e-05,
"loss": 0.0023,
"step": 150
},
{
"epoch": 0.2750455373406193,
"grad_norm": 10.828420639038086,
"learning_rate": 4.6837820914214155e-05,
"loss": 0.2426,
"step": 151
},
{
"epoch": 0.2768670309653916,
"grad_norm": 8.121326446533203,
"learning_rate": 4.680651221039449e-05,
"loss": 0.2702,
"step": 152
},
{
"epoch": 0.2786885245901639,
"grad_norm": 9.042678833007812,
"learning_rate": 4.6775203506574824e-05,
"loss": 0.0417,
"step": 153
},
{
"epoch": 0.28051001821493626,
"grad_norm": 0.2314717322587967,
"learning_rate": 4.674389480275517e-05,
"loss": 0.0031,
"step": 154
},
{
"epoch": 0.28233151183970856,
"grad_norm": 0.13141897320747375,
"learning_rate": 4.671258609893551e-05,
"loss": 0.0035,
"step": 155
},
{
"epoch": 0.28415300546448086,
"grad_norm": 7.087045192718506,
"learning_rate": 4.6681277395115845e-05,
"loss": 0.0734,
"step": 156
},
{
"epoch": 0.2859744990892532,
"grad_norm": 4.150629043579102,
"learning_rate": 4.6649968691296183e-05,
"loss": 0.1494,
"step": 157
},
{
"epoch": 0.2877959927140255,
"grad_norm": 3.2965354919433594,
"learning_rate": 4.661865998747652e-05,
"loss": 0.2095,
"step": 158
},
{
"epoch": 0.2896174863387978,
"grad_norm": 4.767673492431641,
"learning_rate": 4.658735128365686e-05,
"loss": 0.0516,
"step": 159
},
{
"epoch": 0.29143897996357016,
"grad_norm": 0.28837457299232483,
"learning_rate": 4.65560425798372e-05,
"loss": 0.0066,
"step": 160
},
{
"epoch": 0.29326047358834245,
"grad_norm": 2.4652457237243652,
"learning_rate": 4.6524733876017536e-05,
"loss": 0.0242,
"step": 161
},
{
"epoch": 0.29508196721311475,
"grad_norm": 8.974453926086426,
"learning_rate": 4.6493425172197874e-05,
"loss": 0.131,
"step": 162
},
{
"epoch": 0.29690346083788705,
"grad_norm": 6.204460144042969,
"learning_rate": 4.646211646837821e-05,
"loss": 0.0939,
"step": 163
},
{
"epoch": 0.2987249544626594,
"grad_norm": 3.9641923904418945,
"learning_rate": 4.643080776455855e-05,
"loss": 0.0657,
"step": 164
},
{
"epoch": 0.3005464480874317,
"grad_norm": 0.4072006940841675,
"learning_rate": 4.639949906073889e-05,
"loss": 0.0094,
"step": 165
},
{
"epoch": 0.302367941712204,
"grad_norm": 6.755229473114014,
"learning_rate": 4.636819035691923e-05,
"loss": 0.0565,
"step": 166
},
{
"epoch": 0.30418943533697634,
"grad_norm": 3.4620673656463623,
"learning_rate": 4.6336881653099565e-05,
"loss": 0.118,
"step": 167
},
{
"epoch": 0.30601092896174864,
"grad_norm": 2.3143203258514404,
"learning_rate": 4.63055729492799e-05,
"loss": 0.0329,
"step": 168
},
{
"epoch": 0.30783242258652094,
"grad_norm": 4.3909502029418945,
"learning_rate": 4.627426424546024e-05,
"loss": 0.0331,
"step": 169
},
{
"epoch": 0.30965391621129323,
"grad_norm": 4.431717872619629,
"learning_rate": 4.624295554164057e-05,
"loss": 0.2422,
"step": 170
},
{
"epoch": 0.3114754098360656,
"grad_norm": 5.801990985870361,
"learning_rate": 4.621164683782092e-05,
"loss": 0.1696,
"step": 171
},
{
"epoch": 0.3132969034608379,
"grad_norm": 3.9410240650177,
"learning_rate": 4.6180338134001256e-05,
"loss": 0.105,
"step": 172
},
{
"epoch": 0.3151183970856102,
"grad_norm": 3.1621594429016113,
"learning_rate": 4.6149029430181594e-05,
"loss": 0.1192,
"step": 173
},
{
"epoch": 0.31693989071038253,
"grad_norm": 0.6451271772384644,
"learning_rate": 4.611772072636193e-05,
"loss": 0.0161,
"step": 174
},
{
"epoch": 0.31876138433515483,
"grad_norm": 2.4215264320373535,
"learning_rate": 4.608641202254227e-05,
"loss": 0.1183,
"step": 175
},
{
"epoch": 0.3205828779599271,
"grad_norm": 1.0982496738433838,
"learning_rate": 4.605510331872261e-05,
"loss": 0.0156,
"step": 176
},
{
"epoch": 0.3224043715846995,
"grad_norm": 3.5166165828704834,
"learning_rate": 4.6023794614902946e-05,
"loss": 0.0369,
"step": 177
},
{
"epoch": 0.3242258652094718,
"grad_norm": 9.985152244567871,
"learning_rate": 4.5992485911083284e-05,
"loss": 0.2611,
"step": 178
},
{
"epoch": 0.32604735883424407,
"grad_norm": 7.556087493896484,
"learning_rate": 4.596117720726362e-05,
"loss": 0.1196,
"step": 179
},
{
"epoch": 0.32786885245901637,
"grad_norm": 5.1856255531311035,
"learning_rate": 4.5929868503443954e-05,
"loss": 0.0543,
"step": 180
},
{
"epoch": 0.3296903460837887,
"grad_norm": 2.5084640979766846,
"learning_rate": 4.58985597996243e-05,
"loss": 0.0427,
"step": 181
},
{
"epoch": 0.331511839708561,
"grad_norm": 4.249439716339111,
"learning_rate": 4.586725109580464e-05,
"loss": 0.0918,
"step": 182
},
{
"epoch": 0.3333333333333333,
"grad_norm": 2.1144330501556396,
"learning_rate": 4.5835942391984975e-05,
"loss": 0.0206,
"step": 183
},
{
"epoch": 0.33515482695810567,
"grad_norm": 3.1298768520355225,
"learning_rate": 4.580463368816531e-05,
"loss": 0.2067,
"step": 184
},
{
"epoch": 0.33697632058287796,
"grad_norm": 0.23244377970695496,
"learning_rate": 4.577332498434565e-05,
"loss": 0.0054,
"step": 185
},
{
"epoch": 0.33879781420765026,
"grad_norm": 5.3826165199279785,
"learning_rate": 4.574201628052599e-05,
"loss": 0.0849,
"step": 186
},
{
"epoch": 0.3406193078324226,
"grad_norm": 1.491070032119751,
"learning_rate": 4.571070757670632e-05,
"loss": 0.0188,
"step": 187
},
{
"epoch": 0.3424408014571949,
"grad_norm": 4.769292831420898,
"learning_rate": 4.5679398872886666e-05,
"loss": 0.0418,
"step": 188
},
{
"epoch": 0.3442622950819672,
"grad_norm": 2.514923334121704,
"learning_rate": 4.5648090169067004e-05,
"loss": 0.0264,
"step": 189
},
{
"epoch": 0.3460837887067395,
"grad_norm": 2.249234914779663,
"learning_rate": 4.561678146524734e-05,
"loss": 0.0439,
"step": 190
},
{
"epoch": 0.34790528233151186,
"grad_norm": 3.586806535720825,
"learning_rate": 4.558547276142768e-05,
"loss": 0.0383,
"step": 191
},
{
"epoch": 0.34972677595628415,
"grad_norm": 0.31262052059173584,
"learning_rate": 4.555416405760802e-05,
"loss": 0.0061,
"step": 192
},
{
"epoch": 0.35154826958105645,
"grad_norm": 0.5122743844985962,
"learning_rate": 4.5522855353788357e-05,
"loss": 0.0079,
"step": 193
},
{
"epoch": 0.3533697632058288,
"grad_norm": 6.9489970207214355,
"learning_rate": 4.5491546649968695e-05,
"loss": 0.0527,
"step": 194
},
{
"epoch": 0.3551912568306011,
"grad_norm": 0.07147891819477081,
"learning_rate": 4.546023794614903e-05,
"loss": 0.0026,
"step": 195
},
{
"epoch": 0.3570127504553734,
"grad_norm": 0.07803834974765778,
"learning_rate": 4.542892924232937e-05,
"loss": 0.0026,
"step": 196
},
{
"epoch": 0.3588342440801457,
"grad_norm": 9.417202949523926,
"learning_rate": 4.53976205385097e-05,
"loss": 0.0472,
"step": 197
},
{
"epoch": 0.36065573770491804,
"grad_norm": 15.531563758850098,
"learning_rate": 4.536631183469005e-05,
"loss": 0.3136,
"step": 198
},
{
"epoch": 0.36247723132969034,
"grad_norm": 1.265466332435608,
"learning_rate": 4.5335003130870385e-05,
"loss": 0.0104,
"step": 199
},
{
"epoch": 0.36429872495446264,
"grad_norm": 0.04873238131403923,
"learning_rate": 4.5303694427050724e-05,
"loss": 0.0016,
"step": 200
},
{
"epoch": 0.366120218579235,
"grad_norm": 5.967303276062012,
"learning_rate": 4.527238572323106e-05,
"loss": 0.0346,
"step": 201
},
{
"epoch": 0.3679417122040073,
"grad_norm": 4.412702560424805,
"learning_rate": 4.52410770194114e-05,
"loss": 0.0571,
"step": 202
},
{
"epoch": 0.3697632058287796,
"grad_norm": 4.523514270782471,
"learning_rate": 4.520976831559174e-05,
"loss": 0.1823,
"step": 203
},
{
"epoch": 0.37158469945355194,
"grad_norm": 0.036965470761060715,
"learning_rate": 4.517845961177207e-05,
"loss": 0.0014,
"step": 204
},
{
"epoch": 0.37340619307832423,
"grad_norm": 1.5821716785430908,
"learning_rate": 4.5147150907952414e-05,
"loss": 0.0077,
"step": 205
},
{
"epoch": 0.37522768670309653,
"grad_norm": 9.728230476379395,
"learning_rate": 4.511584220413275e-05,
"loss": 0.1637,
"step": 206
},
{
"epoch": 0.3770491803278688,
"grad_norm": 2.888592481613159,
"learning_rate": 4.5084533500313084e-05,
"loss": 0.016,
"step": 207
},
{
"epoch": 0.3788706739526412,
"grad_norm": 16.919540405273438,
"learning_rate": 4.505322479649343e-05,
"loss": 0.2238,
"step": 208
},
{
"epoch": 0.3806921675774135,
"grad_norm": 7.649754524230957,
"learning_rate": 4.502191609267377e-05,
"loss": 0.0428,
"step": 209
},
{
"epoch": 0.3825136612021858,
"grad_norm": 0.849263072013855,
"learning_rate": 4.4990607388854105e-05,
"loss": 0.0087,
"step": 210
},
{
"epoch": 0.3843351548269581,
"grad_norm": 0.10891333967447281,
"learning_rate": 4.495929868503444e-05,
"loss": 0.0027,
"step": 211
},
{
"epoch": 0.3861566484517304,
"grad_norm": 0.21434666216373444,
"learning_rate": 4.492798998121478e-05,
"loss": 0.0038,
"step": 212
},
{
"epoch": 0.3879781420765027,
"grad_norm": 0.19272175431251526,
"learning_rate": 4.489668127739512e-05,
"loss": 0.0028,
"step": 213
},
{
"epoch": 0.38979963570127507,
"grad_norm": 1.2215018272399902,
"learning_rate": 4.486537257357545e-05,
"loss": 0.0161,
"step": 214
},
{
"epoch": 0.39162112932604737,
"grad_norm": 1.3724066019058228,
"learning_rate": 4.4834063869755796e-05,
"loss": 0.0097,
"step": 215
},
{
"epoch": 0.39344262295081966,
"grad_norm": 1.1924035549163818,
"learning_rate": 4.4802755165936134e-05,
"loss": 0.0066,
"step": 216
},
{
"epoch": 0.39526411657559196,
"grad_norm": 1.784501075744629,
"learning_rate": 4.477144646211647e-05,
"loss": 0.0108,
"step": 217
},
{
"epoch": 0.3970856102003643,
"grad_norm": 0.02774379588663578,
"learning_rate": 4.474013775829681e-05,
"loss": 0.0009,
"step": 218
},
{
"epoch": 0.3989071038251366,
"grad_norm": 0.7612521052360535,
"learning_rate": 4.470882905447715e-05,
"loss": 0.0028,
"step": 219
},
{
"epoch": 0.4007285974499089,
"grad_norm": 18.709510803222656,
"learning_rate": 4.4677520350657486e-05,
"loss": 0.2104,
"step": 220
},
{
"epoch": 0.40255009107468126,
"grad_norm": 16.760061264038086,
"learning_rate": 4.464621164683782e-05,
"loss": 0.2124,
"step": 221
},
{
"epoch": 0.40437158469945356,
"grad_norm": 11.746216773986816,
"learning_rate": 4.461490294301816e-05,
"loss": 0.0412,
"step": 222
},
{
"epoch": 0.40619307832422585,
"grad_norm": 0.067040354013443,
"learning_rate": 4.45835942391985e-05,
"loss": 0.0011,
"step": 223
},
{
"epoch": 0.4080145719489982,
"grad_norm": 0.5200300216674805,
"learning_rate": 4.455228553537883e-05,
"loss": 0.0037,
"step": 224
},
{
"epoch": 0.4098360655737705,
"grad_norm": 8.267187118530273,
"learning_rate": 4.452097683155918e-05,
"loss": 0.1523,
"step": 225
},
{
"epoch": 0.4116575591985428,
"grad_norm": 3.609358549118042,
"learning_rate": 4.4489668127739515e-05,
"loss": 0.1881,
"step": 226
},
{
"epoch": 0.4134790528233151,
"grad_norm": 0.05134233087301254,
"learning_rate": 4.445835942391985e-05,
"loss": 0.0009,
"step": 227
},
{
"epoch": 0.41530054644808745,
"grad_norm": 0.025521283969283104,
"learning_rate": 4.442705072010019e-05,
"loss": 0.0008,
"step": 228
},
{
"epoch": 0.41712204007285975,
"grad_norm": 4.127375602722168,
"learning_rate": 4.439574201628053e-05,
"loss": 0.1777,
"step": 229
},
{
"epoch": 0.41894353369763204,
"grad_norm": 0.06890428066253662,
"learning_rate": 4.436443331246087e-05,
"loss": 0.0013,
"step": 230
},
{
"epoch": 0.4207650273224044,
"grad_norm": 0.10192258656024933,
"learning_rate": 4.43331246086412e-05,
"loss": 0.0011,
"step": 231
},
{
"epoch": 0.4225865209471767,
"grad_norm": 11.624174118041992,
"learning_rate": 4.4301815904821544e-05,
"loss": 0.2336,
"step": 232
},
{
"epoch": 0.424408014571949,
"grad_norm": 7.359781265258789,
"learning_rate": 4.427050720100188e-05,
"loss": 0.0708,
"step": 233
},
{
"epoch": 0.4262295081967213,
"grad_norm": 0.025949914008378983,
"learning_rate": 4.423919849718222e-05,
"loss": 0.0009,
"step": 234
},
{
"epoch": 0.42805100182149364,
"grad_norm": 16.999971389770508,
"learning_rate": 4.420788979336256e-05,
"loss": 0.0829,
"step": 235
},
{
"epoch": 0.42987249544626593,
"grad_norm": 1.726982593536377,
"learning_rate": 4.41765810895429e-05,
"loss": 0.0058,
"step": 236
},
{
"epoch": 0.43169398907103823,
"grad_norm": 5.918796539306641,
"learning_rate": 4.4145272385723235e-05,
"loss": 0.3652,
"step": 237
},
{
"epoch": 0.4335154826958106,
"grad_norm": 0.22916308045387268,
"learning_rate": 4.411396368190357e-05,
"loss": 0.004,
"step": 238
},
{
"epoch": 0.4353369763205829,
"grad_norm": 16.436437606811523,
"learning_rate": 4.408265497808391e-05,
"loss": 0.1857,
"step": 239
},
{
"epoch": 0.4371584699453552,
"grad_norm": 4.63889741897583,
"learning_rate": 4.405134627426425e-05,
"loss": 0.1949,
"step": 240
},
{
"epoch": 0.43897996357012753,
"grad_norm": 3.314641237258911,
"learning_rate": 4.402003757044458e-05,
"loss": 0.0253,
"step": 241
},
{
"epoch": 0.4408014571948998,
"grad_norm": 5.616635322570801,
"learning_rate": 4.3988728866624925e-05,
"loss": 0.3264,
"step": 242
},
{
"epoch": 0.4426229508196721,
"grad_norm": 3.840766191482544,
"learning_rate": 4.3957420162805264e-05,
"loss": 0.1885,
"step": 243
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.6121935844421387,
"learning_rate": 4.39261114589856e-05,
"loss": 0.0043,
"step": 244
},
{
"epoch": 0.44626593806921677,
"grad_norm": 1.0368307828903198,
"learning_rate": 4.389480275516594e-05,
"loss": 0.0139,
"step": 245
},
{
"epoch": 0.44808743169398907,
"grad_norm": 0.8692595958709717,
"learning_rate": 4.386349405134628e-05,
"loss": 0.0116,
"step": 246
},
{
"epoch": 0.44990892531876137,
"grad_norm": 13.962285041809082,
"learning_rate": 4.3832185347526616e-05,
"loss": 0.2261,
"step": 247
},
{
"epoch": 0.4517304189435337,
"grad_norm": 7.238385200500488,
"learning_rate": 4.380087664370695e-05,
"loss": 0.0423,
"step": 248
},
{
"epoch": 0.453551912568306,
"grad_norm": 0.13841496407985687,
"learning_rate": 4.376956793988729e-05,
"loss": 0.0048,
"step": 249
},
{
"epoch": 0.4553734061930783,
"grad_norm": 2.9722912311553955,
"learning_rate": 4.373825923606763e-05,
"loss": 0.1466,
"step": 250
},
{
"epoch": 0.45719489981785066,
"grad_norm": 2.275719404220581,
"learning_rate": 4.370695053224796e-05,
"loss": 0.0137,
"step": 251
},
{
"epoch": 0.45901639344262296,
"grad_norm": 2.344390392303467,
"learning_rate": 4.367564182842831e-05,
"loss": 0.0229,
"step": 252
},
{
"epoch": 0.46083788706739526,
"grad_norm": 2.280900716781616,
"learning_rate": 4.3644333124608645e-05,
"loss": 0.0163,
"step": 253
},
{
"epoch": 0.46265938069216755,
"grad_norm": 0.29969778656959534,
"learning_rate": 4.361302442078898e-05,
"loss": 0.005,
"step": 254
},
{
"epoch": 0.4644808743169399,
"grad_norm": 9.383149147033691,
"learning_rate": 4.358171571696932e-05,
"loss": 0.1589,
"step": 255
},
{
"epoch": 0.4663023679417122,
"grad_norm": 2.4629006385803223,
"learning_rate": 4.355040701314966e-05,
"loss": 0.1433,
"step": 256
},
{
"epoch": 0.4681238615664845,
"grad_norm": 8.262639999389648,
"learning_rate": 4.351909830933e-05,
"loss": 0.0439,
"step": 257
},
{
"epoch": 0.46994535519125685,
"grad_norm": 0.35271739959716797,
"learning_rate": 4.348778960551033e-05,
"loss": 0.0069,
"step": 258
},
{
"epoch": 0.47176684881602915,
"grad_norm": 7.017502784729004,
"learning_rate": 4.3456480901690674e-05,
"loss": 0.0536,
"step": 259
},
{
"epoch": 0.47358834244080145,
"grad_norm": 1.242332935333252,
"learning_rate": 4.342517219787101e-05,
"loss": 0.0127,
"step": 260
},
{
"epoch": 0.47540983606557374,
"grad_norm": 2.1583664417266846,
"learning_rate": 4.339386349405135e-05,
"loss": 0.0214,
"step": 261
},
{
"epoch": 0.4772313296903461,
"grad_norm": 4.674497127532959,
"learning_rate": 4.336255479023169e-05,
"loss": 0.0298,
"step": 262
},
{
"epoch": 0.4790528233151184,
"grad_norm": 1.7035185098648071,
"learning_rate": 4.3331246086412026e-05,
"loss": 0.0105,
"step": 263
},
{
"epoch": 0.4808743169398907,
"grad_norm": 0.12772846221923828,
"learning_rate": 4.3299937382592365e-05,
"loss": 0.0023,
"step": 264
},
{
"epoch": 0.48269581056466304,
"grad_norm": 19.110734939575195,
"learning_rate": 4.3268628678772696e-05,
"loss": 0.4182,
"step": 265
},
{
"epoch": 0.48451730418943534,
"grad_norm": 9.244805335998535,
"learning_rate": 4.323731997495304e-05,
"loss": 0.2689,
"step": 266
},
{
"epoch": 0.48633879781420764,
"grad_norm": 1.423405647277832,
"learning_rate": 4.320601127113338e-05,
"loss": 0.0072,
"step": 267
},
{
"epoch": 0.48816029143898,
"grad_norm": 5.039240837097168,
"learning_rate": 4.317470256731371e-05,
"loss": 0.1362,
"step": 268
},
{
"epoch": 0.4899817850637523,
"grad_norm": 6.24086856842041,
"learning_rate": 4.3143393863494055e-05,
"loss": 0.1773,
"step": 269
},
{
"epoch": 0.4918032786885246,
"grad_norm": 1.5966615676879883,
"learning_rate": 4.3112085159674393e-05,
"loss": 0.1787,
"step": 270
},
{
"epoch": 0.4936247723132969,
"grad_norm": 3.686453104019165,
"learning_rate": 4.308077645585473e-05,
"loss": 0.3175,
"step": 271
},
{
"epoch": 0.49544626593806923,
"grad_norm": 0.04620608314871788,
"learning_rate": 4.304946775203507e-05,
"loss": 0.0016,
"step": 272
},
{
"epoch": 0.4972677595628415,
"grad_norm": 0.20282939076423645,
"learning_rate": 4.301815904821541e-05,
"loss": 0.003,
"step": 273
},
{
"epoch": 0.4990892531876138,
"grad_norm": 0.0827542319893837,
"learning_rate": 4.2986850344395746e-05,
"loss": 0.0029,
"step": 274
},
{
"epoch": 0.5009107468123861,
"grad_norm": 0.29965463280677795,
"learning_rate": 4.295554164057608e-05,
"loss": 0.0063,
"step": 275
},
{
"epoch": 0.5027322404371585,
"grad_norm": 4.543389320373535,
"learning_rate": 4.292423293675642e-05,
"loss": 0.0712,
"step": 276
},
{
"epoch": 0.5045537340619308,
"grad_norm": 2.681236743927002,
"learning_rate": 4.289292423293676e-05,
"loss": 0.0391,
"step": 277
},
{
"epoch": 0.5063752276867031,
"grad_norm": 2.3870694637298584,
"learning_rate": 4.286161552911709e-05,
"loss": 0.028,
"step": 278
},
{
"epoch": 0.5081967213114754,
"grad_norm": 1.0104269981384277,
"learning_rate": 4.283030682529744e-05,
"loss": 0.0179,
"step": 279
},
{
"epoch": 0.5100182149362478,
"grad_norm": 0.17253230512142181,
"learning_rate": 4.2798998121477775e-05,
"loss": 0.0052,
"step": 280
},
{
"epoch": 0.51183970856102,
"grad_norm": 0.27264872193336487,
"learning_rate": 4.276768941765811e-05,
"loss": 0.0086,
"step": 281
},
{
"epoch": 0.5136612021857924,
"grad_norm": 4.211297988891602,
"learning_rate": 4.2736380713838444e-05,
"loss": 0.0984,
"step": 282
},
{
"epoch": 0.5154826958105647,
"grad_norm": 0.8885019421577454,
"learning_rate": 4.270507201001879e-05,
"loss": 0.0076,
"step": 283
},
{
"epoch": 0.517304189435337,
"grad_norm": 0.921626091003418,
"learning_rate": 4.267376330619913e-05,
"loss": 0.0087,
"step": 284
},
{
"epoch": 0.5191256830601093,
"grad_norm": 0.955025851726532,
"learning_rate": 4.264245460237946e-05,
"loss": 0.0131,
"step": 285
},
{
"epoch": 0.5209471766848816,
"grad_norm": 6.5410919189453125,
"learning_rate": 4.2611145898559804e-05,
"loss": 0.0893,
"step": 286
},
{
"epoch": 0.5227686703096539,
"grad_norm": 10.288850784301758,
"learning_rate": 4.257983719474014e-05,
"loss": 0.0205,
"step": 287
},
{
"epoch": 0.5245901639344263,
"grad_norm": 0.04318219795823097,
"learning_rate": 4.254852849092048e-05,
"loss": 0.0017,
"step": 288
},
{
"epoch": 0.5264116575591985,
"grad_norm": 2.292560577392578,
"learning_rate": 4.251721978710082e-05,
"loss": 0.1756,
"step": 289
},
{
"epoch": 0.5282331511839709,
"grad_norm": 3.368222713470459,
"learning_rate": 4.2485911083281156e-05,
"loss": 0.0199,
"step": 290
},
{
"epoch": 0.5300546448087432,
"grad_norm": 4.767663955688477,
"learning_rate": 4.2454602379461494e-05,
"loss": 0.1685,
"step": 291
},
{
"epoch": 0.5318761384335154,
"grad_norm": 0.11847999691963196,
"learning_rate": 4.2423293675641826e-05,
"loss": 0.0029,
"step": 292
},
{
"epoch": 0.5336976320582878,
"grad_norm": 6.336174488067627,
"learning_rate": 4.239198497182217e-05,
"loss": 0.0425,
"step": 293
},
{
"epoch": 0.5355191256830601,
"grad_norm": 15.296269416809082,
"learning_rate": 4.236067626800251e-05,
"loss": 0.0794,
"step": 294
},
{
"epoch": 0.5373406193078324,
"grad_norm": 0.18433484435081482,
"learning_rate": 4.232936756418284e-05,
"loss": 0.0039,
"step": 295
},
{
"epoch": 0.5391621129326047,
"grad_norm": 0.14353422820568085,
"learning_rate": 4.2298058860363185e-05,
"loss": 0.003,
"step": 296
},
{
"epoch": 0.5409836065573771,
"grad_norm": 3.668611526489258,
"learning_rate": 4.226675015654352e-05,
"loss": 0.0258,
"step": 297
},
{
"epoch": 0.5428051001821493,
"grad_norm": 3.0370147228240967,
"learning_rate": 4.223544145272386e-05,
"loss": 0.0125,
"step": 298
},
{
"epoch": 0.5446265938069217,
"grad_norm": 3.1501262187957764,
"learning_rate": 4.220413274890419e-05,
"loss": 0.165,
"step": 299
},
{
"epoch": 0.546448087431694,
"grad_norm": 1.3201171159744263,
"learning_rate": 4.217282404508454e-05,
"loss": 0.0116,
"step": 300
},
{
"epoch": 0.5482695810564663,
"grad_norm": 0.2336684912443161,
"learning_rate": 4.2141515341264876e-05,
"loss": 0.0035,
"step": 301
},
{
"epoch": 0.5500910746812386,
"grad_norm": 9.169921875,
"learning_rate": 4.211020663744521e-05,
"loss": 0.0728,
"step": 302
},
{
"epoch": 0.5519125683060109,
"grad_norm": 0.07561606913805008,
"learning_rate": 4.207889793362555e-05,
"loss": 0.0026,
"step": 303
},
{
"epoch": 0.5537340619307832,
"grad_norm": 1.609074592590332,
"learning_rate": 4.204758922980589e-05,
"loss": 0.0113,
"step": 304
},
{
"epoch": 0.5555555555555556,
"grad_norm": 3.4536828994750977,
"learning_rate": 4.201628052598622e-05,
"loss": 0.0147,
"step": 305
},
{
"epoch": 0.5573770491803278,
"grad_norm": 6.3433918952941895,
"learning_rate": 4.1984971822166566e-05,
"loss": 0.2036,
"step": 306
},
{
"epoch": 0.5591985428051002,
"grad_norm": 4.692470550537109,
"learning_rate": 4.1953663118346905e-05,
"loss": 0.0401,
"step": 307
},
{
"epoch": 0.5610200364298725,
"grad_norm": 6.455625057220459,
"learning_rate": 4.192235441452724e-05,
"loss": 0.0697,
"step": 308
},
{
"epoch": 0.5628415300546448,
"grad_norm": 4.6514716148376465,
"learning_rate": 4.1891045710707574e-05,
"loss": 0.1204,
"step": 309
},
{
"epoch": 0.5646630236794171,
"grad_norm": 3.956284523010254,
"learning_rate": 4.185973700688792e-05,
"loss": 0.1731,
"step": 310
},
{
"epoch": 0.5664845173041895,
"grad_norm": 1.0369857549667358,
"learning_rate": 4.182842830306826e-05,
"loss": 0.0108,
"step": 311
},
{
"epoch": 0.5683060109289617,
"grad_norm": 0.22317107021808624,
"learning_rate": 4.179711959924859e-05,
"loss": 0.0027,
"step": 312
},
{
"epoch": 0.5701275045537341,
"grad_norm": 6.622973442077637,
"learning_rate": 4.1765810895428933e-05,
"loss": 0.0617,
"step": 313
},
{
"epoch": 0.5719489981785064,
"grad_norm": 6.9914422035217285,
"learning_rate": 4.173450219160927e-05,
"loss": 0.08,
"step": 314
},
{
"epoch": 0.5737704918032787,
"grad_norm": 9.093573570251465,
"learning_rate": 4.170319348778961e-05,
"loss": 0.1431,
"step": 315
},
{
"epoch": 0.575591985428051,
"grad_norm": 0.36630722880363464,
"learning_rate": 4.167188478396994e-05,
"loss": 0.0064,
"step": 316
},
{
"epoch": 0.5774134790528234,
"grad_norm": 4.634314060211182,
"learning_rate": 4.1640576080150286e-05,
"loss": 0.1175,
"step": 317
},
{
"epoch": 0.5792349726775956,
"grad_norm": 0.327493816614151,
"learning_rate": 4.1609267376330624e-05,
"loss": 0.0048,
"step": 318
},
{
"epoch": 0.581056466302368,
"grad_norm": 0.24527287483215332,
"learning_rate": 4.1577958672510956e-05,
"loss": 0.0042,
"step": 319
},
{
"epoch": 0.5828779599271403,
"grad_norm": 5.85356330871582,
"learning_rate": 4.15466499686913e-05,
"loss": 0.0292,
"step": 320
},
{
"epoch": 0.5846994535519126,
"grad_norm": 0.6484350562095642,
"learning_rate": 4.151534126487164e-05,
"loss": 0.0061,
"step": 321
},
{
"epoch": 0.5865209471766849,
"grad_norm": 2.3831231594085693,
"learning_rate": 4.148403256105197e-05,
"loss": 0.023,
"step": 322
},
{
"epoch": 0.5883424408014571,
"grad_norm": 0.4163350462913513,
"learning_rate": 4.1452723857232315e-05,
"loss": 0.0057,
"step": 323
},
{
"epoch": 0.5901639344262295,
"grad_norm": 3.6175761222839355,
"learning_rate": 4.142141515341265e-05,
"loss": 0.0257,
"step": 324
},
{
"epoch": 0.5919854280510018,
"grad_norm": 2.211493730545044,
"learning_rate": 4.139010644959299e-05,
"loss": 0.0206,
"step": 325
},
{
"epoch": 0.5938069216757741,
"grad_norm": 0.03227696940302849,
"learning_rate": 4.135879774577332e-05,
"loss": 0.0011,
"step": 326
},
{
"epoch": 0.5956284153005464,
"grad_norm": 0.03025558590888977,
"learning_rate": 4.132748904195367e-05,
"loss": 0.001,
"step": 327
},
{
"epoch": 0.5974499089253188,
"grad_norm": 0.027479926124215126,
"learning_rate": 4.1296180338134006e-05,
"loss": 0.0009,
"step": 328
},
{
"epoch": 0.599271402550091,
"grad_norm": 0.07558054476976395,
"learning_rate": 4.126487163431434e-05,
"loss": 0.0015,
"step": 329
},
{
"epoch": 0.6010928961748634,
"grad_norm": 2.8735299110412598,
"learning_rate": 4.123356293049468e-05,
"loss": 0.0178,
"step": 330
},
{
"epoch": 0.6029143897996357,
"grad_norm": 5.421073913574219,
"learning_rate": 4.120225422667502e-05,
"loss": 0.1629,
"step": 331
},
{
"epoch": 0.604735883424408,
"grad_norm": 3.4357104301452637,
"learning_rate": 4.117094552285535e-05,
"loss": 0.1494,
"step": 332
},
{
"epoch": 0.6065573770491803,
"grad_norm": 3.9810731410980225,
"learning_rate": 4.113963681903569e-05,
"loss": 0.1603,
"step": 333
},
{
"epoch": 0.6083788706739527,
"grad_norm": 0.08673622459173203,
"learning_rate": 4.1108328115216034e-05,
"loss": 0.0013,
"step": 334
},
{
"epoch": 0.6102003642987249,
"grad_norm": 0.03046957589685917,
"learning_rate": 4.107701941139637e-05,
"loss": 0.0011,
"step": 335
},
{
"epoch": 0.6120218579234973,
"grad_norm": 11.446281433105469,
"learning_rate": 4.1045710707576704e-05,
"loss": 0.0644,
"step": 336
},
{
"epoch": 0.6138433515482696,
"grad_norm": 1.642142415046692,
"learning_rate": 4.101440200375705e-05,
"loss": 0.0056,
"step": 337
},
{
"epoch": 0.6156648451730419,
"grad_norm": 1.6858967542648315,
"learning_rate": 4.098309329993739e-05,
"loss": 0.0133,
"step": 338
},
{
"epoch": 0.6174863387978142,
"grad_norm": 0.04817913472652435,
"learning_rate": 4.095178459611772e-05,
"loss": 0.0013,
"step": 339
},
{
"epoch": 0.6193078324225865,
"grad_norm": 1.458479642868042,
"learning_rate": 4.092047589229806e-05,
"loss": 0.0078,
"step": 340
},
{
"epoch": 0.6211293260473588,
"grad_norm": 0.7223337292671204,
"learning_rate": 4.08891671884784e-05,
"loss": 0.0167,
"step": 341
},
{
"epoch": 0.6229508196721312,
"grad_norm": 0.4470398426055908,
"learning_rate": 4.085785848465874e-05,
"loss": 0.0043,
"step": 342
},
{
"epoch": 0.6247723132969034,
"grad_norm": 0.10019668936729431,
"learning_rate": 4.082654978083907e-05,
"loss": 0.0018,
"step": 343
},
{
"epoch": 0.6265938069216758,
"grad_norm": 0.07710490375757217,
"learning_rate": 4.0795241077019416e-05,
"loss": 0.0014,
"step": 344
},
{
"epoch": 0.6284153005464481,
"grad_norm": 0.49735426902770996,
"learning_rate": 4.0763932373199754e-05,
"loss": 0.0083,
"step": 345
},
{
"epoch": 0.6302367941712204,
"grad_norm": 7.425228118896484,
"learning_rate": 4.0732623669380085e-05,
"loss": 0.4383,
"step": 346
},
{
"epoch": 0.6320582877959927,
"grad_norm": 9.259267807006836,
"learning_rate": 4.070131496556043e-05,
"loss": 0.0335,
"step": 347
},
{
"epoch": 0.6338797814207651,
"grad_norm": 1.7133445739746094,
"learning_rate": 4.067000626174077e-05,
"loss": 0.1959,
"step": 348
},
{
"epoch": 0.6357012750455373,
"grad_norm": 7.2402238845825195,
"learning_rate": 4.06386975579211e-05,
"loss": 0.0182,
"step": 349
},
{
"epoch": 0.6375227686703097,
"grad_norm": 1.9264317750930786,
"learning_rate": 4.0607388854101445e-05,
"loss": 0.0096,
"step": 350
},
{
"epoch": 0.639344262295082,
"grad_norm": 5.997550964355469,
"learning_rate": 4.057608015028178e-05,
"loss": 0.2692,
"step": 351
},
{
"epoch": 0.6411657559198543,
"grad_norm": 0.8955827355384827,
"learning_rate": 4.054477144646212e-05,
"loss": 0.008,
"step": 352
},
{
"epoch": 0.6429872495446266,
"grad_norm": 15.662752151489258,
"learning_rate": 4.051346274264245e-05,
"loss": 0.0583,
"step": 353
},
{
"epoch": 0.644808743169399,
"grad_norm": 0.17019307613372803,
"learning_rate": 4.04821540388228e-05,
"loss": 0.0045,
"step": 354
},
{
"epoch": 0.6466302367941712,
"grad_norm": 0.09931718558073044,
"learning_rate": 4.0450845335003135e-05,
"loss": 0.0026,
"step": 355
},
{
"epoch": 0.6484517304189436,
"grad_norm": 0.13057781755924225,
"learning_rate": 4.041953663118347e-05,
"loss": 0.0043,
"step": 356
},
{
"epoch": 0.6502732240437158,
"grad_norm": 8.035531044006348,
"learning_rate": 4.038822792736381e-05,
"loss": 0.1404,
"step": 357
},
{
"epoch": 0.6520947176684881,
"grad_norm": 5.847539901733398,
"learning_rate": 4.035691922354415e-05,
"loss": 0.096,
"step": 358
},
{
"epoch": 0.6539162112932605,
"grad_norm": 2.056274890899658,
"learning_rate": 4.032561051972448e-05,
"loss": 0.0151,
"step": 359
},
{
"epoch": 0.6557377049180327,
"grad_norm": 0.13496744632720947,
"learning_rate": 4.029430181590482e-05,
"loss": 0.0037,
"step": 360
},
{
"epoch": 0.6575591985428051,
"grad_norm": 1.0296528339385986,
"learning_rate": 4.0262993112085164e-05,
"loss": 0.0107,
"step": 361
},
{
"epoch": 0.6593806921675774,
"grad_norm": 5.667383193969727,
"learning_rate": 4.02316844082655e-05,
"loss": 0.0813,
"step": 362
},
{
"epoch": 0.6612021857923497,
"grad_norm": 4.281712055206299,
"learning_rate": 4.0200375704445834e-05,
"loss": 0.0377,
"step": 363
},
{
"epoch": 0.663023679417122,
"grad_norm": 0.37562137842178345,
"learning_rate": 4.016906700062618e-05,
"loss": 0.0068,
"step": 364
},
{
"epoch": 0.6648451730418944,
"grad_norm": 7.240072727203369,
"learning_rate": 4.013775829680652e-05,
"loss": 0.0583,
"step": 365
},
{
"epoch": 0.6666666666666666,
"grad_norm": 1.4774280786514282,
"learning_rate": 4.010644959298685e-05,
"loss": 0.0111,
"step": 366
},
{
"epoch": 0.668488160291439,
"grad_norm": 2.835345983505249,
"learning_rate": 4.007514088916719e-05,
"loss": 0.048,
"step": 367
},
{
"epoch": 0.6703096539162113,
"grad_norm": 0.5821844935417175,
"learning_rate": 4.004383218534753e-05,
"loss": 0.0049,
"step": 368
},
{
"epoch": 0.6721311475409836,
"grad_norm": 3.108658790588379,
"learning_rate": 4.001252348152787e-05,
"loss": 0.0191,
"step": 369
},
{
"epoch": 0.6739526411657559,
"grad_norm": 8.561149597167969,
"learning_rate": 3.99812147777082e-05,
"loss": 0.0647,
"step": 370
},
{
"epoch": 0.6757741347905283,
"grad_norm": 0.19415520131587982,
"learning_rate": 3.9949906073888546e-05,
"loss": 0.0029,
"step": 371
},
{
"epoch": 0.6775956284153005,
"grad_norm": 0.06202491745352745,
"learning_rate": 3.9918597370068884e-05,
"loss": 0.0018,
"step": 372
},
{
"epoch": 0.6794171220400729,
"grad_norm": 3.6461710929870605,
"learning_rate": 3.9887288666249215e-05,
"loss": 0.0083,
"step": 373
},
{
"epoch": 0.6812386156648452,
"grad_norm": 15.124981880187988,
"learning_rate": 3.985597996242956e-05,
"loss": 0.1567,
"step": 374
},
{
"epoch": 0.6830601092896175,
"grad_norm": 2.153791666030884,
"learning_rate": 3.98246712586099e-05,
"loss": 0.0105,
"step": 375
},
{
"epoch": 0.6848816029143898,
"grad_norm": 0.7105178236961365,
"learning_rate": 3.979336255479023e-05,
"loss": 0.0032,
"step": 376
},
{
"epoch": 0.6867030965391621,
"grad_norm": 0.782936692237854,
"learning_rate": 3.976205385097057e-05,
"loss": 0.0053,
"step": 377
},
{
"epoch": 0.6885245901639344,
"grad_norm": 17.51541519165039,
"learning_rate": 3.973074514715091e-05,
"loss": 0.0756,
"step": 378
},
{
"epoch": 0.6903460837887068,
"grad_norm": 8.316147804260254,
"learning_rate": 3.969943644333125e-05,
"loss": 0.0734,
"step": 379
},
{
"epoch": 0.692167577413479,
"grad_norm": 0.5029911398887634,
"learning_rate": 3.966812773951158e-05,
"loss": 0.0039,
"step": 380
},
{
"epoch": 0.6939890710382514,
"grad_norm": 0.038834672421216965,
"learning_rate": 3.963681903569193e-05,
"loss": 0.001,
"step": 381
},
{
"epoch": 0.6958105646630237,
"grad_norm": 6.278717517852783,
"learning_rate": 3.9605510331872265e-05,
"loss": 0.0977,
"step": 382
},
{
"epoch": 0.697632058287796,
"grad_norm": 0.07930924743413925,
"learning_rate": 3.9574201628052597e-05,
"loss": 0.001,
"step": 383
},
{
"epoch": 0.6994535519125683,
"grad_norm": 0.039321571588516235,
"learning_rate": 3.954289292423294e-05,
"loss": 0.0008,
"step": 384
},
{
"epoch": 0.7012750455373407,
"grad_norm": 0.296947717666626,
"learning_rate": 3.951158422041328e-05,
"loss": 0.0024,
"step": 385
},
{
"epoch": 0.7030965391621129,
"grad_norm": 9.602386474609375,
"learning_rate": 3.948027551659361e-05,
"loss": 0.2459,
"step": 386
},
{
"epoch": 0.7049180327868853,
"grad_norm": 0.015402719378471375,
"learning_rate": 3.944896681277395e-05,
"loss": 0.0006,
"step": 387
},
{
"epoch": 0.7067395264116576,
"grad_norm": 0.014821278862655163,
"learning_rate": 3.9417658108954294e-05,
"loss": 0.0005,
"step": 388
},
{
"epoch": 0.7085610200364298,
"grad_norm": 0.03218044340610504,
"learning_rate": 3.938634940513463e-05,
"loss": 0.0008,
"step": 389
},
{
"epoch": 0.7103825136612022,
"grad_norm": 0.2660595774650574,
"learning_rate": 3.9355040701314964e-05,
"loss": 0.002,
"step": 390
},
{
"epoch": 0.7122040072859745,
"grad_norm": 10.710992813110352,
"learning_rate": 3.932373199749531e-05,
"loss": 0.1631,
"step": 391
},
{
"epoch": 0.7140255009107468,
"grad_norm": 0.07790596038103104,
"learning_rate": 3.929242329367565e-05,
"loss": 0.0011,
"step": 392
},
{
"epoch": 0.7158469945355191,
"grad_norm": 6.2742462158203125,
"learning_rate": 3.926111458985598e-05,
"loss": 0.0302,
"step": 393
},
{
"epoch": 0.7176684881602914,
"grad_norm": 2.138582706451416,
"learning_rate": 3.9229805886036316e-05,
"loss": 0.1987,
"step": 394
},
{
"epoch": 0.7194899817850637,
"grad_norm": 0.017461583018302917,
"learning_rate": 3.919849718221666e-05,
"loss": 0.0005,
"step": 395
},
{
"epoch": 0.7213114754098361,
"grad_norm": 0.812929093837738,
"learning_rate": 3.9167188478397e-05,
"loss": 0.0035,
"step": 396
},
{
"epoch": 0.7231329690346083,
"grad_norm": 0.6114425659179688,
"learning_rate": 3.913587977457733e-05,
"loss": 0.0053,
"step": 397
},
{
"epoch": 0.7249544626593807,
"grad_norm": 4.900968074798584,
"learning_rate": 3.9104571070757675e-05,
"loss": 0.212,
"step": 398
},
{
"epoch": 0.726775956284153,
"grad_norm": 0.29532384872436523,
"learning_rate": 3.9073262366938014e-05,
"loss": 0.0041,
"step": 399
},
{
"epoch": 0.7285974499089253,
"grad_norm": 8.498466491699219,
"learning_rate": 3.9041953663118345e-05,
"loss": 0.0716,
"step": 400
},
{
"epoch": 0.7304189435336976,
"grad_norm": 13.04411792755127,
"learning_rate": 3.901064495929869e-05,
"loss": 0.0875,
"step": 401
},
{
"epoch": 0.73224043715847,
"grad_norm": 0.21621406078338623,
"learning_rate": 3.897933625547903e-05,
"loss": 0.002,
"step": 402
},
{
"epoch": 0.7340619307832422,
"grad_norm": 3.328409194946289,
"learning_rate": 3.894802755165936e-05,
"loss": 0.0098,
"step": 403
},
{
"epoch": 0.7358834244080146,
"grad_norm": 9.559609413146973,
"learning_rate": 3.89167188478397e-05,
"loss": 0.1343,
"step": 404
},
{
"epoch": 0.7377049180327869,
"grad_norm": 0.6983762979507446,
"learning_rate": 3.888541014402004e-05,
"loss": 0.0027,
"step": 405
},
{
"epoch": 0.7395264116575592,
"grad_norm": 0.05780564993619919,
"learning_rate": 3.885410144020038e-05,
"loss": 0.0012,
"step": 406
},
{
"epoch": 0.7413479052823315,
"grad_norm": 1.37694251537323,
"learning_rate": 3.882279273638071e-05,
"loss": 0.2163,
"step": 407
},
{
"epoch": 0.7431693989071039,
"grad_norm": 0.019220901653170586,
"learning_rate": 3.879148403256106e-05,
"loss": 0.0007,
"step": 408
},
{
"epoch": 0.7449908925318761,
"grad_norm": 1.4538003206253052,
"learning_rate": 3.8760175328741395e-05,
"loss": 0.0067,
"step": 409
},
{
"epoch": 0.7468123861566485,
"grad_norm": 0.03995515778660774,
"learning_rate": 3.8728866624921726e-05,
"loss": 0.0013,
"step": 410
},
{
"epoch": 0.7486338797814208,
"grad_norm": 7.3194146156311035,
"learning_rate": 3.8697557921102065e-05,
"loss": 0.071,
"step": 411
},
{
"epoch": 0.7504553734061931,
"grad_norm": 5.2992024421691895,
"learning_rate": 3.866624921728241e-05,
"loss": 0.114,
"step": 412
},
{
"epoch": 0.7522768670309654,
"grad_norm": 1.8797881603240967,
"learning_rate": 3.863494051346274e-05,
"loss": 0.0116,
"step": 413
},
{
"epoch": 0.7540983606557377,
"grad_norm": 0.04209226742386818,
"learning_rate": 3.860363180964308e-05,
"loss": 0.0015,
"step": 414
},
{
"epoch": 0.75591985428051,
"grad_norm": 9.969036102294922,
"learning_rate": 3.8572323105823424e-05,
"loss": 0.1007,
"step": 415
},
{
"epoch": 0.7577413479052824,
"grad_norm": 4.476265907287598,
"learning_rate": 3.854101440200376e-05,
"loss": 0.0236,
"step": 416
},
{
"epoch": 0.7595628415300546,
"grad_norm": 5.227812767028809,
"learning_rate": 3.850970569818409e-05,
"loss": 0.1293,
"step": 417
},
{
"epoch": 0.761384335154827,
"grad_norm": 0.5161237120628357,
"learning_rate": 3.847839699436444e-05,
"loss": 0.0068,
"step": 418
},
{
"epoch": 0.7632058287795993,
"grad_norm": 0.06678824126720428,
"learning_rate": 3.8447088290544776e-05,
"loss": 0.0022,
"step": 419
},
{
"epoch": 0.7650273224043715,
"grad_norm": 11.278353691101074,
"learning_rate": 3.841577958672511e-05,
"loss": 0.0381,
"step": 420
},
{
"epoch": 0.7668488160291439,
"grad_norm": 1.4786038398742676,
"learning_rate": 3.8384470882905446e-05,
"loss": 0.0134,
"step": 421
},
{
"epoch": 0.7686703096539163,
"grad_norm": 3.063671588897705,
"learning_rate": 3.835316217908579e-05,
"loss": 0.0226,
"step": 422
},
{
"epoch": 0.7704918032786885,
"grad_norm": 0.3474072813987732,
"learning_rate": 3.832185347526613e-05,
"loss": 0.0075,
"step": 423
},
{
"epoch": 0.7723132969034608,
"grad_norm": 0.31541740894317627,
"learning_rate": 3.829054477144646e-05,
"loss": 0.0058,
"step": 424
},
{
"epoch": 0.7741347905282332,
"grad_norm": 1.1225982904434204,
"learning_rate": 3.8259236067626805e-05,
"loss": 0.1693,
"step": 425
},
{
"epoch": 0.7759562841530054,
"grad_norm": 0.09978262335062027,
"learning_rate": 3.8227927363807143e-05,
"loss": 0.0031,
"step": 426
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.08662858605384827,
"learning_rate": 3.8196618659987475e-05,
"loss": 0.0021,
"step": 427
},
{
"epoch": 0.7795992714025501,
"grad_norm": 0.11617692559957504,
"learning_rate": 3.816530995616781e-05,
"loss": 0.004,
"step": 428
},
{
"epoch": 0.7814207650273224,
"grad_norm": 0.17725755274295807,
"learning_rate": 3.813400125234816e-05,
"loss": 0.0034,
"step": 429
},
{
"epoch": 0.7832422586520947,
"grad_norm": 11.300631523132324,
"learning_rate": 3.810269254852849e-05,
"loss": 0.0706,
"step": 430
},
{
"epoch": 0.785063752276867,
"grad_norm": 0.10579409450292587,
"learning_rate": 3.807138384470883e-05,
"loss": 0.0033,
"step": 431
},
{
"epoch": 0.7868852459016393,
"grad_norm": 2.0246171951293945,
"learning_rate": 3.804007514088917e-05,
"loss": 0.157,
"step": 432
},
{
"epoch": 0.7887067395264117,
"grad_norm": 9.128832817077637,
"learning_rate": 3.800876643706951e-05,
"loss": 0.0421,
"step": 433
},
{
"epoch": 0.7905282331511839,
"grad_norm": 1.876198172569275,
"learning_rate": 3.797745773324984e-05,
"loss": 0.0129,
"step": 434
},
{
"epoch": 0.7923497267759563,
"grad_norm": 2.0219812393188477,
"learning_rate": 3.794614902943019e-05,
"loss": 0.1312,
"step": 435
},
{
"epoch": 0.7941712204007286,
"grad_norm": 1.8168342113494873,
"learning_rate": 3.7914840325610525e-05,
"loss": 0.079,
"step": 436
},
{
"epoch": 0.7959927140255009,
"grad_norm": 9.818557739257812,
"learning_rate": 3.7883531621790856e-05,
"loss": 0.2222,
"step": 437
},
{
"epoch": 0.7978142076502732,
"grad_norm": 10.846918106079102,
"learning_rate": 3.7852222917971194e-05,
"loss": 0.1435,
"step": 438
},
{
"epoch": 0.7996357012750456,
"grad_norm": 10.874890327453613,
"learning_rate": 3.782091421415154e-05,
"loss": 0.1216,
"step": 439
},
{
"epoch": 0.8014571948998178,
"grad_norm": 10.522339820861816,
"learning_rate": 3.778960551033187e-05,
"loss": 0.0773,
"step": 440
},
{
"epoch": 0.8032786885245902,
"grad_norm": 11.772398948669434,
"learning_rate": 3.775829680651221e-05,
"loss": 0.5585,
"step": 441
},
{
"epoch": 0.8051001821493625,
"grad_norm": 2.1887152194976807,
"learning_rate": 3.7726988102692554e-05,
"loss": 0.041,
"step": 442
},
{
"epoch": 0.8069216757741348,
"grad_norm": 1.6691868305206299,
"learning_rate": 3.769567939887289e-05,
"loss": 0.0391,
"step": 443
},
{
"epoch": 0.8087431693989071,
"grad_norm": 1.4274911880493164,
"learning_rate": 3.766437069505322e-05,
"loss": 0.0277,
"step": 444
},
{
"epoch": 0.8105646630236795,
"grad_norm": 0.6223703026771545,
"learning_rate": 3.763306199123356e-05,
"loss": 0.0171,
"step": 445
},
{
"epoch": 0.8123861566484517,
"grad_norm": 0.23633567988872528,
"learning_rate": 3.7601753287413906e-05,
"loss": 0.008,
"step": 446
},
{
"epoch": 0.8142076502732241,
"grad_norm": 1.7008121013641357,
"learning_rate": 3.757044458359424e-05,
"loss": 0.0079,
"step": 447
},
{
"epoch": 0.8160291438979964,
"grad_norm": 0.08883205056190491,
"learning_rate": 3.7539135879774576e-05,
"loss": 0.0032,
"step": 448
},
{
"epoch": 0.8178506375227687,
"grad_norm": 2.3876216411590576,
"learning_rate": 3.750782717595492e-05,
"loss": 0.111,
"step": 449
},
{
"epoch": 0.819672131147541,
"grad_norm": 5.239230155944824,
"learning_rate": 3.747651847213526e-05,
"loss": 0.0726,
"step": 450
},
{
"epoch": 0.8214936247723132,
"grad_norm": 4.39957857131958,
"learning_rate": 3.744520976831559e-05,
"loss": 0.0349,
"step": 451
},
{
"epoch": 0.8233151183970856,
"grad_norm": 4.1816534996032715,
"learning_rate": 3.7413901064495935e-05,
"loss": 0.1397,
"step": 452
},
{
"epoch": 0.825136612021858,
"grad_norm": 0.13828523457050323,
"learning_rate": 3.738259236067627e-05,
"loss": 0.0037,
"step": 453
},
{
"epoch": 0.8269581056466302,
"grad_norm": 0.7528238892555237,
"learning_rate": 3.7351283656856605e-05,
"loss": 0.0062,
"step": 454
},
{
"epoch": 0.8287795992714025,
"grad_norm": 11.578791618347168,
"learning_rate": 3.731997495303694e-05,
"loss": 0.2069,
"step": 455
},
{
"epoch": 0.8306010928961749,
"grad_norm": 0.3673207759857178,
"learning_rate": 3.728866624921729e-05,
"loss": 0.0033,
"step": 456
},
{
"epoch": 0.8324225865209471,
"grad_norm": 0.3435652554035187,
"learning_rate": 3.725735754539762e-05,
"loss": 0.003,
"step": 457
},
{
"epoch": 0.8342440801457195,
"grad_norm": 0.055871132761240005,
"learning_rate": 3.722604884157796e-05,
"loss": 0.0019,
"step": 458
},
{
"epoch": 0.8360655737704918,
"grad_norm": 5.986182689666748,
"learning_rate": 3.71947401377583e-05,
"loss": 0.0398,
"step": 459
},
{
"epoch": 0.8378870673952641,
"grad_norm": 2.631530284881592,
"learning_rate": 3.716343143393864e-05,
"loss": 0.0158,
"step": 460
},
{
"epoch": 0.8397085610200364,
"grad_norm": 12.033699035644531,
"learning_rate": 3.713212273011897e-05,
"loss": 0.1011,
"step": 461
},
{
"epoch": 0.8415300546448088,
"grad_norm": 0.41058558225631714,
"learning_rate": 3.7100814026299316e-05,
"loss": 0.0047,
"step": 462
},
{
"epoch": 0.843351548269581,
"grad_norm": 2.519164800643921,
"learning_rate": 3.7069505322479655e-05,
"loss": 0.2319,
"step": 463
},
{
"epoch": 0.8451730418943534,
"grad_norm": 0.09700655937194824,
"learning_rate": 3.7038196618659986e-05,
"loss": 0.0021,
"step": 464
},
{
"epoch": 0.8469945355191257,
"grad_norm": 1.427298903465271,
"learning_rate": 3.7006887914840324e-05,
"loss": 0.0121,
"step": 465
},
{
"epoch": 0.848816029143898,
"grad_norm": 2.7123031616210938,
"learning_rate": 3.697557921102067e-05,
"loss": 0.0099,
"step": 466
},
{
"epoch": 0.8506375227686703,
"grad_norm": 0.1832200437784195,
"learning_rate": 3.6944270507201e-05,
"loss": 0.0016,
"step": 467
},
{
"epoch": 0.8524590163934426,
"grad_norm": 0.05797650292515755,
"learning_rate": 3.691296180338134e-05,
"loss": 0.0017,
"step": 468
},
{
"epoch": 0.8542805100182149,
"grad_norm": 11.710906028747559,
"learning_rate": 3.6881653099561683e-05,
"loss": 0.2383,
"step": 469
},
{
"epoch": 0.8561020036429873,
"grad_norm": 0.06885645538568497,
"learning_rate": 3.685034439574202e-05,
"loss": 0.0019,
"step": 470
},
{
"epoch": 0.8579234972677595,
"grad_norm": 0.06676291674375534,
"learning_rate": 3.681903569192235e-05,
"loss": 0.0019,
"step": 471
},
{
"epoch": 0.8597449908925319,
"grad_norm": 1.3994064331054688,
"learning_rate": 3.678772698810269e-05,
"loss": 0.0091,
"step": 472
},
{
"epoch": 0.8615664845173042,
"grad_norm": 0.18276052176952362,
"learning_rate": 3.6756418284283036e-05,
"loss": 0.0018,
"step": 473
},
{
"epoch": 0.8633879781420765,
"grad_norm": 0.28978365659713745,
"learning_rate": 3.672510958046337e-05,
"loss": 0.0033,
"step": 474
},
{
"epoch": 0.8652094717668488,
"grad_norm": 0.03744082152843475,
"learning_rate": 3.6693800876643706e-05,
"loss": 0.0013,
"step": 475
},
{
"epoch": 0.8670309653916212,
"grad_norm": 2.398526906967163,
"learning_rate": 3.666249217282405e-05,
"loss": 0.0122,
"step": 476
},
{
"epoch": 0.8688524590163934,
"grad_norm": 0.02406764030456543,
"learning_rate": 3.663118346900439e-05,
"loss": 0.0009,
"step": 477
},
{
"epoch": 0.8706739526411658,
"grad_norm": 0.37078964710235596,
"learning_rate": 3.659987476518472e-05,
"loss": 0.0022,
"step": 478
},
{
"epoch": 0.8724954462659381,
"grad_norm": 4.862973213195801,
"learning_rate": 3.6568566061365065e-05,
"loss": 0.0194,
"step": 479
},
{
"epoch": 0.8743169398907104,
"grad_norm": 0.055899012833833694,
"learning_rate": 3.65372573575454e-05,
"loss": 0.0012,
"step": 480
},
{
"epoch": 0.8761384335154827,
"grad_norm": 4.453655242919922,
"learning_rate": 3.6505948653725734e-05,
"loss": 0.1545,
"step": 481
},
{
"epoch": 0.8779599271402551,
"grad_norm": 0.08148118853569031,
"learning_rate": 3.647463994990607e-05,
"loss": 0.0015,
"step": 482
},
{
"epoch": 0.8797814207650273,
"grad_norm": 0.11519090086221695,
"learning_rate": 3.644333124608642e-05,
"loss": 0.0012,
"step": 483
},
{
"epoch": 0.8816029143897997,
"grad_norm": 0.03914317488670349,
"learning_rate": 3.641202254226675e-05,
"loss": 0.0011,
"step": 484
},
{
"epoch": 0.8834244080145719,
"grad_norm": 1.1666841506958008,
"learning_rate": 3.638071383844709e-05,
"loss": 0.2138,
"step": 485
},
{
"epoch": 0.8852459016393442,
"grad_norm": 0.02257809415459633,
"learning_rate": 3.634940513462743e-05,
"loss": 0.0008,
"step": 486
},
{
"epoch": 0.8870673952641166,
"grad_norm": 0.026867952197790146,
"learning_rate": 3.631809643080777e-05,
"loss": 0.001,
"step": 487
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.03357498347759247,
"learning_rate": 3.62867877269881e-05,
"loss": 0.0012,
"step": 488
},
{
"epoch": 0.8907103825136612,
"grad_norm": 3.916914701461792,
"learning_rate": 3.625547902316844e-05,
"loss": 0.1349,
"step": 489
},
{
"epoch": 0.8925318761384335,
"grad_norm": 3.825364828109741,
"learning_rate": 3.6224170319348784e-05,
"loss": 0.0989,
"step": 490
},
{
"epoch": 0.8943533697632058,
"grad_norm": 0.034188635647296906,
"learning_rate": 3.6192861615529116e-05,
"loss": 0.0011,
"step": 491
},
{
"epoch": 0.8961748633879781,
"grad_norm": 6.912594318389893,
"learning_rate": 3.6161552911709454e-05,
"loss": 0.0512,
"step": 492
},
{
"epoch": 0.8979963570127505,
"grad_norm": 3.054703712463379,
"learning_rate": 3.61302442078898e-05,
"loss": 0.0481,
"step": 493
},
{
"epoch": 0.8998178506375227,
"grad_norm": 0.16723403334617615,
"learning_rate": 3.609893550407013e-05,
"loss": 0.0034,
"step": 494
},
{
"epoch": 0.9016393442622951,
"grad_norm": 17.146343231201172,
"learning_rate": 3.606762680025047e-05,
"loss": 0.2352,
"step": 495
},
{
"epoch": 0.9034608378870674,
"grad_norm": 4.063144207000732,
"learning_rate": 3.603631809643081e-05,
"loss": 0.0731,
"step": 496
},
{
"epoch": 0.9052823315118397,
"grad_norm": 0.10850164294242859,
"learning_rate": 3.600500939261115e-05,
"loss": 0.0027,
"step": 497
},
{
"epoch": 0.907103825136612,
"grad_norm": 0.18612350523471832,
"learning_rate": 3.597370068879148e-05,
"loss": 0.0035,
"step": 498
},
{
"epoch": 0.9089253187613844,
"grad_norm": 0.1478041708469391,
"learning_rate": 3.594239198497182e-05,
"loss": 0.0032,
"step": 499
},
{
"epoch": 0.9107468123861566,
"grad_norm": 0.07824063301086426,
"learning_rate": 3.5911083281152166e-05,
"loss": 0.002,
"step": 500
},
{
"epoch": 0.912568306010929,
"grad_norm": 0.08944151550531387,
"learning_rate": 3.58797745773325e-05,
"loss": 0.0026,
"step": 501
},
{
"epoch": 0.9143897996357013,
"grad_norm": 0.04674902930855751,
"learning_rate": 3.5848465873512835e-05,
"loss": 0.0015,
"step": 502
},
{
"epoch": 0.9162112932604736,
"grad_norm": 0.038566358387470245,
"learning_rate": 3.581715716969318e-05,
"loss": 0.0013,
"step": 503
},
{
"epoch": 0.9180327868852459,
"grad_norm": 5.497028350830078,
"learning_rate": 3.578584846587352e-05,
"loss": 0.2885,
"step": 504
},
{
"epoch": 0.9198542805100182,
"grad_norm": 0.0487968772649765,
"learning_rate": 3.575453976205385e-05,
"loss": 0.0015,
"step": 505
},
{
"epoch": 0.9216757741347905,
"grad_norm": 0.04053632542490959,
"learning_rate": 3.572323105823419e-05,
"loss": 0.0014,
"step": 506
},
{
"epoch": 0.9234972677595629,
"grad_norm": 0.18789644539356232,
"learning_rate": 3.569192235441453e-05,
"loss": 0.003,
"step": 507
},
{
"epoch": 0.9253187613843351,
"grad_norm": 0.161463662981987,
"learning_rate": 3.5660613650594864e-05,
"loss": 0.0028,
"step": 508
},
{
"epoch": 0.9271402550091075,
"grad_norm": 0.05199124291539192,
"learning_rate": 3.56293049467752e-05,
"loss": 0.0017,
"step": 509
},
{
"epoch": 0.9289617486338798,
"grad_norm": 0.035535071045160294,
"learning_rate": 3.559799624295555e-05,
"loss": 0.0013,
"step": 510
},
{
"epoch": 0.930783242258652,
"grad_norm": 0.05581507459282875,
"learning_rate": 3.556668753913588e-05,
"loss": 0.0018,
"step": 511
},
{
"epoch": 0.9326047358834244,
"grad_norm": 0.47275644540786743,
"learning_rate": 3.553537883531622e-05,
"loss": 0.004,
"step": 512
},
{
"epoch": 0.9344262295081968,
"grad_norm": 0.0707859992980957,
"learning_rate": 3.550407013149656e-05,
"loss": 0.0015,
"step": 513
},
{
"epoch": 0.936247723132969,
"grad_norm": 8.448419570922852,
"learning_rate": 3.54727614276769e-05,
"loss": 0.1712,
"step": 514
},
{
"epoch": 0.9380692167577414,
"grad_norm": 2.0937178134918213,
"learning_rate": 3.544145272385723e-05,
"loss": 0.017,
"step": 515
},
{
"epoch": 0.9398907103825137,
"grad_norm": 0.7188909649848938,
"learning_rate": 3.541014402003757e-05,
"loss": 0.0084,
"step": 516
},
{
"epoch": 0.941712204007286,
"grad_norm": 0.07177567481994629,
"learning_rate": 3.5378835316217914e-05,
"loss": 0.0015,
"step": 517
},
{
"epoch": 0.9435336976320583,
"grad_norm": 0.686763346195221,
"learning_rate": 3.5347526612398246e-05,
"loss": 0.0047,
"step": 518
},
{
"epoch": 0.9453551912568307,
"grad_norm": 0.06569703668355942,
"learning_rate": 3.5316217908578584e-05,
"loss": 0.0016,
"step": 519
},
{
"epoch": 0.9471766848816029,
"grad_norm": 0.024774452671408653,
"learning_rate": 3.528490920475893e-05,
"loss": 0.0009,
"step": 520
},
{
"epoch": 0.9489981785063752,
"grad_norm": 0.03425534442067146,
"learning_rate": 3.525360050093926e-05,
"loss": 0.0011,
"step": 521
},
{
"epoch": 0.9508196721311475,
"grad_norm": 0.03125905990600586,
"learning_rate": 3.52222917971196e-05,
"loss": 0.001,
"step": 522
},
{
"epoch": 0.9526411657559198,
"grad_norm": 10.502355575561523,
"learning_rate": 3.5190983093299936e-05,
"loss": 0.2401,
"step": 523
},
{
"epoch": 0.9544626593806922,
"grad_norm": 0.037440430372953415,
"learning_rate": 3.515967438948028e-05,
"loss": 0.0009,
"step": 524
},
{
"epoch": 0.9562841530054644,
"grad_norm": 6.415884017944336,
"learning_rate": 3.512836568566061e-05,
"loss": 0.1531,
"step": 525
},
{
"epoch": 0.9581056466302368,
"grad_norm": 0.03087371401488781,
"learning_rate": 3.509705698184095e-05,
"loss": 0.001,
"step": 526
},
{
"epoch": 0.9599271402550091,
"grad_norm": 8.961065292358398,
"learning_rate": 3.5065748278021296e-05,
"loss": 0.0851,
"step": 527
},
{
"epoch": 0.9617486338797814,
"grad_norm": 0.1427253782749176,
"learning_rate": 3.503443957420163e-05,
"loss": 0.0018,
"step": 528
},
{
"epoch": 0.9635701275045537,
"grad_norm": 2.9051027297973633,
"learning_rate": 3.5003130870381965e-05,
"loss": 0.0202,
"step": 529
},
{
"epoch": 0.9653916211293261,
"grad_norm": 1.785143494606018,
"learning_rate": 3.497182216656231e-05,
"loss": 0.191,
"step": 530
},
{
"epoch": 0.9672131147540983,
"grad_norm": 0.08835508674383163,
"learning_rate": 3.494051346274265e-05,
"loss": 0.0012,
"step": 531
},
{
"epoch": 0.9690346083788707,
"grad_norm": 3.4107658863067627,
"learning_rate": 3.490920475892298e-05,
"loss": 0.0126,
"step": 532
},
{
"epoch": 0.970856102003643,
"grad_norm": 3.6474239826202393,
"learning_rate": 3.487789605510332e-05,
"loss": 0.1471,
"step": 533
},
{
"epoch": 0.9726775956284153,
"grad_norm": 1.437477946281433,
"learning_rate": 3.484658735128366e-05,
"loss": 0.0089,
"step": 534
},
{
"epoch": 0.9744990892531876,
"grad_norm": 0.03210463002324104,
"learning_rate": 3.4815278647463994e-05,
"loss": 0.001,
"step": 535
},
{
"epoch": 0.97632058287796,
"grad_norm": 4.223978042602539,
"learning_rate": 3.478396994364433e-05,
"loss": 0.0222,
"step": 536
},
{
"epoch": 0.9781420765027322,
"grad_norm": 0.0497698038816452,
"learning_rate": 3.475266123982468e-05,
"loss": 0.0016,
"step": 537
},
{
"epoch": 0.9799635701275046,
"grad_norm": 7.607251167297363,
"learning_rate": 3.472135253600501e-05,
"loss": 0.0362,
"step": 538
},
{
"epoch": 0.9817850637522769,
"grad_norm": 0.5749123692512512,
"learning_rate": 3.4690043832185347e-05,
"loss": 0.0097,
"step": 539
},
{
"epoch": 0.9836065573770492,
"grad_norm": 0.22390051186084747,
"learning_rate": 3.4658735128365685e-05,
"loss": 0.0029,
"step": 540
},
{
"epoch": 0.9854280510018215,
"grad_norm": 11.045784950256348,
"learning_rate": 3.462742642454603e-05,
"loss": 0.064,
"step": 541
},
{
"epoch": 0.9872495446265938,
"grad_norm": 0.17796143889427185,
"learning_rate": 3.459611772072636e-05,
"loss": 0.0021,
"step": 542
},
{
"epoch": 0.9890710382513661,
"grad_norm": 0.05928805470466614,
"learning_rate": 3.45648090169067e-05,
"loss": 0.0015,
"step": 543
},
{
"epoch": 0.9908925318761385,
"grad_norm": 0.3007548451423645,
"learning_rate": 3.4533500313087044e-05,
"loss": 0.0031,
"step": 544
},
{
"epoch": 0.9927140255009107,
"grad_norm": 0.2960835099220276,
"learning_rate": 3.4502191609267375e-05,
"loss": 0.003,
"step": 545
},
{
"epoch": 0.994535519125683,
"grad_norm": 0.2534222900867462,
"learning_rate": 3.4470882905447714e-05,
"loss": 0.0022,
"step": 546
},
{
"epoch": 0.9963570127504554,
"grad_norm": 9.211565971374512,
"learning_rate": 3.443957420162806e-05,
"loss": 0.0399,
"step": 547
},
{
"epoch": 0.9981785063752276,
"grad_norm": 4.776447772979736,
"learning_rate": 3.4408265497808397e-05,
"loss": 0.0172,
"step": 548
},
{
"epoch": 1.0,
"grad_norm": 8.753249168395996,
"learning_rate": 3.437695679398873e-05,
"loss": 0.1608,
"step": 549
},
{
"epoch": 1.0,
"eval_accuracy": 0.988615664845173,
"eval_loss": 0.04577361047267914,
"eval_runtime": 72.6414,
"eval_samples_per_second": 60.461,
"eval_steps_per_second": 1.9,
"step": 549
}
],
"logging_steps": 1,
"max_steps": 1647,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4622335020564480.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}