|
{ |
|
"best_metric": 0.04577361047267914, |
|
"best_model_checkpoint": "./phishing-email-detection/checkpoint-549", |
|
"epoch": 1.0, |
|
"eval_steps": 1, |
|
"global_step": 549, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0018214936247723133, |
|
"grad_norm": 3.5286669731140137, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7387, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0036429872495446266, |
|
"grad_norm": 5.887165069580078, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.7757, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00546448087431694, |
|
"grad_norm": 4.956603050231934, |
|
"learning_rate": 3e-06, |
|
"loss": 0.7739, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.007285974499089253, |
|
"grad_norm": 1.441901683807373, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.6558, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.009107468123861567, |
|
"grad_norm": 2.219719648361206, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7077, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01092896174863388, |
|
"grad_norm": 3.6860222816467285, |
|
"learning_rate": 6e-06, |
|
"loss": 0.7193, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.012750455373406194, |
|
"grad_norm": 4.651106834411621, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.7194, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.014571948998178506, |
|
"grad_norm": 2.381688117980957, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.6798, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01639344262295082, |
|
"grad_norm": 2.163804292678833, |
|
"learning_rate": 9e-06, |
|
"loss": 0.6845, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.018214936247723135, |
|
"grad_norm": 3.0246245861053467, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6573, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.020036429872495445, |
|
"grad_norm": 7.085489273071289, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.7286, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02185792349726776, |
|
"grad_norm": 3.3664584159851074, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.7074, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.023679417122040074, |
|
"grad_norm": 2.2570459842681885, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.64, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.025500910746812388, |
|
"grad_norm": 2.044220447540283, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.6568, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0273224043715847, |
|
"grad_norm": 2.8037195205688477, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.6471, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.029143897996357013, |
|
"grad_norm": 1.8997001647949219, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.6499, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.030965391621129327, |
|
"grad_norm": 3.3398947715759277, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.5904, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03278688524590164, |
|
"grad_norm": 2.688948631286621, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.6029, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03460837887067395, |
|
"grad_norm": 2.952101469039917, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.6377, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03642987249544627, |
|
"grad_norm": 3.182657480239868, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6495, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03825136612021858, |
|
"grad_norm": 5.169290542602539, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.6297, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04007285974499089, |
|
"grad_norm": 4.624154090881348, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.637, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04189435336976321, |
|
"grad_norm": 3.911428689956665, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.5452, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.04371584699453552, |
|
"grad_norm": 4.3677897453308105, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.5605, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04553734061930783, |
|
"grad_norm": 2.0296688079833984, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.5526, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04735883424408015, |
|
"grad_norm": 5.974031925201416, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.512, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04918032786885246, |
|
"grad_norm": 3.5385329723358154, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.5212, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.051001821493624776, |
|
"grad_norm": 4.718818187713623, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.4359, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.052823315118397086, |
|
"grad_norm": 3.893428325653076, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.4226, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0546448087431694, |
|
"grad_norm": 4.146342754364014, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4003, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.056466302367941715, |
|
"grad_norm": 5.218969821929932, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.3837, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.058287795992714025, |
|
"grad_norm": 6.411149024963379, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.4135, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.060109289617486336, |
|
"grad_norm": 3.4569814205169678, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.2921, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.061930783242258654, |
|
"grad_norm": 4.673402309417725, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.3708, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.06375227686703097, |
|
"grad_norm": 4.357000827789307, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3005, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06557377049180328, |
|
"grad_norm": 2.4127838611602783, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1953, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.06739526411657559, |
|
"grad_norm": 8.710689544677734, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.3076, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0692167577413479, |
|
"grad_norm": 3.4053244590759277, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.1865, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.07103825136612021, |
|
"grad_norm": 7.373399257659912, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.244, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.07285974499089254, |
|
"grad_norm": 3.437110424041748, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3219, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07468123861566485, |
|
"grad_norm": 4.024507999420166, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.1873, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.07650273224043716, |
|
"grad_norm": 5.076328277587891, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.1642, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.07832422586520947, |
|
"grad_norm": 1.7894034385681152, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.0831, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.08014571948998178, |
|
"grad_norm": 2.9944159984588623, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.114, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.08196721311475409, |
|
"grad_norm": 2.9631662368774414, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0638, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08378870673952642, |
|
"grad_norm": 3.803802251815796, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.2082, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.08561020036429873, |
|
"grad_norm": 10.075150489807129, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.1128, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.08743169398907104, |
|
"grad_norm": 8.686117172241211, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.2981, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.08925318761384335, |
|
"grad_norm": 6.119321346282959, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.227, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.09107468123861566, |
|
"grad_norm": 1.3178825378417969, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0379, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09289617486338798, |
|
"grad_norm": 14.058587074279785, |
|
"learning_rate": 4.996869129618034e-05, |
|
"loss": 0.2593, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0947176684881603, |
|
"grad_norm": 3.1873652935028076, |
|
"learning_rate": 4.993738259236068e-05, |
|
"loss": 0.0374, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0965391621129326, |
|
"grad_norm": 14.453207015991211, |
|
"learning_rate": 4.990607388854102e-05, |
|
"loss": 0.3941, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.09836065573770492, |
|
"grad_norm": 1.3862441778182983, |
|
"learning_rate": 4.9874765184721355e-05, |
|
"loss": 0.1079, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.10018214936247723, |
|
"grad_norm": 16.57040023803711, |
|
"learning_rate": 4.984345648090169e-05, |
|
"loss": 0.5086, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.10200364298724955, |
|
"grad_norm": 19.026596069335938, |
|
"learning_rate": 4.981214777708203e-05, |
|
"loss": 0.3344, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.10382513661202186, |
|
"grad_norm": 10.873573303222656, |
|
"learning_rate": 4.978083907326237e-05, |
|
"loss": 0.145, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.10564663023679417, |
|
"grad_norm": 12.884831428527832, |
|
"learning_rate": 4.974953036944271e-05, |
|
"loss": 0.408, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.10746812386156648, |
|
"grad_norm": 13.81652545928955, |
|
"learning_rate": 4.9718221665623046e-05, |
|
"loss": 0.1871, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1092896174863388, |
|
"grad_norm": 9.276785850524902, |
|
"learning_rate": 4.9686912961803384e-05, |
|
"loss": 0.1794, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 3.9966280460357666, |
|
"learning_rate": 4.965560425798372e-05, |
|
"loss": 0.1224, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.11293260473588343, |
|
"grad_norm": 9.78342056274414, |
|
"learning_rate": 4.962429555416406e-05, |
|
"loss": 0.2712, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.11475409836065574, |
|
"grad_norm": 3.808826446533203, |
|
"learning_rate": 4.95929868503444e-05, |
|
"loss": 0.0539, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.11657559198542805, |
|
"grad_norm": 2.176527500152588, |
|
"learning_rate": 4.9561678146524736e-05, |
|
"loss": 0.0676, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.11839708561020036, |
|
"grad_norm": 1.8019678592681885, |
|
"learning_rate": 4.9530369442705075e-05, |
|
"loss": 0.1436, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.12021857923497267, |
|
"grad_norm": 2.831108570098877, |
|
"learning_rate": 4.949906073888541e-05, |
|
"loss": 0.0529, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.122040072859745, |
|
"grad_norm": 3.580284357070923, |
|
"learning_rate": 4.946775203506575e-05, |
|
"loss": 0.0522, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.12386156648451731, |
|
"grad_norm": 1.6722187995910645, |
|
"learning_rate": 4.943644333124609e-05, |
|
"loss": 0.0587, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.12568306010928962, |
|
"grad_norm": 2.407590866088867, |
|
"learning_rate": 4.940513462742643e-05, |
|
"loss": 0.1156, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.12750455373406194, |
|
"grad_norm": 9.305254936218262, |
|
"learning_rate": 4.9373825923606765e-05, |
|
"loss": 0.0965, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12932604735883424, |
|
"grad_norm": 7.728682994842529, |
|
"learning_rate": 4.93425172197871e-05, |
|
"loss": 0.1213, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.13114754098360656, |
|
"grad_norm": 2.370173454284668, |
|
"learning_rate": 4.931120851596744e-05, |
|
"loss": 0.0458, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.13296903460837886, |
|
"grad_norm": 15.143721580505371, |
|
"learning_rate": 4.927989981214778e-05, |
|
"loss": 0.358, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.13479052823315119, |
|
"grad_norm": 7.329561233520508, |
|
"learning_rate": 4.924859110832812e-05, |
|
"loss": 0.0722, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1366120218579235, |
|
"grad_norm": 4.661482810974121, |
|
"learning_rate": 4.9217282404508456e-05, |
|
"loss": 0.1194, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1384335154826958, |
|
"grad_norm": 7.151434898376465, |
|
"learning_rate": 4.9185973700688794e-05, |
|
"loss": 0.0753, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.14025500910746813, |
|
"grad_norm": 8.931604385375977, |
|
"learning_rate": 4.915466499686913e-05, |
|
"loss": 0.1828, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.14207650273224043, |
|
"grad_norm": 9.410967826843262, |
|
"learning_rate": 4.912335629304947e-05, |
|
"loss": 0.0992, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.14389799635701275, |
|
"grad_norm": 5.235998153686523, |
|
"learning_rate": 4.909204758922981e-05, |
|
"loss": 0.2115, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.14571948998178508, |
|
"grad_norm": 5.031798839569092, |
|
"learning_rate": 4.906073888541015e-05, |
|
"loss": 0.0448, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14754098360655737, |
|
"grad_norm": 1.3853205442428589, |
|
"learning_rate": 4.9029430181590485e-05, |
|
"loss": 0.0279, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.1493624772313297, |
|
"grad_norm": 7.063821792602539, |
|
"learning_rate": 4.899812147777082e-05, |
|
"loss": 0.1193, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.151183970856102, |
|
"grad_norm": 12.036198616027832, |
|
"learning_rate": 4.896681277395116e-05, |
|
"loss": 0.2752, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.15300546448087432, |
|
"grad_norm": 8.796091079711914, |
|
"learning_rate": 4.89355040701315e-05, |
|
"loss": 0.2198, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.15482695810564662, |
|
"grad_norm": 0.6930309534072876, |
|
"learning_rate": 4.890419536631184e-05, |
|
"loss": 0.0153, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.15664845173041894, |
|
"grad_norm": 6.166244029998779, |
|
"learning_rate": 4.8872886662492175e-05, |
|
"loss": 0.163, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.15846994535519127, |
|
"grad_norm": 4.07868766784668, |
|
"learning_rate": 4.8841577958672514e-05, |
|
"loss": 0.0629, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.16029143897996356, |
|
"grad_norm": 6.002202033996582, |
|
"learning_rate": 4.881026925485285e-05, |
|
"loss": 0.1418, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1621129326047359, |
|
"grad_norm": 5.126954078674316, |
|
"learning_rate": 4.877896055103319e-05, |
|
"loss": 0.0691, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.16393442622950818, |
|
"grad_norm": 8.614744186401367, |
|
"learning_rate": 4.874765184721353e-05, |
|
"loss": 0.1183, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1657559198542805, |
|
"grad_norm": 4.495233535766602, |
|
"learning_rate": 4.8716343143393866e-05, |
|
"loss": 0.1058, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.16757741347905283, |
|
"grad_norm": 2.912471294403076, |
|
"learning_rate": 4.8685034439574204e-05, |
|
"loss": 0.0322, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.16939890710382513, |
|
"grad_norm": 2.786748170852661, |
|
"learning_rate": 4.865372573575454e-05, |
|
"loss": 0.0289, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.17122040072859745, |
|
"grad_norm": 2.2987825870513916, |
|
"learning_rate": 4.862241703193488e-05, |
|
"loss": 0.0647, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.17304189435336975, |
|
"grad_norm": 2.2475061416625977, |
|
"learning_rate": 4.859110832811522e-05, |
|
"loss": 0.0384, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.17486338797814208, |
|
"grad_norm": 4.809598445892334, |
|
"learning_rate": 4.855979962429556e-05, |
|
"loss": 0.1032, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.1766848816029144, |
|
"grad_norm": 4.917390823364258, |
|
"learning_rate": 4.8528490920475895e-05, |
|
"loss": 0.1083, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.1785063752276867, |
|
"grad_norm": 1.0454902648925781, |
|
"learning_rate": 4.849718221665623e-05, |
|
"loss": 0.0186, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.18032786885245902, |
|
"grad_norm": 3.4447038173675537, |
|
"learning_rate": 4.846587351283657e-05, |
|
"loss": 0.0509, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.18214936247723132, |
|
"grad_norm": 6.711843490600586, |
|
"learning_rate": 4.843456480901691e-05, |
|
"loss": 0.0718, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18397085610200364, |
|
"grad_norm": 3.19818377494812, |
|
"learning_rate": 4.840325610519725e-05, |
|
"loss": 0.0889, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.18579234972677597, |
|
"grad_norm": 10.022863388061523, |
|
"learning_rate": 4.8371947401377586e-05, |
|
"loss": 0.1713, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.18761384335154827, |
|
"grad_norm": 0.2765645682811737, |
|
"learning_rate": 4.8340638697557924e-05, |
|
"loss": 0.0049, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.1894353369763206, |
|
"grad_norm": 6.575446605682373, |
|
"learning_rate": 4.830932999373826e-05, |
|
"loss": 0.2178, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.1912568306010929, |
|
"grad_norm": 6.493257522583008, |
|
"learning_rate": 4.82780212899186e-05, |
|
"loss": 0.1347, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.1930783242258652, |
|
"grad_norm": 7.500410556793213, |
|
"learning_rate": 4.824671258609894e-05, |
|
"loss": 0.0779, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.19489981785063754, |
|
"grad_norm": 9.324666976928711, |
|
"learning_rate": 4.8215403882279276e-05, |
|
"loss": 0.3894, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.19672131147540983, |
|
"grad_norm": 2.570295810699463, |
|
"learning_rate": 4.8184095178459615e-05, |
|
"loss": 0.0145, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.19854280510018216, |
|
"grad_norm": 2.4578723907470703, |
|
"learning_rate": 4.815278647463995e-05, |
|
"loss": 0.0212, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.20036429872495445, |
|
"grad_norm": 4.467875957489014, |
|
"learning_rate": 4.812147777082029e-05, |
|
"loss": 0.0766, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.20218579234972678, |
|
"grad_norm": 8.017147064208984, |
|
"learning_rate": 4.809016906700063e-05, |
|
"loss": 0.2337, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2040072859744991, |
|
"grad_norm": 7.299202919006348, |
|
"learning_rate": 4.805886036318097e-05, |
|
"loss": 0.2099, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2058287795992714, |
|
"grad_norm": 1.8824843168258667, |
|
"learning_rate": 4.8027551659361305e-05, |
|
"loss": 0.0292, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.20765027322404372, |
|
"grad_norm": 0.450076699256897, |
|
"learning_rate": 4.7996242955541643e-05, |
|
"loss": 0.0103, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.20947176684881602, |
|
"grad_norm": 6.788011074066162, |
|
"learning_rate": 4.796493425172198e-05, |
|
"loss": 0.074, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.21129326047358835, |
|
"grad_norm": 6.855130195617676, |
|
"learning_rate": 4.793362554790232e-05, |
|
"loss": 0.0793, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.21311475409836064, |
|
"grad_norm": 6.564332962036133, |
|
"learning_rate": 4.790231684408266e-05, |
|
"loss": 0.061, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.21493624772313297, |
|
"grad_norm": 3.254970073699951, |
|
"learning_rate": 4.7871008140262996e-05, |
|
"loss": 0.0462, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2167577413479053, |
|
"grad_norm": 0.28988149762153625, |
|
"learning_rate": 4.7839699436443334e-05, |
|
"loss": 0.0079, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2185792349726776, |
|
"grad_norm": 2.863494634628296, |
|
"learning_rate": 4.780839073262367e-05, |
|
"loss": 0.0674, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2204007285974499, |
|
"grad_norm": 2.85598087310791, |
|
"learning_rate": 4.777708202880401e-05, |
|
"loss": 0.1649, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 1.740670919418335, |
|
"learning_rate": 4.774577332498435e-05, |
|
"loss": 0.0164, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.22404371584699453, |
|
"grad_norm": 1.7192822694778442, |
|
"learning_rate": 4.771446462116469e-05, |
|
"loss": 0.129, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.22586520947176686, |
|
"grad_norm": 5.544397830963135, |
|
"learning_rate": 4.7683155917345025e-05, |
|
"loss": 0.0954, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.22768670309653916, |
|
"grad_norm": 7.164045810699463, |
|
"learning_rate": 4.765184721352536e-05, |
|
"loss": 0.2136, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.22950819672131148, |
|
"grad_norm": 1.1967582702636719, |
|
"learning_rate": 4.76205385097057e-05, |
|
"loss": 0.0317, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.23132969034608378, |
|
"grad_norm": 0.9311371445655823, |
|
"learning_rate": 4.758922980588604e-05, |
|
"loss": 0.1102, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2331511839708561, |
|
"grad_norm": 0.8293129205703735, |
|
"learning_rate": 4.755792110206638e-05, |
|
"loss": 0.0218, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.23497267759562843, |
|
"grad_norm": 1.2660574913024902, |
|
"learning_rate": 4.7526612398246716e-05, |
|
"loss": 0.0278, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.23679417122040072, |
|
"grad_norm": 0.48367011547088623, |
|
"learning_rate": 4.7495303694427054e-05, |
|
"loss": 0.0146, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23861566484517305, |
|
"grad_norm": 0.7170718908309937, |
|
"learning_rate": 4.746399499060739e-05, |
|
"loss": 0.0107, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.24043715846994534, |
|
"grad_norm": 4.402022361755371, |
|
"learning_rate": 4.743268628678773e-05, |
|
"loss": 0.1032, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.24225865209471767, |
|
"grad_norm": 0.09203081578016281, |
|
"learning_rate": 4.740137758296807e-05, |
|
"loss": 0.0029, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.24408014571949, |
|
"grad_norm": 0.09475582838058472, |
|
"learning_rate": 4.7370068879148406e-05, |
|
"loss": 0.0029, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.2459016393442623, |
|
"grad_norm": 0.25659048557281494, |
|
"learning_rate": 4.7338760175328744e-05, |
|
"loss": 0.0037, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.24772313296903462, |
|
"grad_norm": 5.615557670593262, |
|
"learning_rate": 4.730745147150908e-05, |
|
"loss": 0.0604, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.2495446265938069, |
|
"grad_norm": 8.917096138000488, |
|
"learning_rate": 4.727614276768942e-05, |
|
"loss": 0.0719, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.25136612021857924, |
|
"grad_norm": 10.325088500976562, |
|
"learning_rate": 4.724483406386976e-05, |
|
"loss": 0.2705, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.25318761384335153, |
|
"grad_norm": 0.44606778025627136, |
|
"learning_rate": 4.72135253600501e-05, |
|
"loss": 0.0039, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.2550091074681239, |
|
"grad_norm": 5.040135383605957, |
|
"learning_rate": 4.7182216656230435e-05, |
|
"loss": 0.1328, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2568306010928962, |
|
"grad_norm": 2.567768096923828, |
|
"learning_rate": 4.715090795241077e-05, |
|
"loss": 0.0166, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2586520947176685, |
|
"grad_norm": 0.5684707164764404, |
|
"learning_rate": 4.711959924859111e-05, |
|
"loss": 0.0044, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.2604735883424408, |
|
"grad_norm": 5.067185878753662, |
|
"learning_rate": 4.708829054477145e-05, |
|
"loss": 0.0219, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.26229508196721313, |
|
"grad_norm": 10.094161987304688, |
|
"learning_rate": 4.705698184095179e-05, |
|
"loss": 0.0192, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.2641165755919854, |
|
"grad_norm": 3.966435194015503, |
|
"learning_rate": 4.7025673137132126e-05, |
|
"loss": 0.1723, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2659380692167577, |
|
"grad_norm": 8.564971923828125, |
|
"learning_rate": 4.6994364433312464e-05, |
|
"loss": 0.0367, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.2677595628415301, |
|
"grad_norm": 1.8184422254562378, |
|
"learning_rate": 4.69630557294928e-05, |
|
"loss": 0.1991, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.26958105646630237, |
|
"grad_norm": 0.5665643811225891, |
|
"learning_rate": 4.693174702567314e-05, |
|
"loss": 0.005, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.27140255009107467, |
|
"grad_norm": 6.798255920410156, |
|
"learning_rate": 4.690043832185348e-05, |
|
"loss": 0.2519, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.273224043715847, |
|
"grad_norm": 0.10390115529298782, |
|
"learning_rate": 4.6869129618033816e-05, |
|
"loss": 0.0023, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2750455373406193, |
|
"grad_norm": 10.828420639038086, |
|
"learning_rate": 4.6837820914214155e-05, |
|
"loss": 0.2426, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2768670309653916, |
|
"grad_norm": 8.121326446533203, |
|
"learning_rate": 4.680651221039449e-05, |
|
"loss": 0.2702, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2786885245901639, |
|
"grad_norm": 9.042678833007812, |
|
"learning_rate": 4.6775203506574824e-05, |
|
"loss": 0.0417, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.28051001821493626, |
|
"grad_norm": 0.2314717322587967, |
|
"learning_rate": 4.674389480275517e-05, |
|
"loss": 0.0031, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.28233151183970856, |
|
"grad_norm": 0.13141897320747375, |
|
"learning_rate": 4.671258609893551e-05, |
|
"loss": 0.0035, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.28415300546448086, |
|
"grad_norm": 7.087045192718506, |
|
"learning_rate": 4.6681277395115845e-05, |
|
"loss": 0.0734, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.2859744990892532, |
|
"grad_norm": 4.150629043579102, |
|
"learning_rate": 4.6649968691296183e-05, |
|
"loss": 0.1494, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.2877959927140255, |
|
"grad_norm": 3.2965354919433594, |
|
"learning_rate": 4.661865998747652e-05, |
|
"loss": 0.2095, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.2896174863387978, |
|
"grad_norm": 4.767673492431641, |
|
"learning_rate": 4.658735128365686e-05, |
|
"loss": 0.0516, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.29143897996357016, |
|
"grad_norm": 0.28837457299232483, |
|
"learning_rate": 4.65560425798372e-05, |
|
"loss": 0.0066, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.29326047358834245, |
|
"grad_norm": 2.4652457237243652, |
|
"learning_rate": 4.6524733876017536e-05, |
|
"loss": 0.0242, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.29508196721311475, |
|
"grad_norm": 8.974453926086426, |
|
"learning_rate": 4.6493425172197874e-05, |
|
"loss": 0.131, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.29690346083788705, |
|
"grad_norm": 6.204460144042969, |
|
"learning_rate": 4.646211646837821e-05, |
|
"loss": 0.0939, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.2987249544626594, |
|
"grad_norm": 3.9641923904418945, |
|
"learning_rate": 4.643080776455855e-05, |
|
"loss": 0.0657, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.3005464480874317, |
|
"grad_norm": 0.4072006940841675, |
|
"learning_rate": 4.639949906073889e-05, |
|
"loss": 0.0094, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.302367941712204, |
|
"grad_norm": 6.755229473114014, |
|
"learning_rate": 4.636819035691923e-05, |
|
"loss": 0.0565, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.30418943533697634, |
|
"grad_norm": 3.4620673656463623, |
|
"learning_rate": 4.6336881653099565e-05, |
|
"loss": 0.118, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.30601092896174864, |
|
"grad_norm": 2.3143203258514404, |
|
"learning_rate": 4.63055729492799e-05, |
|
"loss": 0.0329, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.30783242258652094, |
|
"grad_norm": 4.3909502029418945, |
|
"learning_rate": 4.627426424546024e-05, |
|
"loss": 0.0331, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.30965391621129323, |
|
"grad_norm": 4.431717872619629, |
|
"learning_rate": 4.624295554164057e-05, |
|
"loss": 0.2422, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3114754098360656, |
|
"grad_norm": 5.801990985870361, |
|
"learning_rate": 4.621164683782092e-05, |
|
"loss": 0.1696, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.3132969034608379, |
|
"grad_norm": 3.9410240650177, |
|
"learning_rate": 4.6180338134001256e-05, |
|
"loss": 0.105, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3151183970856102, |
|
"grad_norm": 3.1621594429016113, |
|
"learning_rate": 4.6149029430181594e-05, |
|
"loss": 0.1192, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.31693989071038253, |
|
"grad_norm": 0.6451271772384644, |
|
"learning_rate": 4.611772072636193e-05, |
|
"loss": 0.0161, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.31876138433515483, |
|
"grad_norm": 2.4215264320373535, |
|
"learning_rate": 4.608641202254227e-05, |
|
"loss": 0.1183, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3205828779599271, |
|
"grad_norm": 1.0982496738433838, |
|
"learning_rate": 4.605510331872261e-05, |
|
"loss": 0.0156, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.3224043715846995, |
|
"grad_norm": 3.5166165828704834, |
|
"learning_rate": 4.6023794614902946e-05, |
|
"loss": 0.0369, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.3242258652094718, |
|
"grad_norm": 9.985152244567871, |
|
"learning_rate": 4.5992485911083284e-05, |
|
"loss": 0.2611, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.32604735883424407, |
|
"grad_norm": 7.556087493896484, |
|
"learning_rate": 4.596117720726362e-05, |
|
"loss": 0.1196, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 5.1856255531311035, |
|
"learning_rate": 4.5929868503443954e-05, |
|
"loss": 0.0543, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3296903460837887, |
|
"grad_norm": 2.5084640979766846, |
|
"learning_rate": 4.58985597996243e-05, |
|
"loss": 0.0427, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.331511839708561, |
|
"grad_norm": 4.249439716339111, |
|
"learning_rate": 4.586725109580464e-05, |
|
"loss": 0.0918, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 2.1144330501556396, |
|
"learning_rate": 4.5835942391984975e-05, |
|
"loss": 0.0206, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.33515482695810567, |
|
"grad_norm": 3.1298768520355225, |
|
"learning_rate": 4.580463368816531e-05, |
|
"loss": 0.2067, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.33697632058287796, |
|
"grad_norm": 0.23244377970695496, |
|
"learning_rate": 4.577332498434565e-05, |
|
"loss": 0.0054, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.33879781420765026, |
|
"grad_norm": 5.3826165199279785, |
|
"learning_rate": 4.574201628052599e-05, |
|
"loss": 0.0849, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.3406193078324226, |
|
"grad_norm": 1.491070032119751, |
|
"learning_rate": 4.571070757670632e-05, |
|
"loss": 0.0188, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3424408014571949, |
|
"grad_norm": 4.769292831420898, |
|
"learning_rate": 4.5679398872886666e-05, |
|
"loss": 0.0418, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.3442622950819672, |
|
"grad_norm": 2.514923334121704, |
|
"learning_rate": 4.5648090169067004e-05, |
|
"loss": 0.0264, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.3460837887067395, |
|
"grad_norm": 2.249234914779663, |
|
"learning_rate": 4.561678146524734e-05, |
|
"loss": 0.0439, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.34790528233151186, |
|
"grad_norm": 3.586806535720825, |
|
"learning_rate": 4.558547276142768e-05, |
|
"loss": 0.0383, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.34972677595628415, |
|
"grad_norm": 0.31262052059173584, |
|
"learning_rate": 4.555416405760802e-05, |
|
"loss": 0.0061, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.35154826958105645, |
|
"grad_norm": 0.5122743844985962, |
|
"learning_rate": 4.5522855353788357e-05, |
|
"loss": 0.0079, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.3533697632058288, |
|
"grad_norm": 6.9489970207214355, |
|
"learning_rate": 4.5491546649968695e-05, |
|
"loss": 0.0527, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3551912568306011, |
|
"grad_norm": 0.07147891819477081, |
|
"learning_rate": 4.546023794614903e-05, |
|
"loss": 0.0026, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3570127504553734, |
|
"grad_norm": 0.07803834974765778, |
|
"learning_rate": 4.542892924232937e-05, |
|
"loss": 0.0026, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.3588342440801457, |
|
"grad_norm": 9.417202949523926, |
|
"learning_rate": 4.53976205385097e-05, |
|
"loss": 0.0472, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.36065573770491804, |
|
"grad_norm": 15.531563758850098, |
|
"learning_rate": 4.536631183469005e-05, |
|
"loss": 0.3136, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.36247723132969034, |
|
"grad_norm": 1.265466332435608, |
|
"learning_rate": 4.5335003130870385e-05, |
|
"loss": 0.0104, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.36429872495446264, |
|
"grad_norm": 0.04873238131403923, |
|
"learning_rate": 4.5303694427050724e-05, |
|
"loss": 0.0016, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.366120218579235, |
|
"grad_norm": 5.967303276062012, |
|
"learning_rate": 4.527238572323106e-05, |
|
"loss": 0.0346, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.3679417122040073, |
|
"grad_norm": 4.412702560424805, |
|
"learning_rate": 4.52410770194114e-05, |
|
"loss": 0.0571, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.3697632058287796, |
|
"grad_norm": 4.523514270782471, |
|
"learning_rate": 4.520976831559174e-05, |
|
"loss": 0.1823, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.37158469945355194, |
|
"grad_norm": 0.036965470761060715, |
|
"learning_rate": 4.517845961177207e-05, |
|
"loss": 0.0014, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.37340619307832423, |
|
"grad_norm": 1.5821716785430908, |
|
"learning_rate": 4.5147150907952414e-05, |
|
"loss": 0.0077, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.37522768670309653, |
|
"grad_norm": 9.728230476379395, |
|
"learning_rate": 4.511584220413275e-05, |
|
"loss": 0.1637, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.3770491803278688, |
|
"grad_norm": 2.888592481613159, |
|
"learning_rate": 4.5084533500313084e-05, |
|
"loss": 0.016, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.3788706739526412, |
|
"grad_norm": 16.919540405273438, |
|
"learning_rate": 4.505322479649343e-05, |
|
"loss": 0.2238, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.3806921675774135, |
|
"grad_norm": 7.649754524230957, |
|
"learning_rate": 4.502191609267377e-05, |
|
"loss": 0.0428, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.3825136612021858, |
|
"grad_norm": 0.849263072013855, |
|
"learning_rate": 4.4990607388854105e-05, |
|
"loss": 0.0087, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3843351548269581, |
|
"grad_norm": 0.10891333967447281, |
|
"learning_rate": 4.495929868503444e-05, |
|
"loss": 0.0027, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.3861566484517304, |
|
"grad_norm": 0.21434666216373444, |
|
"learning_rate": 4.492798998121478e-05, |
|
"loss": 0.0038, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.3879781420765027, |
|
"grad_norm": 0.19272175431251526, |
|
"learning_rate": 4.489668127739512e-05, |
|
"loss": 0.0028, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.38979963570127507, |
|
"grad_norm": 1.2215018272399902, |
|
"learning_rate": 4.486537257357545e-05, |
|
"loss": 0.0161, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.39162112932604737, |
|
"grad_norm": 1.3724066019058228, |
|
"learning_rate": 4.4834063869755796e-05, |
|
"loss": 0.0097, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.39344262295081966, |
|
"grad_norm": 1.1924035549163818, |
|
"learning_rate": 4.4802755165936134e-05, |
|
"loss": 0.0066, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.39526411657559196, |
|
"grad_norm": 1.784501075744629, |
|
"learning_rate": 4.477144646211647e-05, |
|
"loss": 0.0108, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.3970856102003643, |
|
"grad_norm": 0.02774379588663578, |
|
"learning_rate": 4.474013775829681e-05, |
|
"loss": 0.0009, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.3989071038251366, |
|
"grad_norm": 0.7612521052360535, |
|
"learning_rate": 4.470882905447715e-05, |
|
"loss": 0.0028, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4007285974499089, |
|
"grad_norm": 18.709510803222656, |
|
"learning_rate": 4.4677520350657486e-05, |
|
"loss": 0.2104, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.40255009107468126, |
|
"grad_norm": 16.760061264038086, |
|
"learning_rate": 4.464621164683782e-05, |
|
"loss": 0.2124, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.40437158469945356, |
|
"grad_norm": 11.746216773986816, |
|
"learning_rate": 4.461490294301816e-05, |
|
"loss": 0.0412, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.40619307832422585, |
|
"grad_norm": 0.067040354013443, |
|
"learning_rate": 4.45835942391985e-05, |
|
"loss": 0.0011, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.4080145719489982, |
|
"grad_norm": 0.5200300216674805, |
|
"learning_rate": 4.455228553537883e-05, |
|
"loss": 0.0037, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.4098360655737705, |
|
"grad_norm": 8.267187118530273, |
|
"learning_rate": 4.452097683155918e-05, |
|
"loss": 0.1523, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4116575591985428, |
|
"grad_norm": 3.609358549118042, |
|
"learning_rate": 4.4489668127739515e-05, |
|
"loss": 0.1881, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.4134790528233151, |
|
"grad_norm": 0.05134233087301254, |
|
"learning_rate": 4.445835942391985e-05, |
|
"loss": 0.0009, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.41530054644808745, |
|
"grad_norm": 0.025521283969283104, |
|
"learning_rate": 4.442705072010019e-05, |
|
"loss": 0.0008, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.41712204007285975, |
|
"grad_norm": 4.127375602722168, |
|
"learning_rate": 4.439574201628053e-05, |
|
"loss": 0.1777, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.41894353369763204, |
|
"grad_norm": 0.06890428066253662, |
|
"learning_rate": 4.436443331246087e-05, |
|
"loss": 0.0013, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4207650273224044, |
|
"grad_norm": 0.10192258656024933, |
|
"learning_rate": 4.43331246086412e-05, |
|
"loss": 0.0011, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.4225865209471767, |
|
"grad_norm": 11.624174118041992, |
|
"learning_rate": 4.4301815904821544e-05, |
|
"loss": 0.2336, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.424408014571949, |
|
"grad_norm": 7.359781265258789, |
|
"learning_rate": 4.427050720100188e-05, |
|
"loss": 0.0708, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.4262295081967213, |
|
"grad_norm": 0.025949914008378983, |
|
"learning_rate": 4.423919849718222e-05, |
|
"loss": 0.0009, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.42805100182149364, |
|
"grad_norm": 16.999971389770508, |
|
"learning_rate": 4.420788979336256e-05, |
|
"loss": 0.0829, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.42987249544626593, |
|
"grad_norm": 1.726982593536377, |
|
"learning_rate": 4.41765810895429e-05, |
|
"loss": 0.0058, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.43169398907103823, |
|
"grad_norm": 5.918796539306641, |
|
"learning_rate": 4.4145272385723235e-05, |
|
"loss": 0.3652, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.4335154826958106, |
|
"grad_norm": 0.22916308045387268, |
|
"learning_rate": 4.411396368190357e-05, |
|
"loss": 0.004, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.4353369763205829, |
|
"grad_norm": 16.436437606811523, |
|
"learning_rate": 4.408265497808391e-05, |
|
"loss": 0.1857, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.4371584699453552, |
|
"grad_norm": 4.63889741897583, |
|
"learning_rate": 4.405134627426425e-05, |
|
"loss": 0.1949, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.43897996357012753, |
|
"grad_norm": 3.314641237258911, |
|
"learning_rate": 4.402003757044458e-05, |
|
"loss": 0.0253, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4408014571948998, |
|
"grad_norm": 5.616635322570801, |
|
"learning_rate": 4.3988728866624925e-05, |
|
"loss": 0.3264, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.4426229508196721, |
|
"grad_norm": 3.840766191482544, |
|
"learning_rate": 4.3957420162805264e-05, |
|
"loss": 0.1885, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 0.6121935844421387, |
|
"learning_rate": 4.39261114589856e-05, |
|
"loss": 0.0043, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.44626593806921677, |
|
"grad_norm": 1.0368307828903198, |
|
"learning_rate": 4.389480275516594e-05, |
|
"loss": 0.0139, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.44808743169398907, |
|
"grad_norm": 0.8692595958709717, |
|
"learning_rate": 4.386349405134628e-05, |
|
"loss": 0.0116, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.44990892531876137, |
|
"grad_norm": 13.962285041809082, |
|
"learning_rate": 4.3832185347526616e-05, |
|
"loss": 0.2261, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.4517304189435337, |
|
"grad_norm": 7.238385200500488, |
|
"learning_rate": 4.380087664370695e-05, |
|
"loss": 0.0423, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.453551912568306, |
|
"grad_norm": 0.13841496407985687, |
|
"learning_rate": 4.376956793988729e-05, |
|
"loss": 0.0048, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.4553734061930783, |
|
"grad_norm": 2.9722912311553955, |
|
"learning_rate": 4.373825923606763e-05, |
|
"loss": 0.1466, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.45719489981785066, |
|
"grad_norm": 2.275719404220581, |
|
"learning_rate": 4.370695053224796e-05, |
|
"loss": 0.0137, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.45901639344262296, |
|
"grad_norm": 2.344390392303467, |
|
"learning_rate": 4.367564182842831e-05, |
|
"loss": 0.0229, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.46083788706739526, |
|
"grad_norm": 2.280900716781616, |
|
"learning_rate": 4.3644333124608645e-05, |
|
"loss": 0.0163, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.46265938069216755, |
|
"grad_norm": 0.29969778656959534, |
|
"learning_rate": 4.361302442078898e-05, |
|
"loss": 0.005, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.4644808743169399, |
|
"grad_norm": 9.383149147033691, |
|
"learning_rate": 4.358171571696932e-05, |
|
"loss": 0.1589, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.4663023679417122, |
|
"grad_norm": 2.4629006385803223, |
|
"learning_rate": 4.355040701314966e-05, |
|
"loss": 0.1433, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.4681238615664845, |
|
"grad_norm": 8.262639999389648, |
|
"learning_rate": 4.351909830933e-05, |
|
"loss": 0.0439, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.46994535519125685, |
|
"grad_norm": 0.35271739959716797, |
|
"learning_rate": 4.348778960551033e-05, |
|
"loss": 0.0069, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.47176684881602915, |
|
"grad_norm": 7.017502784729004, |
|
"learning_rate": 4.3456480901690674e-05, |
|
"loss": 0.0536, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.47358834244080145, |
|
"grad_norm": 1.242332935333252, |
|
"learning_rate": 4.342517219787101e-05, |
|
"loss": 0.0127, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.47540983606557374, |
|
"grad_norm": 2.1583664417266846, |
|
"learning_rate": 4.339386349405135e-05, |
|
"loss": 0.0214, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.4772313296903461, |
|
"grad_norm": 4.674497127532959, |
|
"learning_rate": 4.336255479023169e-05, |
|
"loss": 0.0298, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.4790528233151184, |
|
"grad_norm": 1.7035185098648071, |
|
"learning_rate": 4.3331246086412026e-05, |
|
"loss": 0.0105, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.4808743169398907, |
|
"grad_norm": 0.12772846221923828, |
|
"learning_rate": 4.3299937382592365e-05, |
|
"loss": 0.0023, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.48269581056466304, |
|
"grad_norm": 19.110734939575195, |
|
"learning_rate": 4.3268628678772696e-05, |
|
"loss": 0.4182, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.48451730418943534, |
|
"grad_norm": 9.244805335998535, |
|
"learning_rate": 4.323731997495304e-05, |
|
"loss": 0.2689, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.48633879781420764, |
|
"grad_norm": 1.423405647277832, |
|
"learning_rate": 4.320601127113338e-05, |
|
"loss": 0.0072, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.48816029143898, |
|
"grad_norm": 5.039240837097168, |
|
"learning_rate": 4.317470256731371e-05, |
|
"loss": 0.1362, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.4899817850637523, |
|
"grad_norm": 6.24086856842041, |
|
"learning_rate": 4.3143393863494055e-05, |
|
"loss": 0.1773, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.4918032786885246, |
|
"grad_norm": 1.5966615676879883, |
|
"learning_rate": 4.3112085159674393e-05, |
|
"loss": 0.1787, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4936247723132969, |
|
"grad_norm": 3.686453104019165, |
|
"learning_rate": 4.308077645585473e-05, |
|
"loss": 0.3175, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.49544626593806923, |
|
"grad_norm": 0.04620608314871788, |
|
"learning_rate": 4.304946775203507e-05, |
|
"loss": 0.0016, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.4972677595628415, |
|
"grad_norm": 0.20282939076423645, |
|
"learning_rate": 4.301815904821541e-05, |
|
"loss": 0.003, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.4990892531876138, |
|
"grad_norm": 0.0827542319893837, |
|
"learning_rate": 4.2986850344395746e-05, |
|
"loss": 0.0029, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.5009107468123861, |
|
"grad_norm": 0.29965463280677795, |
|
"learning_rate": 4.295554164057608e-05, |
|
"loss": 0.0063, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5027322404371585, |
|
"grad_norm": 4.543389320373535, |
|
"learning_rate": 4.292423293675642e-05, |
|
"loss": 0.0712, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5045537340619308, |
|
"grad_norm": 2.681236743927002, |
|
"learning_rate": 4.289292423293676e-05, |
|
"loss": 0.0391, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.5063752276867031, |
|
"grad_norm": 2.3870694637298584, |
|
"learning_rate": 4.286161552911709e-05, |
|
"loss": 0.028, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.5081967213114754, |
|
"grad_norm": 1.0104269981384277, |
|
"learning_rate": 4.283030682529744e-05, |
|
"loss": 0.0179, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.5100182149362478, |
|
"grad_norm": 0.17253230512142181, |
|
"learning_rate": 4.2798998121477775e-05, |
|
"loss": 0.0052, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.51183970856102, |
|
"grad_norm": 0.27264872193336487, |
|
"learning_rate": 4.276768941765811e-05, |
|
"loss": 0.0086, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.5136612021857924, |
|
"grad_norm": 4.211297988891602, |
|
"learning_rate": 4.2736380713838444e-05, |
|
"loss": 0.0984, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.5154826958105647, |
|
"grad_norm": 0.8885019421577454, |
|
"learning_rate": 4.270507201001879e-05, |
|
"loss": 0.0076, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.517304189435337, |
|
"grad_norm": 0.921626091003418, |
|
"learning_rate": 4.267376330619913e-05, |
|
"loss": 0.0087, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.5191256830601093, |
|
"grad_norm": 0.955025851726532, |
|
"learning_rate": 4.264245460237946e-05, |
|
"loss": 0.0131, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5209471766848816, |
|
"grad_norm": 6.5410919189453125, |
|
"learning_rate": 4.2611145898559804e-05, |
|
"loss": 0.0893, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.5227686703096539, |
|
"grad_norm": 10.288850784301758, |
|
"learning_rate": 4.257983719474014e-05, |
|
"loss": 0.0205, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.5245901639344263, |
|
"grad_norm": 0.04318219795823097, |
|
"learning_rate": 4.254852849092048e-05, |
|
"loss": 0.0017, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.5264116575591985, |
|
"grad_norm": 2.292560577392578, |
|
"learning_rate": 4.251721978710082e-05, |
|
"loss": 0.1756, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.5282331511839709, |
|
"grad_norm": 3.368222713470459, |
|
"learning_rate": 4.2485911083281156e-05, |
|
"loss": 0.0199, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5300546448087432, |
|
"grad_norm": 4.767663955688477, |
|
"learning_rate": 4.2454602379461494e-05, |
|
"loss": 0.1685, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.5318761384335154, |
|
"grad_norm": 0.11847999691963196, |
|
"learning_rate": 4.2423293675641826e-05, |
|
"loss": 0.0029, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.5336976320582878, |
|
"grad_norm": 6.336174488067627, |
|
"learning_rate": 4.239198497182217e-05, |
|
"loss": 0.0425, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.5355191256830601, |
|
"grad_norm": 15.296269416809082, |
|
"learning_rate": 4.236067626800251e-05, |
|
"loss": 0.0794, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.5373406193078324, |
|
"grad_norm": 0.18433484435081482, |
|
"learning_rate": 4.232936756418284e-05, |
|
"loss": 0.0039, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5391621129326047, |
|
"grad_norm": 0.14353422820568085, |
|
"learning_rate": 4.2298058860363185e-05, |
|
"loss": 0.003, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.5409836065573771, |
|
"grad_norm": 3.668611526489258, |
|
"learning_rate": 4.226675015654352e-05, |
|
"loss": 0.0258, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.5428051001821493, |
|
"grad_norm": 3.0370147228240967, |
|
"learning_rate": 4.223544145272386e-05, |
|
"loss": 0.0125, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.5446265938069217, |
|
"grad_norm": 3.1501262187957764, |
|
"learning_rate": 4.220413274890419e-05, |
|
"loss": 0.165, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.546448087431694, |
|
"grad_norm": 1.3201171159744263, |
|
"learning_rate": 4.217282404508454e-05, |
|
"loss": 0.0116, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5482695810564663, |
|
"grad_norm": 0.2336684912443161, |
|
"learning_rate": 4.2141515341264876e-05, |
|
"loss": 0.0035, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.5500910746812386, |
|
"grad_norm": 9.169921875, |
|
"learning_rate": 4.211020663744521e-05, |
|
"loss": 0.0728, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.5519125683060109, |
|
"grad_norm": 0.07561606913805008, |
|
"learning_rate": 4.207889793362555e-05, |
|
"loss": 0.0026, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.5537340619307832, |
|
"grad_norm": 1.609074592590332, |
|
"learning_rate": 4.204758922980589e-05, |
|
"loss": 0.0113, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 3.4536828994750977, |
|
"learning_rate": 4.201628052598622e-05, |
|
"loss": 0.0147, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5573770491803278, |
|
"grad_norm": 6.3433918952941895, |
|
"learning_rate": 4.1984971822166566e-05, |
|
"loss": 0.2036, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.5591985428051002, |
|
"grad_norm": 4.692470550537109, |
|
"learning_rate": 4.1953663118346905e-05, |
|
"loss": 0.0401, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.5610200364298725, |
|
"grad_norm": 6.455625057220459, |
|
"learning_rate": 4.192235441452724e-05, |
|
"loss": 0.0697, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.5628415300546448, |
|
"grad_norm": 4.6514716148376465, |
|
"learning_rate": 4.1891045710707574e-05, |
|
"loss": 0.1204, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.5646630236794171, |
|
"grad_norm": 3.956284523010254, |
|
"learning_rate": 4.185973700688792e-05, |
|
"loss": 0.1731, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5664845173041895, |
|
"grad_norm": 1.0369857549667358, |
|
"learning_rate": 4.182842830306826e-05, |
|
"loss": 0.0108, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.5683060109289617, |
|
"grad_norm": 0.22317107021808624, |
|
"learning_rate": 4.179711959924859e-05, |
|
"loss": 0.0027, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.5701275045537341, |
|
"grad_norm": 6.622973442077637, |
|
"learning_rate": 4.1765810895428933e-05, |
|
"loss": 0.0617, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.5719489981785064, |
|
"grad_norm": 6.9914422035217285, |
|
"learning_rate": 4.173450219160927e-05, |
|
"loss": 0.08, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.5737704918032787, |
|
"grad_norm": 9.093573570251465, |
|
"learning_rate": 4.170319348778961e-05, |
|
"loss": 0.1431, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.575591985428051, |
|
"grad_norm": 0.36630722880363464, |
|
"learning_rate": 4.167188478396994e-05, |
|
"loss": 0.0064, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.5774134790528234, |
|
"grad_norm": 4.634314060211182, |
|
"learning_rate": 4.1640576080150286e-05, |
|
"loss": 0.1175, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.5792349726775956, |
|
"grad_norm": 0.327493816614151, |
|
"learning_rate": 4.1609267376330624e-05, |
|
"loss": 0.0048, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.581056466302368, |
|
"grad_norm": 0.24527287483215332, |
|
"learning_rate": 4.1577958672510956e-05, |
|
"loss": 0.0042, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.5828779599271403, |
|
"grad_norm": 5.85356330871582, |
|
"learning_rate": 4.15466499686913e-05, |
|
"loss": 0.0292, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5846994535519126, |
|
"grad_norm": 0.6484350562095642, |
|
"learning_rate": 4.151534126487164e-05, |
|
"loss": 0.0061, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.5865209471766849, |
|
"grad_norm": 2.3831231594085693, |
|
"learning_rate": 4.148403256105197e-05, |
|
"loss": 0.023, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.5883424408014571, |
|
"grad_norm": 0.4163350462913513, |
|
"learning_rate": 4.1452723857232315e-05, |
|
"loss": 0.0057, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.5901639344262295, |
|
"grad_norm": 3.6175761222839355, |
|
"learning_rate": 4.142141515341265e-05, |
|
"loss": 0.0257, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.5919854280510018, |
|
"grad_norm": 2.211493730545044, |
|
"learning_rate": 4.139010644959299e-05, |
|
"loss": 0.0206, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5938069216757741, |
|
"grad_norm": 0.03227696940302849, |
|
"learning_rate": 4.135879774577332e-05, |
|
"loss": 0.0011, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.5956284153005464, |
|
"grad_norm": 0.03025558590888977, |
|
"learning_rate": 4.132748904195367e-05, |
|
"loss": 0.001, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.5974499089253188, |
|
"grad_norm": 0.027479926124215126, |
|
"learning_rate": 4.1296180338134006e-05, |
|
"loss": 0.0009, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.599271402550091, |
|
"grad_norm": 0.07558054476976395, |
|
"learning_rate": 4.126487163431434e-05, |
|
"loss": 0.0015, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.6010928961748634, |
|
"grad_norm": 2.8735299110412598, |
|
"learning_rate": 4.123356293049468e-05, |
|
"loss": 0.0178, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6029143897996357, |
|
"grad_norm": 5.421073913574219, |
|
"learning_rate": 4.120225422667502e-05, |
|
"loss": 0.1629, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.604735883424408, |
|
"grad_norm": 3.4357104301452637, |
|
"learning_rate": 4.117094552285535e-05, |
|
"loss": 0.1494, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.6065573770491803, |
|
"grad_norm": 3.9810731410980225, |
|
"learning_rate": 4.113963681903569e-05, |
|
"loss": 0.1603, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.6083788706739527, |
|
"grad_norm": 0.08673622459173203, |
|
"learning_rate": 4.1108328115216034e-05, |
|
"loss": 0.0013, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.6102003642987249, |
|
"grad_norm": 0.03046957589685917, |
|
"learning_rate": 4.107701941139637e-05, |
|
"loss": 0.0011, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6120218579234973, |
|
"grad_norm": 11.446281433105469, |
|
"learning_rate": 4.1045710707576704e-05, |
|
"loss": 0.0644, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.6138433515482696, |
|
"grad_norm": 1.642142415046692, |
|
"learning_rate": 4.101440200375705e-05, |
|
"loss": 0.0056, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.6156648451730419, |
|
"grad_norm": 1.6858967542648315, |
|
"learning_rate": 4.098309329993739e-05, |
|
"loss": 0.0133, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.6174863387978142, |
|
"grad_norm": 0.04817913472652435, |
|
"learning_rate": 4.095178459611772e-05, |
|
"loss": 0.0013, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.6193078324225865, |
|
"grad_norm": 1.458479642868042, |
|
"learning_rate": 4.092047589229806e-05, |
|
"loss": 0.0078, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6211293260473588, |
|
"grad_norm": 0.7223337292671204, |
|
"learning_rate": 4.08891671884784e-05, |
|
"loss": 0.0167, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.6229508196721312, |
|
"grad_norm": 0.4470398426055908, |
|
"learning_rate": 4.085785848465874e-05, |
|
"loss": 0.0043, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.6247723132969034, |
|
"grad_norm": 0.10019668936729431, |
|
"learning_rate": 4.082654978083907e-05, |
|
"loss": 0.0018, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.6265938069216758, |
|
"grad_norm": 0.07710490375757217, |
|
"learning_rate": 4.0795241077019416e-05, |
|
"loss": 0.0014, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.6284153005464481, |
|
"grad_norm": 0.49735426902770996, |
|
"learning_rate": 4.0763932373199754e-05, |
|
"loss": 0.0083, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6302367941712204, |
|
"grad_norm": 7.425228118896484, |
|
"learning_rate": 4.0732623669380085e-05, |
|
"loss": 0.4383, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.6320582877959927, |
|
"grad_norm": 9.259267807006836, |
|
"learning_rate": 4.070131496556043e-05, |
|
"loss": 0.0335, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.6338797814207651, |
|
"grad_norm": 1.7133445739746094, |
|
"learning_rate": 4.067000626174077e-05, |
|
"loss": 0.1959, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.6357012750455373, |
|
"grad_norm": 7.2402238845825195, |
|
"learning_rate": 4.06386975579211e-05, |
|
"loss": 0.0182, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.6375227686703097, |
|
"grad_norm": 1.9264317750930786, |
|
"learning_rate": 4.0607388854101445e-05, |
|
"loss": 0.0096, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.639344262295082, |
|
"grad_norm": 5.997550964355469, |
|
"learning_rate": 4.057608015028178e-05, |
|
"loss": 0.2692, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.6411657559198543, |
|
"grad_norm": 0.8955827355384827, |
|
"learning_rate": 4.054477144646212e-05, |
|
"loss": 0.008, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.6429872495446266, |
|
"grad_norm": 15.662752151489258, |
|
"learning_rate": 4.051346274264245e-05, |
|
"loss": 0.0583, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.644808743169399, |
|
"grad_norm": 0.17019307613372803, |
|
"learning_rate": 4.04821540388228e-05, |
|
"loss": 0.0045, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.6466302367941712, |
|
"grad_norm": 0.09931718558073044, |
|
"learning_rate": 4.0450845335003135e-05, |
|
"loss": 0.0026, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6484517304189436, |
|
"grad_norm": 0.13057781755924225, |
|
"learning_rate": 4.041953663118347e-05, |
|
"loss": 0.0043, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.6502732240437158, |
|
"grad_norm": 8.035531044006348, |
|
"learning_rate": 4.038822792736381e-05, |
|
"loss": 0.1404, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.6520947176684881, |
|
"grad_norm": 5.847539901733398, |
|
"learning_rate": 4.035691922354415e-05, |
|
"loss": 0.096, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.6539162112932605, |
|
"grad_norm": 2.056274890899658, |
|
"learning_rate": 4.032561051972448e-05, |
|
"loss": 0.0151, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 0.13496744632720947, |
|
"learning_rate": 4.029430181590482e-05, |
|
"loss": 0.0037, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6575591985428051, |
|
"grad_norm": 1.0296528339385986, |
|
"learning_rate": 4.0262993112085164e-05, |
|
"loss": 0.0107, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.6593806921675774, |
|
"grad_norm": 5.667383193969727, |
|
"learning_rate": 4.02316844082655e-05, |
|
"loss": 0.0813, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.6612021857923497, |
|
"grad_norm": 4.281712055206299, |
|
"learning_rate": 4.0200375704445834e-05, |
|
"loss": 0.0377, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.663023679417122, |
|
"grad_norm": 0.37562137842178345, |
|
"learning_rate": 4.016906700062618e-05, |
|
"loss": 0.0068, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.6648451730418944, |
|
"grad_norm": 7.240072727203369, |
|
"learning_rate": 4.013775829680652e-05, |
|
"loss": 0.0583, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 1.4774280786514282, |
|
"learning_rate": 4.010644959298685e-05, |
|
"loss": 0.0111, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.668488160291439, |
|
"grad_norm": 2.835345983505249, |
|
"learning_rate": 4.007514088916719e-05, |
|
"loss": 0.048, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.6703096539162113, |
|
"grad_norm": 0.5821844935417175, |
|
"learning_rate": 4.004383218534753e-05, |
|
"loss": 0.0049, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.6721311475409836, |
|
"grad_norm": 3.108658790588379, |
|
"learning_rate": 4.001252348152787e-05, |
|
"loss": 0.0191, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.6739526411657559, |
|
"grad_norm": 8.561149597167969, |
|
"learning_rate": 3.99812147777082e-05, |
|
"loss": 0.0647, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6757741347905283, |
|
"grad_norm": 0.19415520131587982, |
|
"learning_rate": 3.9949906073888546e-05, |
|
"loss": 0.0029, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.6775956284153005, |
|
"grad_norm": 0.06202491745352745, |
|
"learning_rate": 3.9918597370068884e-05, |
|
"loss": 0.0018, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.6794171220400729, |
|
"grad_norm": 3.6461710929870605, |
|
"learning_rate": 3.9887288666249215e-05, |
|
"loss": 0.0083, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.6812386156648452, |
|
"grad_norm": 15.124981880187988, |
|
"learning_rate": 3.985597996242956e-05, |
|
"loss": 0.1567, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.6830601092896175, |
|
"grad_norm": 2.153791666030884, |
|
"learning_rate": 3.98246712586099e-05, |
|
"loss": 0.0105, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6848816029143898, |
|
"grad_norm": 0.7105178236961365, |
|
"learning_rate": 3.979336255479023e-05, |
|
"loss": 0.0032, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.6867030965391621, |
|
"grad_norm": 0.782936692237854, |
|
"learning_rate": 3.976205385097057e-05, |
|
"loss": 0.0053, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.6885245901639344, |
|
"grad_norm": 17.51541519165039, |
|
"learning_rate": 3.973074514715091e-05, |
|
"loss": 0.0756, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.6903460837887068, |
|
"grad_norm": 8.316147804260254, |
|
"learning_rate": 3.969943644333125e-05, |
|
"loss": 0.0734, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.692167577413479, |
|
"grad_norm": 0.5029911398887634, |
|
"learning_rate": 3.966812773951158e-05, |
|
"loss": 0.0039, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6939890710382514, |
|
"grad_norm": 0.038834672421216965, |
|
"learning_rate": 3.963681903569193e-05, |
|
"loss": 0.001, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.6958105646630237, |
|
"grad_norm": 6.278717517852783, |
|
"learning_rate": 3.9605510331872265e-05, |
|
"loss": 0.0977, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.697632058287796, |
|
"grad_norm": 0.07930924743413925, |
|
"learning_rate": 3.9574201628052597e-05, |
|
"loss": 0.001, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.6994535519125683, |
|
"grad_norm": 0.039321571588516235, |
|
"learning_rate": 3.954289292423294e-05, |
|
"loss": 0.0008, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7012750455373407, |
|
"grad_norm": 0.296947717666626, |
|
"learning_rate": 3.951158422041328e-05, |
|
"loss": 0.0024, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7030965391621129, |
|
"grad_norm": 9.602386474609375, |
|
"learning_rate": 3.948027551659361e-05, |
|
"loss": 0.2459, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7049180327868853, |
|
"grad_norm": 0.015402719378471375, |
|
"learning_rate": 3.944896681277395e-05, |
|
"loss": 0.0006, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7067395264116576, |
|
"grad_norm": 0.014821278862655163, |
|
"learning_rate": 3.9417658108954294e-05, |
|
"loss": 0.0005, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.7085610200364298, |
|
"grad_norm": 0.03218044340610504, |
|
"learning_rate": 3.938634940513463e-05, |
|
"loss": 0.0008, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.7103825136612022, |
|
"grad_norm": 0.2660595774650574, |
|
"learning_rate": 3.9355040701314964e-05, |
|
"loss": 0.002, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7122040072859745, |
|
"grad_norm": 10.710992813110352, |
|
"learning_rate": 3.932373199749531e-05, |
|
"loss": 0.1631, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.7140255009107468, |
|
"grad_norm": 0.07790596038103104, |
|
"learning_rate": 3.929242329367565e-05, |
|
"loss": 0.0011, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.7158469945355191, |
|
"grad_norm": 6.2742462158203125, |
|
"learning_rate": 3.926111458985598e-05, |
|
"loss": 0.0302, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.7176684881602914, |
|
"grad_norm": 2.138582706451416, |
|
"learning_rate": 3.9229805886036316e-05, |
|
"loss": 0.1987, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.7194899817850637, |
|
"grad_norm": 0.017461583018302917, |
|
"learning_rate": 3.919849718221666e-05, |
|
"loss": 0.0005, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7213114754098361, |
|
"grad_norm": 0.812929093837738, |
|
"learning_rate": 3.9167188478397e-05, |
|
"loss": 0.0035, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.7231329690346083, |
|
"grad_norm": 0.6114425659179688, |
|
"learning_rate": 3.913587977457733e-05, |
|
"loss": 0.0053, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.7249544626593807, |
|
"grad_norm": 4.900968074798584, |
|
"learning_rate": 3.9104571070757675e-05, |
|
"loss": 0.212, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.726775956284153, |
|
"grad_norm": 0.29532384872436523, |
|
"learning_rate": 3.9073262366938014e-05, |
|
"loss": 0.0041, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.7285974499089253, |
|
"grad_norm": 8.498466491699219, |
|
"learning_rate": 3.9041953663118345e-05, |
|
"loss": 0.0716, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7304189435336976, |
|
"grad_norm": 13.04411792755127, |
|
"learning_rate": 3.901064495929869e-05, |
|
"loss": 0.0875, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.73224043715847, |
|
"grad_norm": 0.21621406078338623, |
|
"learning_rate": 3.897933625547903e-05, |
|
"loss": 0.002, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.7340619307832422, |
|
"grad_norm": 3.328409194946289, |
|
"learning_rate": 3.894802755165936e-05, |
|
"loss": 0.0098, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.7358834244080146, |
|
"grad_norm": 9.559609413146973, |
|
"learning_rate": 3.89167188478397e-05, |
|
"loss": 0.1343, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.7377049180327869, |
|
"grad_norm": 0.6983762979507446, |
|
"learning_rate": 3.888541014402004e-05, |
|
"loss": 0.0027, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7395264116575592, |
|
"grad_norm": 0.05780564993619919, |
|
"learning_rate": 3.885410144020038e-05, |
|
"loss": 0.0012, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.7413479052823315, |
|
"grad_norm": 1.37694251537323, |
|
"learning_rate": 3.882279273638071e-05, |
|
"loss": 0.2163, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.7431693989071039, |
|
"grad_norm": 0.019220901653170586, |
|
"learning_rate": 3.879148403256106e-05, |
|
"loss": 0.0007, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.7449908925318761, |
|
"grad_norm": 1.4538003206253052, |
|
"learning_rate": 3.8760175328741395e-05, |
|
"loss": 0.0067, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.7468123861566485, |
|
"grad_norm": 0.03995515778660774, |
|
"learning_rate": 3.8728866624921726e-05, |
|
"loss": 0.0013, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7486338797814208, |
|
"grad_norm": 7.3194146156311035, |
|
"learning_rate": 3.8697557921102065e-05, |
|
"loss": 0.071, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.7504553734061931, |
|
"grad_norm": 5.2992024421691895, |
|
"learning_rate": 3.866624921728241e-05, |
|
"loss": 0.114, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.7522768670309654, |
|
"grad_norm": 1.8797881603240967, |
|
"learning_rate": 3.863494051346274e-05, |
|
"loss": 0.0116, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.7540983606557377, |
|
"grad_norm": 0.04209226742386818, |
|
"learning_rate": 3.860363180964308e-05, |
|
"loss": 0.0015, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.75591985428051, |
|
"grad_norm": 9.969036102294922, |
|
"learning_rate": 3.8572323105823424e-05, |
|
"loss": 0.1007, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.7577413479052824, |
|
"grad_norm": 4.476265907287598, |
|
"learning_rate": 3.854101440200376e-05, |
|
"loss": 0.0236, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.7595628415300546, |
|
"grad_norm": 5.227812767028809, |
|
"learning_rate": 3.850970569818409e-05, |
|
"loss": 0.1293, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.761384335154827, |
|
"grad_norm": 0.5161237120628357, |
|
"learning_rate": 3.847839699436444e-05, |
|
"loss": 0.0068, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.7632058287795993, |
|
"grad_norm": 0.06678824126720428, |
|
"learning_rate": 3.8447088290544776e-05, |
|
"loss": 0.0022, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.7650273224043715, |
|
"grad_norm": 11.278353691101074, |
|
"learning_rate": 3.841577958672511e-05, |
|
"loss": 0.0381, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7668488160291439, |
|
"grad_norm": 1.4786038398742676, |
|
"learning_rate": 3.8384470882905446e-05, |
|
"loss": 0.0134, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.7686703096539163, |
|
"grad_norm": 3.063671588897705, |
|
"learning_rate": 3.835316217908579e-05, |
|
"loss": 0.0226, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.7704918032786885, |
|
"grad_norm": 0.3474072813987732, |
|
"learning_rate": 3.832185347526613e-05, |
|
"loss": 0.0075, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.7723132969034608, |
|
"grad_norm": 0.31541740894317627, |
|
"learning_rate": 3.829054477144646e-05, |
|
"loss": 0.0058, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.7741347905282332, |
|
"grad_norm": 1.1225982904434204, |
|
"learning_rate": 3.8259236067626805e-05, |
|
"loss": 0.1693, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7759562841530054, |
|
"grad_norm": 0.09978262335062027, |
|
"learning_rate": 3.8227927363807143e-05, |
|
"loss": 0.0031, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 0.08662858605384827, |
|
"learning_rate": 3.8196618659987475e-05, |
|
"loss": 0.0021, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.7795992714025501, |
|
"grad_norm": 0.11617692559957504, |
|
"learning_rate": 3.816530995616781e-05, |
|
"loss": 0.004, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.7814207650273224, |
|
"grad_norm": 0.17725755274295807, |
|
"learning_rate": 3.813400125234816e-05, |
|
"loss": 0.0034, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.7832422586520947, |
|
"grad_norm": 11.300631523132324, |
|
"learning_rate": 3.810269254852849e-05, |
|
"loss": 0.0706, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.785063752276867, |
|
"grad_norm": 0.10579409450292587, |
|
"learning_rate": 3.807138384470883e-05, |
|
"loss": 0.0033, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.7868852459016393, |
|
"grad_norm": 2.0246171951293945, |
|
"learning_rate": 3.804007514088917e-05, |
|
"loss": 0.157, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.7887067395264117, |
|
"grad_norm": 9.128832817077637, |
|
"learning_rate": 3.800876643706951e-05, |
|
"loss": 0.0421, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.7905282331511839, |
|
"grad_norm": 1.876198172569275, |
|
"learning_rate": 3.797745773324984e-05, |
|
"loss": 0.0129, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.7923497267759563, |
|
"grad_norm": 2.0219812393188477, |
|
"learning_rate": 3.794614902943019e-05, |
|
"loss": 0.1312, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7941712204007286, |
|
"grad_norm": 1.8168342113494873, |
|
"learning_rate": 3.7914840325610525e-05, |
|
"loss": 0.079, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.7959927140255009, |
|
"grad_norm": 9.818557739257812, |
|
"learning_rate": 3.7883531621790856e-05, |
|
"loss": 0.2222, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.7978142076502732, |
|
"grad_norm": 10.846918106079102, |
|
"learning_rate": 3.7852222917971194e-05, |
|
"loss": 0.1435, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.7996357012750456, |
|
"grad_norm": 10.874890327453613, |
|
"learning_rate": 3.782091421415154e-05, |
|
"loss": 0.1216, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8014571948998178, |
|
"grad_norm": 10.522339820861816, |
|
"learning_rate": 3.778960551033187e-05, |
|
"loss": 0.0773, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8032786885245902, |
|
"grad_norm": 11.772398948669434, |
|
"learning_rate": 3.775829680651221e-05, |
|
"loss": 0.5585, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.8051001821493625, |
|
"grad_norm": 2.1887152194976807, |
|
"learning_rate": 3.7726988102692554e-05, |
|
"loss": 0.041, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.8069216757741348, |
|
"grad_norm": 1.6691868305206299, |
|
"learning_rate": 3.769567939887289e-05, |
|
"loss": 0.0391, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.8087431693989071, |
|
"grad_norm": 1.4274911880493164, |
|
"learning_rate": 3.766437069505322e-05, |
|
"loss": 0.0277, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.8105646630236795, |
|
"grad_norm": 0.6223703026771545, |
|
"learning_rate": 3.763306199123356e-05, |
|
"loss": 0.0171, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8123861566484517, |
|
"grad_norm": 0.23633567988872528, |
|
"learning_rate": 3.7601753287413906e-05, |
|
"loss": 0.008, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.8142076502732241, |
|
"grad_norm": 1.7008121013641357, |
|
"learning_rate": 3.757044458359424e-05, |
|
"loss": 0.0079, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.8160291438979964, |
|
"grad_norm": 0.08883205056190491, |
|
"learning_rate": 3.7539135879774576e-05, |
|
"loss": 0.0032, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.8178506375227687, |
|
"grad_norm": 2.3876216411590576, |
|
"learning_rate": 3.750782717595492e-05, |
|
"loss": 0.111, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.819672131147541, |
|
"grad_norm": 5.239230155944824, |
|
"learning_rate": 3.747651847213526e-05, |
|
"loss": 0.0726, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8214936247723132, |
|
"grad_norm": 4.39957857131958, |
|
"learning_rate": 3.744520976831559e-05, |
|
"loss": 0.0349, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.8233151183970856, |
|
"grad_norm": 4.1816534996032715, |
|
"learning_rate": 3.7413901064495935e-05, |
|
"loss": 0.1397, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.825136612021858, |
|
"grad_norm": 0.13828523457050323, |
|
"learning_rate": 3.738259236067627e-05, |
|
"loss": 0.0037, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.8269581056466302, |
|
"grad_norm": 0.7528238892555237, |
|
"learning_rate": 3.7351283656856605e-05, |
|
"loss": 0.0062, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.8287795992714025, |
|
"grad_norm": 11.578791618347168, |
|
"learning_rate": 3.731997495303694e-05, |
|
"loss": 0.2069, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.8306010928961749, |
|
"grad_norm": 0.3673207759857178, |
|
"learning_rate": 3.728866624921729e-05, |
|
"loss": 0.0033, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.8324225865209471, |
|
"grad_norm": 0.3435652554035187, |
|
"learning_rate": 3.725735754539762e-05, |
|
"loss": 0.003, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.8342440801457195, |
|
"grad_norm": 0.055871132761240005, |
|
"learning_rate": 3.722604884157796e-05, |
|
"loss": 0.0019, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.8360655737704918, |
|
"grad_norm": 5.986182689666748, |
|
"learning_rate": 3.71947401377583e-05, |
|
"loss": 0.0398, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.8378870673952641, |
|
"grad_norm": 2.631530284881592, |
|
"learning_rate": 3.716343143393864e-05, |
|
"loss": 0.0158, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8397085610200364, |
|
"grad_norm": 12.033699035644531, |
|
"learning_rate": 3.713212273011897e-05, |
|
"loss": 0.1011, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.8415300546448088, |
|
"grad_norm": 0.41058558225631714, |
|
"learning_rate": 3.7100814026299316e-05, |
|
"loss": 0.0047, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.843351548269581, |
|
"grad_norm": 2.519164800643921, |
|
"learning_rate": 3.7069505322479655e-05, |
|
"loss": 0.2319, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.8451730418943534, |
|
"grad_norm": 0.09700655937194824, |
|
"learning_rate": 3.7038196618659986e-05, |
|
"loss": 0.0021, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.8469945355191257, |
|
"grad_norm": 1.427298903465271, |
|
"learning_rate": 3.7006887914840324e-05, |
|
"loss": 0.0121, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.848816029143898, |
|
"grad_norm": 2.7123031616210938, |
|
"learning_rate": 3.697557921102067e-05, |
|
"loss": 0.0099, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.8506375227686703, |
|
"grad_norm": 0.1832200437784195, |
|
"learning_rate": 3.6944270507201e-05, |
|
"loss": 0.0016, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.8524590163934426, |
|
"grad_norm": 0.05797650292515755, |
|
"learning_rate": 3.691296180338134e-05, |
|
"loss": 0.0017, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.8542805100182149, |
|
"grad_norm": 11.710906028747559, |
|
"learning_rate": 3.6881653099561683e-05, |
|
"loss": 0.2383, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.8561020036429873, |
|
"grad_norm": 0.06885645538568497, |
|
"learning_rate": 3.685034439574202e-05, |
|
"loss": 0.0019, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8579234972677595, |
|
"grad_norm": 0.06676291674375534, |
|
"learning_rate": 3.681903569192235e-05, |
|
"loss": 0.0019, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.8597449908925319, |
|
"grad_norm": 1.3994064331054688, |
|
"learning_rate": 3.678772698810269e-05, |
|
"loss": 0.0091, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.8615664845173042, |
|
"grad_norm": 0.18276052176952362, |
|
"learning_rate": 3.6756418284283036e-05, |
|
"loss": 0.0018, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.8633879781420765, |
|
"grad_norm": 0.28978365659713745, |
|
"learning_rate": 3.672510958046337e-05, |
|
"loss": 0.0033, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.8652094717668488, |
|
"grad_norm": 0.03744082152843475, |
|
"learning_rate": 3.6693800876643706e-05, |
|
"loss": 0.0013, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.8670309653916212, |
|
"grad_norm": 2.398526906967163, |
|
"learning_rate": 3.666249217282405e-05, |
|
"loss": 0.0122, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.8688524590163934, |
|
"grad_norm": 0.02406764030456543, |
|
"learning_rate": 3.663118346900439e-05, |
|
"loss": 0.0009, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.8706739526411658, |
|
"grad_norm": 0.37078964710235596, |
|
"learning_rate": 3.659987476518472e-05, |
|
"loss": 0.0022, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.8724954462659381, |
|
"grad_norm": 4.862973213195801, |
|
"learning_rate": 3.6568566061365065e-05, |
|
"loss": 0.0194, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.8743169398907104, |
|
"grad_norm": 0.055899012833833694, |
|
"learning_rate": 3.65372573575454e-05, |
|
"loss": 0.0012, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8761384335154827, |
|
"grad_norm": 4.453655242919922, |
|
"learning_rate": 3.6505948653725734e-05, |
|
"loss": 0.1545, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.8779599271402551, |
|
"grad_norm": 0.08148118853569031, |
|
"learning_rate": 3.647463994990607e-05, |
|
"loss": 0.0015, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.8797814207650273, |
|
"grad_norm": 0.11519090086221695, |
|
"learning_rate": 3.644333124608642e-05, |
|
"loss": 0.0012, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.8816029143897997, |
|
"grad_norm": 0.03914317488670349, |
|
"learning_rate": 3.641202254226675e-05, |
|
"loss": 0.0011, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.8834244080145719, |
|
"grad_norm": 1.1666841506958008, |
|
"learning_rate": 3.638071383844709e-05, |
|
"loss": 0.2138, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8852459016393442, |
|
"grad_norm": 0.02257809415459633, |
|
"learning_rate": 3.634940513462743e-05, |
|
"loss": 0.0008, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.8870673952641166, |
|
"grad_norm": 0.026867952197790146, |
|
"learning_rate": 3.631809643080777e-05, |
|
"loss": 0.001, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.03357498347759247, |
|
"learning_rate": 3.62867877269881e-05, |
|
"loss": 0.0012, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.8907103825136612, |
|
"grad_norm": 3.916914701461792, |
|
"learning_rate": 3.625547902316844e-05, |
|
"loss": 0.1349, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.8925318761384335, |
|
"grad_norm": 3.825364828109741, |
|
"learning_rate": 3.6224170319348784e-05, |
|
"loss": 0.0989, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8943533697632058, |
|
"grad_norm": 0.034188635647296906, |
|
"learning_rate": 3.6192861615529116e-05, |
|
"loss": 0.0011, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.8961748633879781, |
|
"grad_norm": 6.912594318389893, |
|
"learning_rate": 3.6161552911709454e-05, |
|
"loss": 0.0512, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.8979963570127505, |
|
"grad_norm": 3.054703712463379, |
|
"learning_rate": 3.61302442078898e-05, |
|
"loss": 0.0481, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.8998178506375227, |
|
"grad_norm": 0.16723403334617615, |
|
"learning_rate": 3.609893550407013e-05, |
|
"loss": 0.0034, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.9016393442622951, |
|
"grad_norm": 17.146343231201172, |
|
"learning_rate": 3.606762680025047e-05, |
|
"loss": 0.2352, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9034608378870674, |
|
"grad_norm": 4.063144207000732, |
|
"learning_rate": 3.603631809643081e-05, |
|
"loss": 0.0731, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.9052823315118397, |
|
"grad_norm": 0.10850164294242859, |
|
"learning_rate": 3.600500939261115e-05, |
|
"loss": 0.0027, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.907103825136612, |
|
"grad_norm": 0.18612350523471832, |
|
"learning_rate": 3.597370068879148e-05, |
|
"loss": 0.0035, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.9089253187613844, |
|
"grad_norm": 0.1478041708469391, |
|
"learning_rate": 3.594239198497182e-05, |
|
"loss": 0.0032, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.9107468123861566, |
|
"grad_norm": 0.07824063301086426, |
|
"learning_rate": 3.5911083281152166e-05, |
|
"loss": 0.002, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.912568306010929, |
|
"grad_norm": 0.08944151550531387, |
|
"learning_rate": 3.58797745773325e-05, |
|
"loss": 0.0026, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.9143897996357013, |
|
"grad_norm": 0.04674902930855751, |
|
"learning_rate": 3.5848465873512835e-05, |
|
"loss": 0.0015, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.9162112932604736, |
|
"grad_norm": 0.038566358387470245, |
|
"learning_rate": 3.581715716969318e-05, |
|
"loss": 0.0013, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.9180327868852459, |
|
"grad_norm": 5.497028350830078, |
|
"learning_rate": 3.578584846587352e-05, |
|
"loss": 0.2885, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.9198542805100182, |
|
"grad_norm": 0.0487968772649765, |
|
"learning_rate": 3.575453976205385e-05, |
|
"loss": 0.0015, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9216757741347905, |
|
"grad_norm": 0.04053632542490959, |
|
"learning_rate": 3.572323105823419e-05, |
|
"loss": 0.0014, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.9234972677595629, |
|
"grad_norm": 0.18789644539356232, |
|
"learning_rate": 3.569192235441453e-05, |
|
"loss": 0.003, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.9253187613843351, |
|
"grad_norm": 0.161463662981987, |
|
"learning_rate": 3.5660613650594864e-05, |
|
"loss": 0.0028, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.9271402550091075, |
|
"grad_norm": 0.05199124291539192, |
|
"learning_rate": 3.56293049467752e-05, |
|
"loss": 0.0017, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.9289617486338798, |
|
"grad_norm": 0.035535071045160294, |
|
"learning_rate": 3.559799624295555e-05, |
|
"loss": 0.0013, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.930783242258652, |
|
"grad_norm": 0.05581507459282875, |
|
"learning_rate": 3.556668753913588e-05, |
|
"loss": 0.0018, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.9326047358834244, |
|
"grad_norm": 0.47275644540786743, |
|
"learning_rate": 3.553537883531622e-05, |
|
"loss": 0.004, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.9344262295081968, |
|
"grad_norm": 0.0707859992980957, |
|
"learning_rate": 3.550407013149656e-05, |
|
"loss": 0.0015, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.936247723132969, |
|
"grad_norm": 8.448419570922852, |
|
"learning_rate": 3.54727614276769e-05, |
|
"loss": 0.1712, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.9380692167577414, |
|
"grad_norm": 2.0937178134918213, |
|
"learning_rate": 3.544145272385723e-05, |
|
"loss": 0.017, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.9398907103825137, |
|
"grad_norm": 0.7188909649848938, |
|
"learning_rate": 3.541014402003757e-05, |
|
"loss": 0.0084, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.941712204007286, |
|
"grad_norm": 0.07177567481994629, |
|
"learning_rate": 3.5378835316217914e-05, |
|
"loss": 0.0015, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.9435336976320583, |
|
"grad_norm": 0.686763346195221, |
|
"learning_rate": 3.5347526612398246e-05, |
|
"loss": 0.0047, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.9453551912568307, |
|
"grad_norm": 0.06569703668355942, |
|
"learning_rate": 3.5316217908578584e-05, |
|
"loss": 0.0016, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.9471766848816029, |
|
"grad_norm": 0.024774452671408653, |
|
"learning_rate": 3.528490920475893e-05, |
|
"loss": 0.0009, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9489981785063752, |
|
"grad_norm": 0.03425534442067146, |
|
"learning_rate": 3.525360050093926e-05, |
|
"loss": 0.0011, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.9508196721311475, |
|
"grad_norm": 0.03125905990600586, |
|
"learning_rate": 3.52222917971196e-05, |
|
"loss": 0.001, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.9526411657559198, |
|
"grad_norm": 10.502355575561523, |
|
"learning_rate": 3.5190983093299936e-05, |
|
"loss": 0.2401, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.9544626593806922, |
|
"grad_norm": 0.037440430372953415, |
|
"learning_rate": 3.515967438948028e-05, |
|
"loss": 0.0009, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.9562841530054644, |
|
"grad_norm": 6.415884017944336, |
|
"learning_rate": 3.512836568566061e-05, |
|
"loss": 0.1531, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.9581056466302368, |
|
"grad_norm": 0.03087371401488781, |
|
"learning_rate": 3.509705698184095e-05, |
|
"loss": 0.001, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.9599271402550091, |
|
"grad_norm": 8.961065292358398, |
|
"learning_rate": 3.5065748278021296e-05, |
|
"loss": 0.0851, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.9617486338797814, |
|
"grad_norm": 0.1427253782749176, |
|
"learning_rate": 3.503443957420163e-05, |
|
"loss": 0.0018, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.9635701275045537, |
|
"grad_norm": 2.9051027297973633, |
|
"learning_rate": 3.5003130870381965e-05, |
|
"loss": 0.0202, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.9653916211293261, |
|
"grad_norm": 1.785143494606018, |
|
"learning_rate": 3.497182216656231e-05, |
|
"loss": 0.191, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9672131147540983, |
|
"grad_norm": 0.08835508674383163, |
|
"learning_rate": 3.494051346274265e-05, |
|
"loss": 0.0012, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.9690346083788707, |
|
"grad_norm": 3.4107658863067627, |
|
"learning_rate": 3.490920475892298e-05, |
|
"loss": 0.0126, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.970856102003643, |
|
"grad_norm": 3.6474239826202393, |
|
"learning_rate": 3.487789605510332e-05, |
|
"loss": 0.1471, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.9726775956284153, |
|
"grad_norm": 1.437477946281433, |
|
"learning_rate": 3.484658735128366e-05, |
|
"loss": 0.0089, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.9744990892531876, |
|
"grad_norm": 0.03210463002324104, |
|
"learning_rate": 3.4815278647463994e-05, |
|
"loss": 0.001, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.97632058287796, |
|
"grad_norm": 4.223978042602539, |
|
"learning_rate": 3.478396994364433e-05, |
|
"loss": 0.0222, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.9781420765027322, |
|
"grad_norm": 0.0497698038816452, |
|
"learning_rate": 3.475266123982468e-05, |
|
"loss": 0.0016, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.9799635701275046, |
|
"grad_norm": 7.607251167297363, |
|
"learning_rate": 3.472135253600501e-05, |
|
"loss": 0.0362, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.9817850637522769, |
|
"grad_norm": 0.5749123692512512, |
|
"learning_rate": 3.4690043832185347e-05, |
|
"loss": 0.0097, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 0.22390051186084747, |
|
"learning_rate": 3.4658735128365685e-05, |
|
"loss": 0.0029, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9854280510018215, |
|
"grad_norm": 11.045784950256348, |
|
"learning_rate": 3.462742642454603e-05, |
|
"loss": 0.064, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.9872495446265938, |
|
"grad_norm": 0.17796143889427185, |
|
"learning_rate": 3.459611772072636e-05, |
|
"loss": 0.0021, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.9890710382513661, |
|
"grad_norm": 0.05928805470466614, |
|
"learning_rate": 3.45648090169067e-05, |
|
"loss": 0.0015, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.9908925318761385, |
|
"grad_norm": 0.3007548451423645, |
|
"learning_rate": 3.4533500313087044e-05, |
|
"loss": 0.0031, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.9927140255009107, |
|
"grad_norm": 0.2960835099220276, |
|
"learning_rate": 3.4502191609267375e-05, |
|
"loss": 0.003, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.994535519125683, |
|
"grad_norm": 0.2534222900867462, |
|
"learning_rate": 3.4470882905447714e-05, |
|
"loss": 0.0022, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.9963570127504554, |
|
"grad_norm": 9.211565971374512, |
|
"learning_rate": 3.443957420162806e-05, |
|
"loss": 0.0399, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.9981785063752276, |
|
"grad_norm": 4.776447772979736, |
|
"learning_rate": 3.4408265497808397e-05, |
|
"loss": 0.0172, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 8.753249168395996, |
|
"learning_rate": 3.437695679398873e-05, |
|
"loss": 0.1608, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.988615664845173, |
|
"eval_loss": 0.04577361047267914, |
|
"eval_runtime": 72.6414, |
|
"eval_samples_per_second": 60.461, |
|
"eval_steps_per_second": 1.9, |
|
"step": 549 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1647, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4622335020564480.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|