|
{ |
|
"best_metric": 0.8666666666666667, |
|
"best_model_checkpoint": "CTMAE2_CS_V7_5/checkpoint-1755", |
|
"epoch": 49.01494845360825, |
|
"eval_steps": 500, |
|
"global_step": 9700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010309278350515464, |
|
"grad_norm": 5.501185894012451, |
|
"learning_rate": 1.0309278350515465e-07, |
|
"loss": 0.6908, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002061855670103093, |
|
"grad_norm": 2.460876941680908, |
|
"learning_rate": 2.061855670103093e-07, |
|
"loss": 0.6912, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.003092783505154639, |
|
"grad_norm": 2.6134722232818604, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"loss": 0.6906, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.004123711340206186, |
|
"grad_norm": 3.098764181137085, |
|
"learning_rate": 4.123711340206186e-07, |
|
"loss": 0.6779, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.005154639175257732, |
|
"grad_norm": 2.6916747093200684, |
|
"learning_rate": 5.154639175257732e-07, |
|
"loss": 0.688, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.006185567010309278, |
|
"grad_norm": 2.6864609718322754, |
|
"learning_rate": 6.185567010309279e-07, |
|
"loss": 0.6744, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.007216494845360825, |
|
"grad_norm": 1.6162508726119995, |
|
"learning_rate": 7.216494845360824e-07, |
|
"loss": 0.6806, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.008247422680412371, |
|
"grad_norm": 6.75019645690918, |
|
"learning_rate": 8.247422680412372e-07, |
|
"loss": 0.6594, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.009278350515463918, |
|
"grad_norm": 3.412571668624878, |
|
"learning_rate": 9.278350515463919e-07, |
|
"loss": 0.6602, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.010309278350515464, |
|
"grad_norm": 3.0713565349578857, |
|
"learning_rate": 1.0309278350515464e-06, |
|
"loss": 0.6957, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01134020618556701, |
|
"grad_norm": 2.902609348297119, |
|
"learning_rate": 1.134020618556701e-06, |
|
"loss": 0.6819, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.012371134020618556, |
|
"grad_norm": 13.36193561553955, |
|
"learning_rate": 1.2371134020618557e-06, |
|
"loss": 0.6161, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.013402061855670102, |
|
"grad_norm": 5.576000213623047, |
|
"learning_rate": 1.3402061855670104e-06, |
|
"loss": 0.6261, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01443298969072165, |
|
"grad_norm": 6.617053031921387, |
|
"learning_rate": 1.4432989690721649e-06, |
|
"loss": 0.529, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.015463917525773196, |
|
"grad_norm": 12.840880393981934, |
|
"learning_rate": 1.5463917525773197e-06, |
|
"loss": 0.7079, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.016494845360824743, |
|
"grad_norm": 10.174439430236816, |
|
"learning_rate": 1.6494845360824744e-06, |
|
"loss": 0.6972, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01752577319587629, |
|
"grad_norm": 19.259891510009766, |
|
"learning_rate": 1.7525773195876288e-06, |
|
"loss": 0.54, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.018556701030927835, |
|
"grad_norm": 6.065194129943848, |
|
"learning_rate": 1.8556701030927837e-06, |
|
"loss": 0.646, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01958762886597938, |
|
"grad_norm": 9.375090599060059, |
|
"learning_rate": 1.9587628865979384e-06, |
|
"loss": 0.6793, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.020103092783505156, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 0.7615872025489807, |
|
"eval_runtime": 15.3942, |
|
"eval_samples_per_second": 2.923, |
|
"eval_steps_per_second": 0.78, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.0005154639175258, |
|
"grad_norm": 13.356775283813477, |
|
"learning_rate": 2.061855670103093e-06, |
|
"loss": 0.5875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0015463917525773, |
|
"grad_norm": 4.08266544342041, |
|
"learning_rate": 2.1649484536082477e-06, |
|
"loss": 0.6459, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0025773195876289, |
|
"grad_norm": 6.374290466308594, |
|
"learning_rate": 2.268041237113402e-06, |
|
"loss": 0.6691, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0036082474226804, |
|
"grad_norm": 5.553038597106934, |
|
"learning_rate": 2.3711340206185566e-06, |
|
"loss": 0.6392, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.004639175257732, |
|
"grad_norm": 19.62584686279297, |
|
"learning_rate": 2.4742268041237115e-06, |
|
"loss": 0.634, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0056701030927835, |
|
"grad_norm": 8.070799827575684, |
|
"learning_rate": 2.577319587628866e-06, |
|
"loss": 0.6665, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.006701030927835, |
|
"grad_norm": 8.732718467712402, |
|
"learning_rate": 2.680412371134021e-06, |
|
"loss": 0.6228, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.0077319587628866, |
|
"grad_norm": 4.9655890464782715, |
|
"learning_rate": 2.7835051546391757e-06, |
|
"loss": 0.6189, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.0087628865979381, |
|
"grad_norm": 9.290786743164062, |
|
"learning_rate": 2.8865979381443297e-06, |
|
"loss": 0.7474, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.0097938144329897, |
|
"grad_norm": 7.63596248626709, |
|
"learning_rate": 2.9896907216494846e-06, |
|
"loss": 0.5911, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.0108247422680412, |
|
"grad_norm": 10.840821266174316, |
|
"learning_rate": 3.0927835051546395e-06, |
|
"loss": 0.6093, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0118556701030927, |
|
"grad_norm": 4.624059200286865, |
|
"learning_rate": 3.195876288659794e-06, |
|
"loss": 0.7803, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0128865979381443, |
|
"grad_norm": 5.822396755218506, |
|
"learning_rate": 3.298969072164949e-06, |
|
"loss": 0.598, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0139175257731958, |
|
"grad_norm": 3.33036732673645, |
|
"learning_rate": 3.4020618556701037e-06, |
|
"loss": 0.5865, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.0149484536082474, |
|
"grad_norm": 5.949455261230469, |
|
"learning_rate": 3.5051546391752577e-06, |
|
"loss": 0.6764, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.015979381443299, |
|
"grad_norm": 6.306659698486328, |
|
"learning_rate": 3.6082474226804126e-06, |
|
"loss": 0.645, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0170103092783505, |
|
"grad_norm": 17.39265251159668, |
|
"learning_rate": 3.7113402061855674e-06, |
|
"loss": 0.64, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.018041237113402, |
|
"grad_norm": 7.141329288482666, |
|
"learning_rate": 3.814432989690722e-06, |
|
"loss": 0.5337, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0190721649484535, |
|
"grad_norm": 6.430778980255127, |
|
"learning_rate": 3.917525773195877e-06, |
|
"loss": 0.5442, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.020103092783505, |
|
"grad_norm": 15.002116203308105, |
|
"learning_rate": 4.020618556701032e-06, |
|
"loss": 0.5591, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.020103092783505, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 0.7040268182754517, |
|
"eval_runtime": 12.9016, |
|
"eval_samples_per_second": 3.488, |
|
"eval_steps_per_second": 0.93, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.0010309278350515, |
|
"grad_norm": 9.877669334411621, |
|
"learning_rate": 4.123711340206186e-06, |
|
"loss": 0.5959, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.002061855670103, |
|
"grad_norm": 7.665306568145752, |
|
"learning_rate": 4.2268041237113405e-06, |
|
"loss": 0.6647, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.0030927835051546, |
|
"grad_norm": 19.635908126831055, |
|
"learning_rate": 4.329896907216495e-06, |
|
"loss": 0.5142, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.004123711340206, |
|
"grad_norm": 13.251553535461426, |
|
"learning_rate": 4.4329896907216494e-06, |
|
"loss": 0.7387, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.0051546391752577, |
|
"grad_norm": 33.24567794799805, |
|
"learning_rate": 4.536082474226804e-06, |
|
"loss": 0.5326, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.0061855670103093, |
|
"grad_norm": 17.012880325317383, |
|
"learning_rate": 4.639175257731959e-06, |
|
"loss": 0.6149, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.007216494845361, |
|
"grad_norm": 12.129619598388672, |
|
"learning_rate": 4.742268041237113e-06, |
|
"loss": 0.4566, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.0082474226804123, |
|
"grad_norm": 32.084442138671875, |
|
"learning_rate": 4.845360824742268e-06, |
|
"loss": 0.8027, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.009278350515464, |
|
"grad_norm": 6.839757442474365, |
|
"learning_rate": 4.948453608247423e-06, |
|
"loss": 0.5719, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.0103092783505154, |
|
"grad_norm": 18.979419708251953, |
|
"learning_rate": 5.051546391752578e-06, |
|
"loss": 0.5684, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.011340206185567, |
|
"grad_norm": 11.15890121459961, |
|
"learning_rate": 5.154639175257732e-06, |
|
"loss": 0.4082, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0123711340206185, |
|
"grad_norm": 10.10776138305664, |
|
"learning_rate": 5.257731958762888e-06, |
|
"loss": 0.5722, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.01340206185567, |
|
"grad_norm": 14.203100204467773, |
|
"learning_rate": 5.360824742268042e-06, |
|
"loss": 0.4529, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.0144329896907216, |
|
"grad_norm": 13.093475341796875, |
|
"learning_rate": 5.463917525773196e-06, |
|
"loss": 0.5994, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.015463917525773, |
|
"grad_norm": 23.01972007751465, |
|
"learning_rate": 5.567010309278351e-06, |
|
"loss": 0.5696, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.0164948453608247, |
|
"grad_norm": 24.430442810058594, |
|
"learning_rate": 5.670103092783505e-06, |
|
"loss": 0.5652, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.0175257731958762, |
|
"grad_norm": 15.79038143157959, |
|
"learning_rate": 5.7731958762886594e-06, |
|
"loss": 0.5913, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.0185567010309278, |
|
"grad_norm": 21.04030990600586, |
|
"learning_rate": 5.876288659793815e-06, |
|
"loss": 0.484, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.0195876288659793, |
|
"grad_norm": 8.871047973632812, |
|
"learning_rate": 5.979381443298969e-06, |
|
"loss": 0.7211, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.020103092783505, |
|
"eval_accuracy": 0.8222222222222222, |
|
"eval_loss": 0.4916439354419708, |
|
"eval_runtime": 12.7637, |
|
"eval_samples_per_second": 3.526, |
|
"eval_steps_per_second": 0.94, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 3.0005154639175258, |
|
"grad_norm": 9.930680274963379, |
|
"learning_rate": 6.082474226804124e-06, |
|
"loss": 0.4872, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.0015463917525773, |
|
"grad_norm": 8.775280952453613, |
|
"learning_rate": 6.185567010309279e-06, |
|
"loss": 0.4803, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.002577319587629, |
|
"grad_norm": 16.243377685546875, |
|
"learning_rate": 6.288659793814433e-06, |
|
"loss": 0.4948, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.0036082474226804, |
|
"grad_norm": 43.30958557128906, |
|
"learning_rate": 6.391752577319588e-06, |
|
"loss": 0.6622, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.004639175257732, |
|
"grad_norm": 22.228527069091797, |
|
"learning_rate": 6.494845360824743e-06, |
|
"loss": 0.4748, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.0056701030927835, |
|
"grad_norm": 17.75151252746582, |
|
"learning_rate": 6.597938144329898e-06, |
|
"loss": 0.4999, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.006701030927835, |
|
"grad_norm": 11.695698738098145, |
|
"learning_rate": 6.701030927835052e-06, |
|
"loss": 0.259, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.0077319587628866, |
|
"grad_norm": 38.64352035522461, |
|
"learning_rate": 6.804123711340207e-06, |
|
"loss": 1.0941, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.008762886597938, |
|
"grad_norm": 13.178338050842285, |
|
"learning_rate": 6.907216494845361e-06, |
|
"loss": 0.9189, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.0097938144329897, |
|
"grad_norm": 18.1324462890625, |
|
"learning_rate": 7.010309278350515e-06, |
|
"loss": 0.6344, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.010824742268041, |
|
"grad_norm": 11.067273139953613, |
|
"learning_rate": 7.113402061855671e-06, |
|
"loss": 0.4916, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.0118556701030927, |
|
"grad_norm": 7.603753566741943, |
|
"learning_rate": 7.216494845360825e-06, |
|
"loss": 0.5001, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.0128865979381443, |
|
"grad_norm": 4.259734630584717, |
|
"learning_rate": 7.319587628865979e-06, |
|
"loss": 0.4538, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.013917525773196, |
|
"grad_norm": 19.640830993652344, |
|
"learning_rate": 7.422680412371135e-06, |
|
"loss": 0.5484, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.0149484536082474, |
|
"grad_norm": 20.696624755859375, |
|
"learning_rate": 7.525773195876289e-06, |
|
"loss": 0.5014, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.015979381443299, |
|
"grad_norm": 59.72232437133789, |
|
"learning_rate": 7.628865979381444e-06, |
|
"loss": 0.5271, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.0170103092783505, |
|
"grad_norm": 11.003219604492188, |
|
"learning_rate": 7.731958762886599e-06, |
|
"loss": 0.5286, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.018041237113402, |
|
"grad_norm": 13.869205474853516, |
|
"learning_rate": 7.835051546391754e-06, |
|
"loss": 0.4798, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.0190721649484535, |
|
"grad_norm": 12.36577320098877, |
|
"learning_rate": 7.938144329896907e-06, |
|
"loss": 0.6944, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.020103092783505, |
|
"grad_norm": 30.60397720336914, |
|
"learning_rate": 8.041237113402063e-06, |
|
"loss": 0.5544, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.020103092783505, |
|
"eval_accuracy": 0.5555555555555556, |
|
"eval_loss": 0.7589893937110901, |
|
"eval_runtime": 12.6816, |
|
"eval_samples_per_second": 3.548, |
|
"eval_steps_per_second": 0.946, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.0010309278350515, |
|
"grad_norm": 20.62627601623535, |
|
"learning_rate": 8.144329896907216e-06, |
|
"loss": 0.5034, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.002061855670103, |
|
"grad_norm": 4.391873836517334, |
|
"learning_rate": 8.247422680412371e-06, |
|
"loss": 0.7131, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.003092783505155, |
|
"grad_norm": 14.114368438720703, |
|
"learning_rate": 8.350515463917526e-06, |
|
"loss": 0.5573, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.004123711340206, |
|
"grad_norm": 3.9918265342712402, |
|
"learning_rate": 8.453608247422681e-06, |
|
"loss": 0.5594, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.005154639175258, |
|
"grad_norm": 29.062408447265625, |
|
"learning_rate": 8.556701030927836e-06, |
|
"loss": 0.5178, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.006185567010309, |
|
"grad_norm": 13.267045021057129, |
|
"learning_rate": 8.65979381443299e-06, |
|
"loss": 0.6529, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.007216494845361, |
|
"grad_norm": 31.6133975982666, |
|
"learning_rate": 8.762886597938146e-06, |
|
"loss": 0.4495, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.008247422680412, |
|
"grad_norm": 7.6835150718688965, |
|
"learning_rate": 8.865979381443299e-06, |
|
"loss": 0.5844, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.009278350515464, |
|
"grad_norm": 3.838587522506714, |
|
"learning_rate": 8.969072164948455e-06, |
|
"loss": 0.6229, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.010309278350515, |
|
"grad_norm": 12.973073959350586, |
|
"learning_rate": 9.072164948453609e-06, |
|
"loss": 0.5575, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.011340206185567, |
|
"grad_norm": 9.979464530944824, |
|
"learning_rate": 9.175257731958764e-06, |
|
"loss": 0.7956, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.0123711340206185, |
|
"grad_norm": 10.023995399475098, |
|
"learning_rate": 9.278350515463918e-06, |
|
"loss": 0.5851, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.01340206185567, |
|
"grad_norm": 25.86085319519043, |
|
"learning_rate": 9.381443298969073e-06, |
|
"loss": 0.4864, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.014432989690722, |
|
"grad_norm": 5.599601745605469, |
|
"learning_rate": 9.484536082474226e-06, |
|
"loss": 0.5482, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.015463917525773, |
|
"grad_norm": 1.694999098777771, |
|
"learning_rate": 9.587628865979383e-06, |
|
"loss": 0.4848, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.016494845360825, |
|
"grad_norm": 49.36362075805664, |
|
"learning_rate": 9.690721649484536e-06, |
|
"loss": 0.3432, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.017525773195876, |
|
"grad_norm": 62.547489166259766, |
|
"learning_rate": 9.793814432989691e-06, |
|
"loss": 0.9801, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.018556701030928, |
|
"grad_norm": 6.7530517578125, |
|
"learning_rate": 9.896907216494846e-06, |
|
"loss": 0.7231, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.019587628865979, |
|
"grad_norm": 18.402511596679688, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6032, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.020103092783505, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.5508340001106262, |
|
"eval_runtime": 13.2243, |
|
"eval_samples_per_second": 3.403, |
|
"eval_steps_per_second": 0.907, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 5.000515463917526, |
|
"grad_norm": 9.008790969848633, |
|
"learning_rate": 9.988545246277205e-06, |
|
"loss": 0.5444, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.001546391752577, |
|
"grad_norm": 24.455364227294922, |
|
"learning_rate": 9.977090492554411e-06, |
|
"loss": 0.6265, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.002577319587629, |
|
"grad_norm": 2.534010410308838, |
|
"learning_rate": 9.965635738831616e-06, |
|
"loss": 0.4103, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.00360824742268, |
|
"grad_norm": 2.197962999343872, |
|
"learning_rate": 9.95418098510882e-06, |
|
"loss": 0.6683, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.004639175257732, |
|
"grad_norm": 21.681041717529297, |
|
"learning_rate": 9.942726231386026e-06, |
|
"loss": 0.5277, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.0056701030927835, |
|
"grad_norm": 6.0828752517700195, |
|
"learning_rate": 9.931271477663231e-06, |
|
"loss": 0.8429, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.006701030927835, |
|
"grad_norm": 9.508393287658691, |
|
"learning_rate": 9.919816723940437e-06, |
|
"loss": 0.4463, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.007731958762887, |
|
"grad_norm": 8.229544639587402, |
|
"learning_rate": 9.908361970217641e-06, |
|
"loss": 0.4216, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.008762886597938, |
|
"grad_norm": 45.83427429199219, |
|
"learning_rate": 9.896907216494846e-06, |
|
"loss": 0.5766, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.00979381443299, |
|
"grad_norm": 6.862059116363525, |
|
"learning_rate": 9.885452462772052e-06, |
|
"loss": 0.587, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.010824742268041, |
|
"grad_norm": 8.74276065826416, |
|
"learning_rate": 9.873997709049257e-06, |
|
"loss": 0.4442, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.011855670103093, |
|
"grad_norm": 22.86684226989746, |
|
"learning_rate": 9.862542955326461e-06, |
|
"loss": 0.6656, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.012886597938144, |
|
"grad_norm": 6.2574849128723145, |
|
"learning_rate": 9.851088201603667e-06, |
|
"loss": 0.421, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.013917525773196, |
|
"grad_norm": 19.12122344970703, |
|
"learning_rate": 9.839633447880872e-06, |
|
"loss": 0.4395, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.014948453608247, |
|
"grad_norm": 47.755435943603516, |
|
"learning_rate": 9.828178694158076e-06, |
|
"loss": 0.6335, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.015979381443299, |
|
"grad_norm": 11.343196868896484, |
|
"learning_rate": 9.81672394043528e-06, |
|
"loss": 0.4231, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.0170103092783505, |
|
"grad_norm": 15.7100248336792, |
|
"learning_rate": 9.805269186712487e-06, |
|
"loss": 0.4681, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.018041237113402, |
|
"grad_norm": 29.985126495361328, |
|
"learning_rate": 9.793814432989691e-06, |
|
"loss": 0.7692, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.0190721649484535, |
|
"grad_norm": 4.043577194213867, |
|
"learning_rate": 9.782359679266896e-06, |
|
"loss": 0.5417, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 5.020103092783505, |
|
"grad_norm": 6.033001899719238, |
|
"learning_rate": 9.770904925544102e-06, |
|
"loss": 0.518, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 5.020103092783505, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 0.8928155899047852, |
|
"eval_runtime": 13.9107, |
|
"eval_samples_per_second": 3.235, |
|
"eval_steps_per_second": 0.863, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 6.0010309278350515, |
|
"grad_norm": 6.170740604400635, |
|
"learning_rate": 9.759450171821306e-06, |
|
"loss": 0.5075, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 6.002061855670103, |
|
"grad_norm": 6.046166896820068, |
|
"learning_rate": 9.747995418098512e-06, |
|
"loss": 0.6887, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 6.003092783505155, |
|
"grad_norm": 8.674880981445312, |
|
"learning_rate": 9.736540664375717e-06, |
|
"loss": 0.5482, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.004123711340206, |
|
"grad_norm": 4.15226411819458, |
|
"learning_rate": 9.725085910652921e-06, |
|
"loss": 0.5811, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 6.005154639175258, |
|
"grad_norm": 9.141851425170898, |
|
"learning_rate": 9.713631156930127e-06, |
|
"loss": 0.507, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 6.006185567010309, |
|
"grad_norm": 10.06373119354248, |
|
"learning_rate": 9.702176403207332e-06, |
|
"loss": 0.6606, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 6.007216494845361, |
|
"grad_norm": 1.9274550676345825, |
|
"learning_rate": 9.690721649484536e-06, |
|
"loss": 0.4037, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 6.008247422680412, |
|
"grad_norm": 15.267620086669922, |
|
"learning_rate": 9.679266895761742e-06, |
|
"loss": 0.439, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.009278350515464, |
|
"grad_norm": 87.103759765625, |
|
"learning_rate": 9.667812142038947e-06, |
|
"loss": 0.6971, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 6.010309278350515, |
|
"grad_norm": 17.93890380859375, |
|
"learning_rate": 9.656357388316153e-06, |
|
"loss": 0.6488, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 6.011340206185567, |
|
"grad_norm": 13.628206253051758, |
|
"learning_rate": 9.644902634593357e-06, |
|
"loss": 0.4316, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 6.0123711340206185, |
|
"grad_norm": 1.0886458158493042, |
|
"learning_rate": 9.633447880870562e-06, |
|
"loss": 0.4252, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 6.01340206185567, |
|
"grad_norm": 0.41759127378463745, |
|
"learning_rate": 9.621993127147768e-06, |
|
"loss": 0.4604, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.014432989690722, |
|
"grad_norm": 7.780788421630859, |
|
"learning_rate": 9.610538373424972e-06, |
|
"loss": 0.7416, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 6.015463917525773, |
|
"grad_norm": 1.952467918395996, |
|
"learning_rate": 9.599083619702177e-06, |
|
"loss": 0.3673, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.016494845360825, |
|
"grad_norm": 2.227653741836548, |
|
"learning_rate": 9.587628865979383e-06, |
|
"loss": 0.5467, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 6.017525773195876, |
|
"grad_norm": 45.32661819458008, |
|
"learning_rate": 9.576174112256587e-06, |
|
"loss": 0.6066, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.018556701030928, |
|
"grad_norm": 6.04111385345459, |
|
"learning_rate": 9.564719358533792e-06, |
|
"loss": 0.9221, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.019587628865979, |
|
"grad_norm": 19.719083786010742, |
|
"learning_rate": 9.553264604810998e-06, |
|
"loss": 0.4857, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 6.020103092783505, |
|
"eval_accuracy": 0.6222222222222222, |
|
"eval_loss": 0.5889319777488708, |
|
"eval_runtime": 12.7268, |
|
"eval_samples_per_second": 3.536, |
|
"eval_steps_per_second": 0.943, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 7.000515463917526, |
|
"grad_norm": 4.581728935241699, |
|
"learning_rate": 9.541809851088203e-06, |
|
"loss": 0.488, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 7.001546391752577, |
|
"grad_norm": 27.277809143066406, |
|
"learning_rate": 9.530355097365407e-06, |
|
"loss": 0.5966, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 7.002577319587629, |
|
"grad_norm": 0.2609100341796875, |
|
"learning_rate": 9.518900343642611e-06, |
|
"loss": 0.3383, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 7.00360824742268, |
|
"grad_norm": 22.71802520751953, |
|
"learning_rate": 9.507445589919818e-06, |
|
"loss": 0.7841, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.004639175257732, |
|
"grad_norm": 16.892574310302734, |
|
"learning_rate": 9.495990836197022e-06, |
|
"loss": 0.4176, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 7.0056701030927835, |
|
"grad_norm": 39.4376335144043, |
|
"learning_rate": 9.484536082474226e-06, |
|
"loss": 0.3999, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 7.006701030927835, |
|
"grad_norm": 14.214164733886719, |
|
"learning_rate": 9.473081328751433e-06, |
|
"loss": 0.6387, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 7.007731958762887, |
|
"grad_norm": 11.498187065124512, |
|
"learning_rate": 9.461626575028637e-06, |
|
"loss": 0.5193, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 7.008762886597938, |
|
"grad_norm": 8.90798568725586, |
|
"learning_rate": 9.450171821305843e-06, |
|
"loss": 0.4921, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.00979381443299, |
|
"grad_norm": 4.212175369262695, |
|
"learning_rate": 9.438717067583048e-06, |
|
"loss": 0.4248, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 7.010824742268041, |
|
"grad_norm": 15.754873275756836, |
|
"learning_rate": 9.427262313860252e-06, |
|
"loss": 0.5522, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 7.011855670103093, |
|
"grad_norm": 20.611268997192383, |
|
"learning_rate": 9.415807560137458e-06, |
|
"loss": 0.5084, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 7.012886597938144, |
|
"grad_norm": 9.555137634277344, |
|
"learning_rate": 9.404352806414663e-06, |
|
"loss": 0.2632, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 7.013917525773196, |
|
"grad_norm": 2.367072343826294, |
|
"learning_rate": 9.392898052691867e-06, |
|
"loss": 0.4345, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.014948453608247, |
|
"grad_norm": 3.8319485187530518, |
|
"learning_rate": 9.381443298969073e-06, |
|
"loss": 0.5227, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 7.015979381443299, |
|
"grad_norm": 7.583319187164307, |
|
"learning_rate": 9.369988545246278e-06, |
|
"loss": 0.3003, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 7.0170103092783505, |
|
"grad_norm": 7.431451797485352, |
|
"learning_rate": 9.358533791523484e-06, |
|
"loss": 0.6448, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 7.018041237113402, |
|
"grad_norm": 32.28150939941406, |
|
"learning_rate": 9.347079037800688e-06, |
|
"loss": 0.4669, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 7.0190721649484535, |
|
"grad_norm": 24.32545280456543, |
|
"learning_rate": 9.335624284077893e-06, |
|
"loss": 0.5635, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.020103092783505, |
|
"grad_norm": 1.7349843978881836, |
|
"learning_rate": 9.324169530355099e-06, |
|
"loss": 0.3634, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 7.020103092783505, |
|
"eval_accuracy": 0.6444444444444445, |
|
"eval_loss": 0.8522602319717407, |
|
"eval_runtime": 12.7523, |
|
"eval_samples_per_second": 3.529, |
|
"eval_steps_per_second": 0.941, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 8.001030927835052, |
|
"grad_norm": 27.929956436157227, |
|
"learning_rate": 9.312714776632303e-06, |
|
"loss": 0.8071, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 8.002061855670103, |
|
"grad_norm": 19.774404525756836, |
|
"learning_rate": 9.301260022909508e-06, |
|
"loss": 0.714, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 8.003092783505155, |
|
"grad_norm": 5.483359336853027, |
|
"learning_rate": 9.289805269186714e-06, |
|
"loss": 0.4367, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 8.004123711340206, |
|
"grad_norm": 13.389904022216797, |
|
"learning_rate": 9.278350515463918e-06, |
|
"loss": 0.5099, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.005154639175258, |
|
"grad_norm": 7.315684795379639, |
|
"learning_rate": 9.266895761741125e-06, |
|
"loss": 0.4736, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 8.00618556701031, |
|
"grad_norm": 14.2125825881958, |
|
"learning_rate": 9.255441008018329e-06, |
|
"loss": 0.392, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 8.00721649484536, |
|
"grad_norm": 8.564682006835938, |
|
"learning_rate": 9.243986254295533e-06, |
|
"loss": 0.4436, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 8.008247422680412, |
|
"grad_norm": 27.293010711669922, |
|
"learning_rate": 9.23253150057274e-06, |
|
"loss": 0.4409, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 8.009278350515464, |
|
"grad_norm": 15.106826782226562, |
|
"learning_rate": 9.221076746849944e-06, |
|
"loss": 0.3801, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 8.010309278350515, |
|
"grad_norm": 22.984642028808594, |
|
"learning_rate": 9.209621993127148e-06, |
|
"loss": 0.5309, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 8.011340206185567, |
|
"grad_norm": 40.4260139465332, |
|
"learning_rate": 9.198167239404353e-06, |
|
"loss": 0.6704, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 8.012371134020619, |
|
"grad_norm": 11.083319664001465, |
|
"learning_rate": 9.186712485681557e-06, |
|
"loss": 0.651, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 8.01340206185567, |
|
"grad_norm": 6.827057838439941, |
|
"learning_rate": 9.175257731958764e-06, |
|
"loss": 0.7953, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 8.014432989690722, |
|
"grad_norm": 6.367009162902832, |
|
"learning_rate": 9.163802978235968e-06, |
|
"loss": 0.454, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.015463917525773, |
|
"grad_norm": 23.583059310913086, |
|
"learning_rate": 9.152348224513174e-06, |
|
"loss": 0.609, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 8.016494845360825, |
|
"grad_norm": 3.92618989944458, |
|
"learning_rate": 9.140893470790379e-06, |
|
"loss": 0.4297, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 8.017525773195876, |
|
"grad_norm": 4.04769229888916, |
|
"learning_rate": 9.129438717067583e-06, |
|
"loss": 0.3581, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 8.018556701030928, |
|
"grad_norm": 16.875659942626953, |
|
"learning_rate": 9.117983963344789e-06, |
|
"loss": 0.5501, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 8.01958762886598, |
|
"grad_norm": 2.816603183746338, |
|
"learning_rate": 9.106529209621994e-06, |
|
"loss": 0.4082, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.020103092783506, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.36465370655059814, |
|
"eval_runtime": 12.703, |
|
"eval_samples_per_second": 3.542, |
|
"eval_steps_per_second": 0.945, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 9.000515463917527, |
|
"grad_norm": 19.659175872802734, |
|
"learning_rate": 9.095074455899198e-06, |
|
"loss": 0.2411, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 9.001546391752578, |
|
"grad_norm": 9.694845199584961, |
|
"learning_rate": 9.083619702176404e-06, |
|
"loss": 0.5444, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 9.00257731958763, |
|
"grad_norm": 51.400840759277344, |
|
"learning_rate": 9.072164948453609e-06, |
|
"loss": 0.5556, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 9.003608247422681, |
|
"grad_norm": 20.920190811157227, |
|
"learning_rate": 9.060710194730815e-06, |
|
"loss": 0.4964, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 9.004639175257733, |
|
"grad_norm": 25.781005859375, |
|
"learning_rate": 9.04925544100802e-06, |
|
"loss": 0.4506, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.005670103092784, |
|
"grad_norm": 21.402894973754883, |
|
"learning_rate": 9.037800687285224e-06, |
|
"loss": 0.4645, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 9.006701030927836, |
|
"grad_norm": 6.41744327545166, |
|
"learning_rate": 9.02634593356243e-06, |
|
"loss": 0.2999, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 9.007731958762887, |
|
"grad_norm": 8.043073654174805, |
|
"learning_rate": 9.014891179839634e-06, |
|
"loss": 0.8446, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 9.008762886597939, |
|
"grad_norm": 10.563735008239746, |
|
"learning_rate": 9.003436426116839e-06, |
|
"loss": 0.3282, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 9.00979381443299, |
|
"grad_norm": 5.581331253051758, |
|
"learning_rate": 8.991981672394045e-06, |
|
"loss": 0.398, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 9.010824742268042, |
|
"grad_norm": 1.6693127155303955, |
|
"learning_rate": 8.98052691867125e-06, |
|
"loss": 0.2961, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 9.011855670103094, |
|
"grad_norm": 9.04586410522461, |
|
"learning_rate": 8.969072164948455e-06, |
|
"loss": 0.2784, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 9.012886597938145, |
|
"grad_norm": 15.87678337097168, |
|
"learning_rate": 8.95761741122566e-06, |
|
"loss": 0.4126, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 9.013917525773197, |
|
"grad_norm": 53.66142272949219, |
|
"learning_rate": 8.946162657502864e-06, |
|
"loss": 0.4191, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 9.014948453608248, |
|
"grad_norm": 31.80818748474121, |
|
"learning_rate": 8.93470790378007e-06, |
|
"loss": 0.4929, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.0159793814433, |
|
"grad_norm": 12.100385665893555, |
|
"learning_rate": 8.923253150057275e-06, |
|
"loss": 0.7639, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 9.017010309278351, |
|
"grad_norm": 3.1334633827209473, |
|
"learning_rate": 8.91179839633448e-06, |
|
"loss": 0.5291, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 9.018041237113403, |
|
"grad_norm": 6.849935531616211, |
|
"learning_rate": 8.900343642611684e-06, |
|
"loss": 0.6711, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 9.019072164948454, |
|
"grad_norm": 18.349355697631836, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.7262, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 9.020103092783506, |
|
"grad_norm": 41.263458251953125, |
|
"learning_rate": 8.877434135166094e-06, |
|
"loss": 0.4897, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 9.020103092783506, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.5648357272148132, |
|
"eval_runtime": 12.0862, |
|
"eval_samples_per_second": 3.723, |
|
"eval_steps_per_second": 0.993, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 10.001030927835052, |
|
"grad_norm": 22.894468307495117, |
|
"learning_rate": 8.865979381443299e-06, |
|
"loss": 0.4276, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 10.002061855670103, |
|
"grad_norm": 23.770139694213867, |
|
"learning_rate": 8.854524627720505e-06, |
|
"loss": 0.5575, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 10.003092783505155, |
|
"grad_norm": 7.501220703125, |
|
"learning_rate": 8.84306987399771e-06, |
|
"loss": 0.2925, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 10.004123711340206, |
|
"grad_norm": 16.40544891357422, |
|
"learning_rate": 8.831615120274914e-06, |
|
"loss": 0.4975, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 10.005154639175258, |
|
"grad_norm": 18.579830169677734, |
|
"learning_rate": 8.82016036655212e-06, |
|
"loss": 0.5642, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.00618556701031, |
|
"grad_norm": 10.547638893127441, |
|
"learning_rate": 8.808705612829325e-06, |
|
"loss": 0.3936, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 10.00721649484536, |
|
"grad_norm": 26.152042388916016, |
|
"learning_rate": 8.797250859106529e-06, |
|
"loss": 0.4257, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 10.008247422680412, |
|
"grad_norm": 29.884458541870117, |
|
"learning_rate": 8.785796105383735e-06, |
|
"loss": 0.6164, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 10.009278350515464, |
|
"grad_norm": 6.939556121826172, |
|
"learning_rate": 8.77434135166094e-06, |
|
"loss": 0.57, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 10.010309278350515, |
|
"grad_norm": 53.5316162109375, |
|
"learning_rate": 8.762886597938146e-06, |
|
"loss": 0.4892, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 10.011340206185567, |
|
"grad_norm": 14.673813819885254, |
|
"learning_rate": 8.75143184421535e-06, |
|
"loss": 0.3881, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 10.012371134020619, |
|
"grad_norm": 13.877957344055176, |
|
"learning_rate": 8.739977090492555e-06, |
|
"loss": 0.3445, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 10.01340206185567, |
|
"grad_norm": 0.6452627778053284, |
|
"learning_rate": 8.72852233676976e-06, |
|
"loss": 0.3769, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 10.014432989690722, |
|
"grad_norm": 27.587717056274414, |
|
"learning_rate": 8.717067583046965e-06, |
|
"loss": 0.4407, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 10.015463917525773, |
|
"grad_norm": 19.309782028198242, |
|
"learning_rate": 8.70561282932417e-06, |
|
"loss": 0.3936, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 10.016494845360825, |
|
"grad_norm": 13.032252311706543, |
|
"learning_rate": 8.694158075601376e-06, |
|
"loss": 0.3719, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 10.017525773195876, |
|
"grad_norm": 2.494875431060791, |
|
"learning_rate": 8.68270332187858e-06, |
|
"loss": 0.411, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 10.018556701030928, |
|
"grad_norm": 46.90223693847656, |
|
"learning_rate": 8.671248568155786e-06, |
|
"loss": 0.554, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 10.01958762886598, |
|
"grad_norm": 11.451055526733398, |
|
"learning_rate": 8.65979381443299e-06, |
|
"loss": 0.389, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.020103092783506, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.5735762715339661, |
|
"eval_runtime": 12.8299, |
|
"eval_samples_per_second": 3.507, |
|
"eval_steps_per_second": 0.935, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 11.000515463917527, |
|
"grad_norm": 11.298035621643066, |
|
"learning_rate": 8.648339060710195e-06, |
|
"loss": 0.5879, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 11.001546391752578, |
|
"grad_norm": 19.15152931213379, |
|
"learning_rate": 8.636884306987401e-06, |
|
"loss": 0.4341, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 11.00257731958763, |
|
"grad_norm": 1.2121660709381104, |
|
"learning_rate": 8.625429553264606e-06, |
|
"loss": 0.2884, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 11.003608247422681, |
|
"grad_norm": 36.596675872802734, |
|
"learning_rate": 8.61397479954181e-06, |
|
"loss": 0.4636, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 11.004639175257733, |
|
"grad_norm": 14.555014610290527, |
|
"learning_rate": 8.602520045819016e-06, |
|
"loss": 0.3522, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 11.005670103092784, |
|
"grad_norm": 77.03166961669922, |
|
"learning_rate": 8.591065292096221e-06, |
|
"loss": 0.3229, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 11.006701030927836, |
|
"grad_norm": 0.5664463639259338, |
|
"learning_rate": 8.579610538373425e-06, |
|
"loss": 0.5331, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 11.007731958762887, |
|
"grad_norm": 1.5499430894851685, |
|
"learning_rate": 8.56815578465063e-06, |
|
"loss": 0.4122, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 11.008762886597939, |
|
"grad_norm": 25.140165328979492, |
|
"learning_rate": 8.556701030927836e-06, |
|
"loss": 0.9055, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 11.00979381443299, |
|
"grad_norm": 42.95240020751953, |
|
"learning_rate": 8.54524627720504e-06, |
|
"loss": 0.4527, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 11.010824742268042, |
|
"grad_norm": 35.971248626708984, |
|
"learning_rate": 8.533791523482245e-06, |
|
"loss": 0.5452, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 11.011855670103094, |
|
"grad_norm": 21.85768699645996, |
|
"learning_rate": 8.522336769759451e-06, |
|
"loss": 0.5303, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 11.012886597938145, |
|
"grad_norm": 6.7367119789123535, |
|
"learning_rate": 8.510882016036655e-06, |
|
"loss": 0.5654, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 11.013917525773197, |
|
"grad_norm": 62.03700637817383, |
|
"learning_rate": 8.49942726231386e-06, |
|
"loss": 0.3585, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 11.014948453608248, |
|
"grad_norm": 79.50271606445312, |
|
"learning_rate": 8.487972508591066e-06, |
|
"loss": 0.5117, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 11.0159793814433, |
|
"grad_norm": 12.448205947875977, |
|
"learning_rate": 8.47651775486827e-06, |
|
"loss": 0.352, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 11.017010309278351, |
|
"grad_norm": 5.7825541496276855, |
|
"learning_rate": 8.465063001145477e-06, |
|
"loss": 0.4759, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 11.018041237113403, |
|
"grad_norm": 64.92884826660156, |
|
"learning_rate": 8.453608247422681e-06, |
|
"loss": 0.4795, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 11.019072164948454, |
|
"grad_norm": 8.412237167358398, |
|
"learning_rate": 8.442153493699886e-06, |
|
"loss": 0.4374, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 11.020103092783506, |
|
"grad_norm": 0.19983382523059845, |
|
"learning_rate": 8.430698739977092e-06, |
|
"loss": 0.3753, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 11.020103092783506, |
|
"eval_accuracy": 0.5777777777777777, |
|
"eval_loss": 1.0849213600158691, |
|
"eval_runtime": 12.7494, |
|
"eval_samples_per_second": 3.53, |
|
"eval_steps_per_second": 0.941, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 12.001030927835052, |
|
"grad_norm": 0.6972218155860901, |
|
"learning_rate": 8.419243986254296e-06, |
|
"loss": 0.2594, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 12.002061855670103, |
|
"grad_norm": 27.135608673095703, |
|
"learning_rate": 8.4077892325315e-06, |
|
"loss": 0.3156, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 12.003092783505155, |
|
"grad_norm": 126.41283416748047, |
|
"learning_rate": 8.396334478808707e-06, |
|
"loss": 0.3705, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 12.004123711340206, |
|
"grad_norm": 96.77040100097656, |
|
"learning_rate": 8.384879725085911e-06, |
|
"loss": 0.503, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 12.005154639175258, |
|
"grad_norm": 0.21895720064640045, |
|
"learning_rate": 8.373424971363117e-06, |
|
"loss": 0.5193, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 12.00618556701031, |
|
"grad_norm": 0.8776753544807434, |
|
"learning_rate": 8.361970217640322e-06, |
|
"loss": 0.3678, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 12.00721649484536, |
|
"grad_norm": 0.986826479434967, |
|
"learning_rate": 8.350515463917526e-06, |
|
"loss": 0.9035, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 12.008247422680412, |
|
"grad_norm": 33.315673828125, |
|
"learning_rate": 8.339060710194732e-06, |
|
"loss": 0.5579, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 12.009278350515464, |
|
"grad_norm": 25.64463996887207, |
|
"learning_rate": 8.327605956471937e-06, |
|
"loss": 0.4433, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 12.010309278350515, |
|
"grad_norm": 61.11088180541992, |
|
"learning_rate": 8.316151202749141e-06, |
|
"loss": 0.4935, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 12.011340206185567, |
|
"grad_norm": 0.02137608826160431, |
|
"learning_rate": 8.304696449026347e-06, |
|
"loss": 0.1616, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 12.012371134020619, |
|
"grad_norm": 54.19511413574219, |
|
"learning_rate": 8.293241695303552e-06, |
|
"loss": 1.0093, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 12.01340206185567, |
|
"grad_norm": 8.395423889160156, |
|
"learning_rate": 8.281786941580758e-06, |
|
"loss": 0.1185, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 12.014432989690722, |
|
"grad_norm": 42.02037811279297, |
|
"learning_rate": 8.27033218785796e-06, |
|
"loss": 0.8235, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 12.015463917525773, |
|
"grad_norm": 22.178823471069336, |
|
"learning_rate": 8.258877434135167e-06, |
|
"loss": 0.5585, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 12.016494845360825, |
|
"grad_norm": 2.1525092124938965, |
|
"learning_rate": 8.247422680412371e-06, |
|
"loss": 0.7552, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.017525773195876, |
|
"grad_norm": 42.38743209838867, |
|
"learning_rate": 8.235967926689576e-06, |
|
"loss": 0.3942, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 12.018556701030928, |
|
"grad_norm": 6.113926410675049, |
|
"learning_rate": 8.224513172966782e-06, |
|
"loss": 0.4449, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 12.01958762886598, |
|
"grad_norm": 1.72330904006958, |
|
"learning_rate": 8.213058419243986e-06, |
|
"loss": 0.3118, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 12.020103092783506, |
|
"eval_accuracy": 0.6222222222222222, |
|
"eval_loss": 1.0597846508026123, |
|
"eval_runtime": 12.6829, |
|
"eval_samples_per_second": 3.548, |
|
"eval_steps_per_second": 0.946, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 13.000515463917527, |
|
"grad_norm": 8.192316055297852, |
|
"learning_rate": 8.201603665521193e-06, |
|
"loss": 0.4995, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 13.001546391752578, |
|
"grad_norm": 45.1771125793457, |
|
"learning_rate": 8.190148911798397e-06, |
|
"loss": 0.3395, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 13.00257731958763, |
|
"grad_norm": 42.817996978759766, |
|
"learning_rate": 8.178694158075601e-06, |
|
"loss": 0.7111, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 13.003608247422681, |
|
"grad_norm": 63.306854248046875, |
|
"learning_rate": 8.167239404352808e-06, |
|
"loss": 0.3741, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 13.004639175257733, |
|
"grad_norm": 14.730040550231934, |
|
"learning_rate": 8.155784650630012e-06, |
|
"loss": 0.4185, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 13.005670103092784, |
|
"grad_norm": 7.792994499206543, |
|
"learning_rate": 8.144329896907216e-06, |
|
"loss": 0.7574, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 13.006701030927836, |
|
"grad_norm": 3.4603207111358643, |
|
"learning_rate": 8.132875143184423e-06, |
|
"loss": 0.4004, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 13.007731958762887, |
|
"grad_norm": 34.50138854980469, |
|
"learning_rate": 8.121420389461627e-06, |
|
"loss": 0.7797, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 13.008762886597939, |
|
"grad_norm": 30.966228485107422, |
|
"learning_rate": 8.109965635738832e-06, |
|
"loss": 0.308, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 13.00979381443299, |
|
"grad_norm": 14.006564140319824, |
|
"learning_rate": 8.098510882016038e-06, |
|
"loss": 0.1939, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 13.010824742268042, |
|
"grad_norm": 31.138307571411133, |
|
"learning_rate": 8.087056128293242e-06, |
|
"loss": 0.308, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 13.011855670103094, |
|
"grad_norm": 17.796743392944336, |
|
"learning_rate": 8.075601374570448e-06, |
|
"loss": 0.3016, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 13.012886597938145, |
|
"grad_norm": 34.422054290771484, |
|
"learning_rate": 8.064146620847653e-06, |
|
"loss": 0.4823, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 13.013917525773197, |
|
"grad_norm": 0.29599234461784363, |
|
"learning_rate": 8.052691867124857e-06, |
|
"loss": 0.1329, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 13.014948453608248, |
|
"grad_norm": 46.61289978027344, |
|
"learning_rate": 8.041237113402063e-06, |
|
"loss": 0.5176, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 13.0159793814433, |
|
"grad_norm": 20.459152221679688, |
|
"learning_rate": 8.029782359679268e-06, |
|
"loss": 0.5521, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 13.017010309278351, |
|
"grad_norm": 48.8476676940918, |
|
"learning_rate": 8.018327605956472e-06, |
|
"loss": 0.3581, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 13.018041237113403, |
|
"grad_norm": 36.25406265258789, |
|
"learning_rate": 8.006872852233678e-06, |
|
"loss": 0.5387, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 13.019072164948454, |
|
"grad_norm": 15.75755786895752, |
|
"learning_rate": 7.995418098510883e-06, |
|
"loss": 0.3677, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 13.020103092783506, |
|
"grad_norm": 0.06314048171043396, |
|
"learning_rate": 7.983963344788089e-06, |
|
"loss": 0.5823, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 13.020103092783506, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7085598111152649, |
|
"eval_runtime": 14.437, |
|
"eval_samples_per_second": 3.117, |
|
"eval_steps_per_second": 0.831, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 14.001030927835052, |
|
"grad_norm": 15.936594009399414, |
|
"learning_rate": 7.972508591065293e-06, |
|
"loss": 0.2965, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 14.002061855670103, |
|
"grad_norm": 34.82296371459961, |
|
"learning_rate": 7.961053837342498e-06, |
|
"loss": 0.3932, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 14.003092783505155, |
|
"grad_norm": 24.281137466430664, |
|
"learning_rate": 7.949599083619702e-06, |
|
"loss": 0.2176, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 14.004123711340206, |
|
"grad_norm": 3.835561990737915, |
|
"learning_rate": 7.938144329896907e-06, |
|
"loss": 0.3466, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 14.005154639175258, |
|
"grad_norm": 59.81629943847656, |
|
"learning_rate": 7.926689576174113e-06, |
|
"loss": 0.4342, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 14.00618556701031, |
|
"grad_norm": 21.131526947021484, |
|
"learning_rate": 7.915234822451317e-06, |
|
"loss": 0.3176, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 14.00721649484536, |
|
"grad_norm": 28.3918399810791, |
|
"learning_rate": 7.903780068728523e-06, |
|
"loss": 0.508, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 14.008247422680412, |
|
"grad_norm": 68.05310821533203, |
|
"learning_rate": 7.892325315005728e-06, |
|
"loss": 0.2753, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 14.009278350515464, |
|
"grad_norm": 15.491647720336914, |
|
"learning_rate": 7.880870561282932e-06, |
|
"loss": 0.233, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 14.010309278350515, |
|
"grad_norm": 46.456077575683594, |
|
"learning_rate": 7.869415807560138e-06, |
|
"loss": 0.2713, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 14.011340206185567, |
|
"grad_norm": 11.542130470275879, |
|
"learning_rate": 7.857961053837343e-06, |
|
"loss": 0.488, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 14.012371134020619, |
|
"grad_norm": 21.926034927368164, |
|
"learning_rate": 7.846506300114547e-06, |
|
"loss": 1.0477, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 14.01340206185567, |
|
"grad_norm": 0.11074031889438629, |
|
"learning_rate": 7.835051546391754e-06, |
|
"loss": 0.3838, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 14.014432989690722, |
|
"grad_norm": 21.328231811523438, |
|
"learning_rate": 7.823596792668958e-06, |
|
"loss": 0.6549, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 14.015463917525773, |
|
"grad_norm": 45.43595886230469, |
|
"learning_rate": 7.812142038946164e-06, |
|
"loss": 0.5422, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 14.016494845360825, |
|
"grad_norm": 43.23012161254883, |
|
"learning_rate": 7.800687285223369e-06, |
|
"loss": 0.348, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 14.017525773195876, |
|
"grad_norm": 97.96592712402344, |
|
"learning_rate": 7.789232531500573e-06, |
|
"loss": 0.3177, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 14.018556701030928, |
|
"grad_norm": 24.8525390625, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.46, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 14.01958762886598, |
|
"grad_norm": 0.3702309727668762, |
|
"learning_rate": 7.766323024054984e-06, |
|
"loss": 0.2604, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 14.020103092783506, |
|
"eval_accuracy": 0.6222222222222222, |
|
"eval_loss": 1.4168084859848022, |
|
"eval_runtime": 13.2629, |
|
"eval_samples_per_second": 3.393, |
|
"eval_steps_per_second": 0.905, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 15.000515463917527, |
|
"grad_norm": 21.372608184814453, |
|
"learning_rate": 7.754868270332188e-06, |
|
"loss": 0.5669, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 15.001546391752578, |
|
"grad_norm": 2.491281270980835, |
|
"learning_rate": 7.743413516609394e-06, |
|
"loss": 0.5194, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 15.00257731958763, |
|
"grad_norm": 30.641578674316406, |
|
"learning_rate": 7.731958762886599e-06, |
|
"loss": 0.3343, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 15.003608247422681, |
|
"grad_norm": 61.93765640258789, |
|
"learning_rate": 7.720504009163803e-06, |
|
"loss": 0.399, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 15.004639175257733, |
|
"grad_norm": 41.56774139404297, |
|
"learning_rate": 7.70904925544101e-06, |
|
"loss": 0.2883, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 15.005670103092784, |
|
"grad_norm": 31.026432037353516, |
|
"learning_rate": 7.697594501718214e-06, |
|
"loss": 0.5769, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 15.006701030927836, |
|
"grad_norm": 32.793243408203125, |
|
"learning_rate": 7.68613974799542e-06, |
|
"loss": 0.5666, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 15.007731958762887, |
|
"grad_norm": 68.84432220458984, |
|
"learning_rate": 7.674684994272624e-06, |
|
"loss": 0.5853, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 15.008762886597939, |
|
"grad_norm": 79.7370376586914, |
|
"learning_rate": 7.663230240549829e-06, |
|
"loss": 0.3274, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 15.00979381443299, |
|
"grad_norm": 0.1271493136882782, |
|
"learning_rate": 7.651775486827033e-06, |
|
"loss": 0.286, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 15.010824742268042, |
|
"grad_norm": 5.414377212524414, |
|
"learning_rate": 7.640320733104238e-06, |
|
"loss": 0.2541, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 15.011855670103094, |
|
"grad_norm": 7.974775791168213, |
|
"learning_rate": 7.628865979381444e-06, |
|
"loss": 0.6325, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 15.012886597938145, |
|
"grad_norm": 2.6058754920959473, |
|
"learning_rate": 7.617411225658648e-06, |
|
"loss": 0.6288, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 15.013917525773197, |
|
"grad_norm": 5.675416469573975, |
|
"learning_rate": 7.6059564719358535e-06, |
|
"loss": 0.2737, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 15.014948453608248, |
|
"grad_norm": 21.808191299438477, |
|
"learning_rate": 7.594501718213059e-06, |
|
"loss": 0.4687, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 15.0159793814433, |
|
"grad_norm": 4.567986011505127, |
|
"learning_rate": 7.583046964490264e-06, |
|
"loss": 0.2163, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 15.017010309278351, |
|
"grad_norm": 30.94685173034668, |
|
"learning_rate": 7.5715922107674686e-06, |
|
"loss": 0.3175, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 15.018041237113403, |
|
"grad_norm": 8.627877235412598, |
|
"learning_rate": 7.560137457044674e-06, |
|
"loss": 0.1734, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 15.019072164948454, |
|
"grad_norm": 91.24742889404297, |
|
"learning_rate": 7.548682703321879e-06, |
|
"loss": 0.3208, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 15.020103092783506, |
|
"grad_norm": 0.007905744016170502, |
|
"learning_rate": 7.5372279495990845e-06, |
|
"loss": 0.5767, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 15.020103092783506, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7965757846832275, |
|
"eval_runtime": 13.3334, |
|
"eval_samples_per_second": 3.375, |
|
"eval_steps_per_second": 0.9, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 16.001030927835053, |
|
"grad_norm": 4.9155964851379395, |
|
"learning_rate": 7.525773195876289e-06, |
|
"loss": 0.3086, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 16.002061855670103, |
|
"grad_norm": 41.20609664916992, |
|
"learning_rate": 7.514318442153494e-06, |
|
"loss": 0.7778, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 16.003092783505156, |
|
"grad_norm": 31.576202392578125, |
|
"learning_rate": 7.5028636884306995e-06, |
|
"loss": 0.0978, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 16.004123711340206, |
|
"grad_norm": 0.07261794060468674, |
|
"learning_rate": 7.491408934707905e-06, |
|
"loss": 0.2957, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 16.00515463917526, |
|
"grad_norm": 34.24305725097656, |
|
"learning_rate": 7.479954180985109e-06, |
|
"loss": 0.6108, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 16.00618556701031, |
|
"grad_norm": 64.02971649169922, |
|
"learning_rate": 7.4684994272623145e-06, |
|
"loss": 0.256, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 16.007216494845363, |
|
"grad_norm": 31.043434143066406, |
|
"learning_rate": 7.45704467353952e-06, |
|
"loss": 0.8459, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 16.008247422680412, |
|
"grad_norm": 100.2379379272461, |
|
"learning_rate": 7.445589919816725e-06, |
|
"loss": 0.391, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 16.009278350515466, |
|
"grad_norm": 34.66457748413086, |
|
"learning_rate": 7.43413516609393e-06, |
|
"loss": 0.7614, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 16.010309278350515, |
|
"grad_norm": 78.97329711914062, |
|
"learning_rate": 7.422680412371135e-06, |
|
"loss": 0.5367, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 16.01134020618557, |
|
"grad_norm": 55.71527862548828, |
|
"learning_rate": 7.41122565864834e-06, |
|
"loss": 0.5274, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 16.01237113402062, |
|
"grad_norm": 10.22823715209961, |
|
"learning_rate": 7.3997709049255455e-06, |
|
"loss": 0.3361, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 16.013402061855672, |
|
"grad_norm": 2.243086814880371, |
|
"learning_rate": 7.38831615120275e-06, |
|
"loss": 0.7985, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 16.01443298969072, |
|
"grad_norm": 4.293857574462891, |
|
"learning_rate": 7.376861397479955e-06, |
|
"loss": 0.2727, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 16.015463917525775, |
|
"grad_norm": 0.2114896923303604, |
|
"learning_rate": 7.3654066437571605e-06, |
|
"loss": 0.2435, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 16.016494845360825, |
|
"grad_norm": 36.44148635864258, |
|
"learning_rate": 7.353951890034365e-06, |
|
"loss": 0.5246, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 16.017525773195878, |
|
"grad_norm": 51.74038314819336, |
|
"learning_rate": 7.342497136311569e-06, |
|
"loss": 0.0543, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 16.018556701030928, |
|
"grad_norm": 50.51845169067383, |
|
"learning_rate": 7.331042382588775e-06, |
|
"loss": 0.7459, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 16.01958762886598, |
|
"grad_norm": 37.770626068115234, |
|
"learning_rate": 7.319587628865979e-06, |
|
"loss": 0.4844, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 16.020103092783504, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.0488171577453613, |
|
"eval_runtime": 12.4047, |
|
"eval_samples_per_second": 3.628, |
|
"eval_steps_per_second": 0.967, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 17.000515463917527, |
|
"grad_norm": 97.32837677001953, |
|
"learning_rate": 7.3081328751431845e-06, |
|
"loss": 0.1819, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 17.001546391752576, |
|
"grad_norm": 4.759186744689941, |
|
"learning_rate": 7.29667812142039e-06, |
|
"loss": 0.2479, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 17.00257731958763, |
|
"grad_norm": 1.728135347366333, |
|
"learning_rate": 7.285223367697595e-06, |
|
"loss": 0.5371, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 17.00360824742268, |
|
"grad_norm": 0.7207375168800354, |
|
"learning_rate": 7.2737686139747995e-06, |
|
"loss": 0.2002, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 17.004639175257733, |
|
"grad_norm": 32.98098373413086, |
|
"learning_rate": 7.262313860252005e-06, |
|
"loss": 0.4171, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 17.005670103092783, |
|
"grad_norm": 1.2624742984771729, |
|
"learning_rate": 7.25085910652921e-06, |
|
"loss": 0.2694, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 17.006701030927836, |
|
"grad_norm": 40.10099411010742, |
|
"learning_rate": 7.239404352806415e-06, |
|
"loss": 0.4878, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 17.007731958762886, |
|
"grad_norm": 0.11596504598855972, |
|
"learning_rate": 7.22794959908362e-06, |
|
"loss": 0.5169, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 17.00876288659794, |
|
"grad_norm": 52.03158187866211, |
|
"learning_rate": 7.216494845360825e-06, |
|
"loss": 0.6374, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 17.00979381443299, |
|
"grad_norm": 20.594680786132812, |
|
"learning_rate": 7.2050400916380304e-06, |
|
"loss": 0.8605, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 17.010824742268042, |
|
"grad_norm": 61.95948791503906, |
|
"learning_rate": 7.193585337915236e-06, |
|
"loss": 0.357, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 17.011855670103092, |
|
"grad_norm": 56.67446517944336, |
|
"learning_rate": 7.18213058419244e-06, |
|
"loss": 0.3436, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 17.012886597938145, |
|
"grad_norm": 0.013172958046197891, |
|
"learning_rate": 7.1706758304696455e-06, |
|
"loss": 0.1324, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 17.013917525773195, |
|
"grad_norm": 31.45111846923828, |
|
"learning_rate": 7.159221076746851e-06, |
|
"loss": 0.1581, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 17.01494845360825, |
|
"grad_norm": 49.000885009765625, |
|
"learning_rate": 7.147766323024056e-06, |
|
"loss": 0.2419, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 17.015979381443298, |
|
"grad_norm": 36.576114654541016, |
|
"learning_rate": 7.1363115693012605e-06, |
|
"loss": 0.4095, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 17.01701030927835, |
|
"grad_norm": 2.826425552368164, |
|
"learning_rate": 7.124856815578466e-06, |
|
"loss": 0.2097, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 17.0180412371134, |
|
"grad_norm": 43.49801254272461, |
|
"learning_rate": 7.113402061855671e-06, |
|
"loss": 0.2707, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 17.019072164948454, |
|
"grad_norm": 0.675110399723053, |
|
"learning_rate": 7.101947308132876e-06, |
|
"loss": 0.3188, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 17.020103092783504, |
|
"grad_norm": 0.0034136411268264055, |
|
"learning_rate": 7.090492554410081e-06, |
|
"loss": 0.0729, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 17.020103092783504, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.0075429677963257, |
|
"eval_runtime": 12.9969, |
|
"eval_samples_per_second": 3.462, |
|
"eval_steps_per_second": 0.923, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 18.001030927835053, |
|
"grad_norm": 19.52350425720215, |
|
"learning_rate": 7.079037800687286e-06, |
|
"loss": 0.4801, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 18.002061855670103, |
|
"grad_norm": 0.09289558976888657, |
|
"learning_rate": 7.0675830469644914e-06, |
|
"loss": 0.2281, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 18.003092783505156, |
|
"grad_norm": 38.042137145996094, |
|
"learning_rate": 7.056128293241697e-06, |
|
"loss": 0.1157, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 18.004123711340206, |
|
"grad_norm": 2.72111439704895, |
|
"learning_rate": 7.044673539518901e-06, |
|
"loss": 0.3428, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 18.00515463917526, |
|
"grad_norm": 1.5013760328292847, |
|
"learning_rate": 7.0332187857961065e-06, |
|
"loss": 0.5245, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 18.00618556701031, |
|
"grad_norm": 123.11444854736328, |
|
"learning_rate": 7.02176403207331e-06, |
|
"loss": 0.6386, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 18.007216494845363, |
|
"grad_norm": 5.601233959197998, |
|
"learning_rate": 7.010309278350515e-06, |
|
"loss": 0.3835, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 18.008247422680412, |
|
"grad_norm": 0.00440385052934289, |
|
"learning_rate": 6.998854524627721e-06, |
|
"loss": 0.6067, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 18.009278350515466, |
|
"grad_norm": 16.199575424194336, |
|
"learning_rate": 6.987399770904926e-06, |
|
"loss": 0.4997, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 18.010309278350515, |
|
"grad_norm": 65.31697082519531, |
|
"learning_rate": 6.9759450171821304e-06, |
|
"loss": 0.3521, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 18.01134020618557, |
|
"grad_norm": 9.682928085327148, |
|
"learning_rate": 6.964490263459336e-06, |
|
"loss": 0.3728, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 18.01237113402062, |
|
"grad_norm": 61.22377014160156, |
|
"learning_rate": 6.953035509736541e-06, |
|
"loss": 0.7717, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 18.013402061855672, |
|
"grad_norm": 55.61003494262695, |
|
"learning_rate": 6.941580756013746e-06, |
|
"loss": 0.4314, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 18.01443298969072, |
|
"grad_norm": 9.212549209594727, |
|
"learning_rate": 6.930126002290951e-06, |
|
"loss": 0.5837, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 18.015463917525775, |
|
"grad_norm": 9.7285737991333, |
|
"learning_rate": 6.918671248568156e-06, |
|
"loss": 0.3758, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 18.016494845360825, |
|
"grad_norm": 40.76766586303711, |
|
"learning_rate": 6.907216494845361e-06, |
|
"loss": 0.1162, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 18.017525773195878, |
|
"grad_norm": 50.804962158203125, |
|
"learning_rate": 6.895761741122567e-06, |
|
"loss": 0.0843, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 18.018556701030928, |
|
"grad_norm": 2.414264440536499, |
|
"learning_rate": 6.884306987399771e-06, |
|
"loss": 0.1919, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 18.01958762886598, |
|
"grad_norm": 1.428833246231079, |
|
"learning_rate": 6.872852233676976e-06, |
|
"loss": 0.4188, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 18.020103092783504, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 1.2723888158798218, |
|
"eval_runtime": 12.9654, |
|
"eval_samples_per_second": 3.471, |
|
"eval_steps_per_second": 0.926, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 19.000515463917527, |
|
"grad_norm": 44.43056869506836, |
|
"learning_rate": 6.861397479954182e-06, |
|
"loss": 0.2822, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 19.001546391752576, |
|
"grad_norm": 0.05244099348783493, |
|
"learning_rate": 6.849942726231387e-06, |
|
"loss": 0.1549, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 19.00257731958763, |
|
"grad_norm": 0.09560086578130722, |
|
"learning_rate": 6.8384879725085914e-06, |
|
"loss": 0.1708, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 19.00360824742268, |
|
"grad_norm": 0.2790253758430481, |
|
"learning_rate": 6.827033218785797e-06, |
|
"loss": 0.0709, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 19.004639175257733, |
|
"grad_norm": 4.10269021987915, |
|
"learning_rate": 6.815578465063002e-06, |
|
"loss": 0.46, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 19.005670103092783, |
|
"grad_norm": 22.924274444580078, |
|
"learning_rate": 6.804123711340207e-06, |
|
"loss": 0.4041, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 19.006701030927836, |
|
"grad_norm": 0.1639637053012848, |
|
"learning_rate": 6.792668957617412e-06, |
|
"loss": 0.3816, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 19.007731958762886, |
|
"grad_norm": 0.21744728088378906, |
|
"learning_rate": 6.781214203894617e-06, |
|
"loss": 0.1098, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 19.00876288659794, |
|
"grad_norm": 135.28933715820312, |
|
"learning_rate": 6.769759450171822e-06, |
|
"loss": 0.3937, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 19.00979381443299, |
|
"grad_norm": 104.67581176757812, |
|
"learning_rate": 6.758304696449028e-06, |
|
"loss": 0.4015, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 19.010824742268042, |
|
"grad_norm": 68.89933776855469, |
|
"learning_rate": 6.746849942726232e-06, |
|
"loss": 0.2513, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 19.011855670103092, |
|
"grad_norm": 0.19136613607406616, |
|
"learning_rate": 6.735395189003437e-06, |
|
"loss": 0.1594, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 19.012886597938145, |
|
"grad_norm": 106.2833023071289, |
|
"learning_rate": 6.723940435280643e-06, |
|
"loss": 0.6457, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 19.013917525773195, |
|
"grad_norm": 12.311858177185059, |
|
"learning_rate": 6.712485681557846e-06, |
|
"loss": 0.2894, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 19.01494845360825, |
|
"grad_norm": 10.986001014709473, |
|
"learning_rate": 6.701030927835052e-06, |
|
"loss": 0.6104, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 19.015979381443298, |
|
"grad_norm": 9.053174018859863, |
|
"learning_rate": 6.689576174112257e-06, |
|
"loss": 0.6795, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 19.01701030927835, |
|
"grad_norm": 0.6393508911132812, |
|
"learning_rate": 6.678121420389461e-06, |
|
"loss": 0.1746, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 19.0180412371134, |
|
"grad_norm": 41.1427001953125, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.1791, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 19.019072164948454, |
|
"grad_norm": 0.29769831895828247, |
|
"learning_rate": 6.655211912943872e-06, |
|
"loss": 0.1533, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 19.020103092783504, |
|
"grad_norm": 0.005904354155063629, |
|
"learning_rate": 6.643757159221077e-06, |
|
"loss": 0.247, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 19.020103092783504, |
|
"eval_accuracy": 0.7555555555555555, |
|
"eval_loss": 1.3884011507034302, |
|
"eval_runtime": 12.8798, |
|
"eval_samples_per_second": 3.494, |
|
"eval_steps_per_second": 0.932, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 20.001030927835053, |
|
"grad_norm": 121.39861297607422, |
|
"learning_rate": 6.632302405498282e-06, |
|
"loss": 0.4984, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 20.002061855670103, |
|
"grad_norm": 80.33428192138672, |
|
"learning_rate": 6.620847651775487e-06, |
|
"loss": 0.4636, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 20.003092783505156, |
|
"grad_norm": 0.3919704854488373, |
|
"learning_rate": 6.609392898052692e-06, |
|
"loss": 0.3251, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 20.004123711340206, |
|
"grad_norm": 0.08509159088134766, |
|
"learning_rate": 6.597938144329898e-06, |
|
"loss": 0.1908, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 20.00515463917526, |
|
"grad_norm": 0.09845045208930969, |
|
"learning_rate": 6.586483390607102e-06, |
|
"loss": 0.4081, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 20.00618556701031, |
|
"grad_norm": 0.17746928334236145, |
|
"learning_rate": 6.575028636884307e-06, |
|
"loss": 0.1395, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 20.007216494845363, |
|
"grad_norm": 133.173828125, |
|
"learning_rate": 6.563573883161513e-06, |
|
"loss": 0.4535, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 20.008247422680412, |
|
"grad_norm": 0.980322003364563, |
|
"learning_rate": 6.552119129438718e-06, |
|
"loss": 0.473, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 20.009278350515466, |
|
"grad_norm": 21.204669952392578, |
|
"learning_rate": 6.540664375715922e-06, |
|
"loss": 0.2637, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 20.010309278350515, |
|
"grad_norm": 88.95044708251953, |
|
"learning_rate": 6.529209621993128e-06, |
|
"loss": 0.2103, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 20.01134020618557, |
|
"grad_norm": 0.22797317802906036, |
|
"learning_rate": 6.517754868270333e-06, |
|
"loss": 0.3344, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 20.01237113402062, |
|
"grad_norm": 0.12223013490438461, |
|
"learning_rate": 6.506300114547538e-06, |
|
"loss": 0.0848, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 20.013402061855672, |
|
"grad_norm": 0.11136689782142639, |
|
"learning_rate": 6.494845360824743e-06, |
|
"loss": 0.3754, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 20.01443298969072, |
|
"grad_norm": 0.18626107275485992, |
|
"learning_rate": 6.483390607101948e-06, |
|
"loss": 0.1341, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 20.015463917525775, |
|
"grad_norm": 10.783482551574707, |
|
"learning_rate": 6.471935853379153e-06, |
|
"loss": 0.0183, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 20.016494845360825, |
|
"grad_norm": 0.06843023747205734, |
|
"learning_rate": 6.460481099656359e-06, |
|
"loss": 0.2099, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 20.017525773195878, |
|
"grad_norm": 0.16018100082874298, |
|
"learning_rate": 6.449026345933563e-06, |
|
"loss": 0.1934, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 20.018556701030928, |
|
"grad_norm": 0.03921249136328697, |
|
"learning_rate": 6.437571592210768e-06, |
|
"loss": 0.2822, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 20.01958762886598, |
|
"grad_norm": 73.11908721923828, |
|
"learning_rate": 6.426116838487974e-06, |
|
"loss": 0.8041, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 20.020103092783504, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.1552423238754272, |
|
"eval_runtime": 12.8488, |
|
"eval_samples_per_second": 3.502, |
|
"eval_steps_per_second": 0.934, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 21.000515463917527, |
|
"grad_norm": 0.8366032242774963, |
|
"learning_rate": 6.414662084765179e-06, |
|
"loss": 0.4461, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 21.001546391752576, |
|
"grad_norm": 0.06696546822786331, |
|
"learning_rate": 6.4032073310423825e-06, |
|
"loss": 0.3449, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 21.00257731958763, |
|
"grad_norm": 55.97826385498047, |
|
"learning_rate": 6.391752577319588e-06, |
|
"loss": 0.6324, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 21.00360824742268, |
|
"grad_norm": 30.455825805664062, |
|
"learning_rate": 6.380297823596793e-06, |
|
"loss": 0.4075, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 21.004639175257733, |
|
"grad_norm": 0.05050866678357124, |
|
"learning_rate": 6.3688430698739976e-06, |
|
"loss": 0.0636, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 21.005670103092783, |
|
"grad_norm": 114.8602294921875, |
|
"learning_rate": 6.357388316151203e-06, |
|
"loss": 0.3984, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 21.006701030927836, |
|
"grad_norm": 8.037965774536133, |
|
"learning_rate": 6.345933562428408e-06, |
|
"loss": 0.1513, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 21.007731958762886, |
|
"grad_norm": 0.20525996387004852, |
|
"learning_rate": 6.3344788087056135e-06, |
|
"loss": 0.1569, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 21.00876288659794, |
|
"grad_norm": 0.16769638657569885, |
|
"learning_rate": 6.323024054982818e-06, |
|
"loss": 0.2572, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 21.00979381443299, |
|
"grad_norm": 12.56114387512207, |
|
"learning_rate": 6.311569301260023e-06, |
|
"loss": 0.1308, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 21.010824742268042, |
|
"grad_norm": 0.05304228886961937, |
|
"learning_rate": 6.3001145475372285e-06, |
|
"loss": 0.2517, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 21.011855670103092, |
|
"grad_norm": 0.02033202536404133, |
|
"learning_rate": 6.288659793814433e-06, |
|
"loss": 0.1589, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 21.012886597938145, |
|
"grad_norm": 40.56743240356445, |
|
"learning_rate": 6.277205040091638e-06, |
|
"loss": 0.1517, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 21.013917525773195, |
|
"grad_norm": 0.18166792392730713, |
|
"learning_rate": 6.2657502863688435e-06, |
|
"loss": 0.1245, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 21.01494845360825, |
|
"grad_norm": 168.2699432373047, |
|
"learning_rate": 6.254295532646049e-06, |
|
"loss": 0.751, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 21.015979381443298, |
|
"grad_norm": 63.15861511230469, |
|
"learning_rate": 6.242840778923253e-06, |
|
"loss": 0.4252, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 21.01701030927835, |
|
"grad_norm": 0.03196326643228531, |
|
"learning_rate": 6.231386025200459e-06, |
|
"loss": 0.188, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 21.0180412371134, |
|
"grad_norm": 0.10200751572847366, |
|
"learning_rate": 6.219931271477664e-06, |
|
"loss": 0.44, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 21.019072164948454, |
|
"grad_norm": 0.9600178003311157, |
|
"learning_rate": 6.208476517754869e-06, |
|
"loss": 0.6086, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 21.020103092783504, |
|
"grad_norm": 0.15486416220664978, |
|
"learning_rate": 6.197021764032074e-06, |
|
"loss": 0.168, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 21.020103092783504, |
|
"eval_accuracy": 0.6888888888888889, |
|
"eval_loss": 1.7923972606658936, |
|
"eval_runtime": 13.1906, |
|
"eval_samples_per_second": 3.412, |
|
"eval_steps_per_second": 0.91, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 22.001030927835053, |
|
"grad_norm": 0.07751865684986115, |
|
"learning_rate": 6.185567010309279e-06, |
|
"loss": 0.187, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 22.002061855670103, |
|
"grad_norm": 10.4796142578125, |
|
"learning_rate": 6.174112256586484e-06, |
|
"loss": 0.6962, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 22.003092783505156, |
|
"grad_norm": 92.16351318359375, |
|
"learning_rate": 6.1626575028636895e-06, |
|
"loss": 0.2417, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 22.004123711340206, |
|
"grad_norm": 45.09387969970703, |
|
"learning_rate": 6.151202749140894e-06, |
|
"loss": 0.3242, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 22.00515463917526, |
|
"grad_norm": 78.62527465820312, |
|
"learning_rate": 6.139747995418099e-06, |
|
"loss": 0.1895, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 22.00618556701031, |
|
"grad_norm": 24.385234832763672, |
|
"learning_rate": 6.1282932416953046e-06, |
|
"loss": 0.487, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 22.007216494845363, |
|
"grad_norm": 0.06612977385520935, |
|
"learning_rate": 6.11683848797251e-06, |
|
"loss": 0.3641, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 22.008247422680412, |
|
"grad_norm": 37.19966125488281, |
|
"learning_rate": 6.105383734249714e-06, |
|
"loss": 0.4266, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 22.009278350515466, |
|
"grad_norm": 121.07097625732422, |
|
"learning_rate": 6.09392898052692e-06, |
|
"loss": 0.4789, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 22.010309278350515, |
|
"grad_norm": 40.00523376464844, |
|
"learning_rate": 6.082474226804124e-06, |
|
"loss": 0.4866, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 22.01134020618557, |
|
"grad_norm": 0.05597120150923729, |
|
"learning_rate": 6.0710194730813285e-06, |
|
"loss": 0.3815, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 22.01237113402062, |
|
"grad_norm": 0.8036267757415771, |
|
"learning_rate": 6.059564719358534e-06, |
|
"loss": 0.1899, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 22.013402061855672, |
|
"grad_norm": 144.4720001220703, |
|
"learning_rate": 6.048109965635739e-06, |
|
"loss": 0.4231, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 22.01443298969072, |
|
"grad_norm": 0.2990947961807251, |
|
"learning_rate": 6.036655211912944e-06, |
|
"loss": 0.2152, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 22.015463917525775, |
|
"grad_norm": 0.10453028976917267, |
|
"learning_rate": 6.025200458190149e-06, |
|
"loss": 0.404, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 22.016494845360825, |
|
"grad_norm": 112.81282043457031, |
|
"learning_rate": 6.013745704467354e-06, |
|
"loss": 0.1327, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 22.017525773195878, |
|
"grad_norm": 0.112935371696949, |
|
"learning_rate": 6.0022909507445594e-06, |
|
"loss": 0.117, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 22.018556701030928, |
|
"grad_norm": 4.366937160491943, |
|
"learning_rate": 5.990836197021765e-06, |
|
"loss": 0.4038, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 22.01958762886598, |
|
"grad_norm": 0.005484442692250013, |
|
"learning_rate": 5.979381443298969e-06, |
|
"loss": 0.2564, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 22.020103092783504, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.5681549310684204, |
|
"eval_runtime": 21.1102, |
|
"eval_samples_per_second": 2.132, |
|
"eval_steps_per_second": 0.568, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 23.000515463917527, |
|
"grad_norm": 14.104279518127441, |
|
"learning_rate": 5.9679266895761745e-06, |
|
"loss": 0.8201, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 23.001546391752576, |
|
"grad_norm": 1.4341076612472534, |
|
"learning_rate": 5.95647193585338e-06, |
|
"loss": 0.0827, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 23.00257731958763, |
|
"grad_norm": 13.500885963439941, |
|
"learning_rate": 5.945017182130585e-06, |
|
"loss": 0.1132, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 23.00360824742268, |
|
"grad_norm": 0.08413302898406982, |
|
"learning_rate": 5.9335624284077895e-06, |
|
"loss": 0.2336, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 23.004639175257733, |
|
"grad_norm": 18.12093734741211, |
|
"learning_rate": 5.922107674684995e-06, |
|
"loss": 0.4751, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 23.005670103092783, |
|
"grad_norm": 1.0863031148910522, |
|
"learning_rate": 5.9106529209622e-06, |
|
"loss": 0.1428, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 23.006701030927836, |
|
"grad_norm": 0.06023244187235832, |
|
"learning_rate": 5.8991981672394046e-06, |
|
"loss": 0.1603, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 23.007731958762886, |
|
"grad_norm": 2.2026829719543457, |
|
"learning_rate": 5.88774341351661e-06, |
|
"loss": 0.3565, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 23.00876288659794, |
|
"grad_norm": 100.63137817382812, |
|
"learning_rate": 5.876288659793815e-06, |
|
"loss": 0.1315, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 23.00979381443299, |
|
"grad_norm": 399.76123046875, |
|
"learning_rate": 5.8648339060710204e-06, |
|
"loss": 0.3714, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 23.010824742268042, |
|
"grad_norm": 154.24449157714844, |
|
"learning_rate": 5.853379152348225e-06, |
|
"loss": 0.3266, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 23.011855670103092, |
|
"grad_norm": 0.03562017157673836, |
|
"learning_rate": 5.84192439862543e-06, |
|
"loss": 0.3065, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 23.012886597938145, |
|
"grad_norm": 0.08134841173887253, |
|
"learning_rate": 5.8304696449026355e-06, |
|
"loss": 0.3782, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 23.013917525773195, |
|
"grad_norm": 181.0287628173828, |
|
"learning_rate": 5.819014891179841e-06, |
|
"loss": 0.4084, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 23.01494845360825, |
|
"grad_norm": 2.42154598236084, |
|
"learning_rate": 5.807560137457045e-06, |
|
"loss": 0.0032, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 23.015979381443298, |
|
"grad_norm": 123.42518615722656, |
|
"learning_rate": 5.7961053837342505e-06, |
|
"loss": 0.4781, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 23.01701030927835, |
|
"grad_norm": 3.3009181022644043, |
|
"learning_rate": 5.784650630011456e-06, |
|
"loss": 0.0389, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 23.0180412371134, |
|
"grad_norm": 0.07052547484636307, |
|
"learning_rate": 5.7731958762886594e-06, |
|
"loss": 0.3134, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 23.019072164948454, |
|
"grad_norm": 0.1382911652326584, |
|
"learning_rate": 5.761741122565865e-06, |
|
"loss": 0.5257, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 23.020103092783504, |
|
"grad_norm": 0.008108101785182953, |
|
"learning_rate": 5.75028636884307e-06, |
|
"loss": 0.2034, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 23.020103092783504, |
|
"eval_accuracy": 0.6888888888888889, |
|
"eval_loss": 1.6061440706253052, |
|
"eval_runtime": 12.9527, |
|
"eval_samples_per_second": 3.474, |
|
"eval_steps_per_second": 0.926, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 24.001030927835053, |
|
"grad_norm": 0.15025880932807922, |
|
"learning_rate": 5.738831615120275e-06, |
|
"loss": 0.0039, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 24.002061855670103, |
|
"grad_norm": 112.1488037109375, |
|
"learning_rate": 5.72737686139748e-06, |
|
"loss": 0.1306, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 24.003092783505156, |
|
"grad_norm": 0.02043057605624199, |
|
"learning_rate": 5.715922107674685e-06, |
|
"loss": 0.091, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 24.004123711340206, |
|
"grad_norm": 51.4376220703125, |
|
"learning_rate": 5.70446735395189e-06, |
|
"loss": 0.2416, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 24.00515463917526, |
|
"grad_norm": 47.24472427368164, |
|
"learning_rate": 5.693012600229096e-06, |
|
"loss": 0.4605, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 24.00618556701031, |
|
"grad_norm": 164.00759887695312, |
|
"learning_rate": 5.6815578465063e-06, |
|
"loss": 0.3363, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 24.007216494845363, |
|
"grad_norm": 0.009168693795800209, |
|
"learning_rate": 5.670103092783505e-06, |
|
"loss": 0.0162, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 24.008247422680412, |
|
"grad_norm": 38.39619827270508, |
|
"learning_rate": 5.658648339060711e-06, |
|
"loss": 0.4564, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 24.009278350515466, |
|
"grad_norm": 1.3691141605377197, |
|
"learning_rate": 5.647193585337916e-06, |
|
"loss": 0.2679, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 24.010309278350515, |
|
"grad_norm": 269.83489990234375, |
|
"learning_rate": 5.6357388316151204e-06, |
|
"loss": 0.4089, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 24.01134020618557, |
|
"grad_norm": 0.0756688266992569, |
|
"learning_rate": 5.624284077892326e-06, |
|
"loss": 0.1391, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 24.01237113402062, |
|
"grad_norm": 0.003854341572150588, |
|
"learning_rate": 5.612829324169531e-06, |
|
"loss": 0.548, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 24.013402061855672, |
|
"grad_norm": 93.3668212890625, |
|
"learning_rate": 5.601374570446736e-06, |
|
"loss": 0.2312, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 24.01443298969072, |
|
"grad_norm": 0.05672283470630646, |
|
"learning_rate": 5.589919816723941e-06, |
|
"loss": 0.1191, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 24.015463917525775, |
|
"grad_norm": 2.6616814136505127, |
|
"learning_rate": 5.578465063001146e-06, |
|
"loss": 0.0088, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 24.016494845360825, |
|
"grad_norm": 1.235160231590271, |
|
"learning_rate": 5.567010309278351e-06, |
|
"loss": 0.2314, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 24.017525773195878, |
|
"grad_norm": 14.321471214294434, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.35, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 24.018556701030928, |
|
"grad_norm": 0.02161994017660618, |
|
"learning_rate": 5.544100801832761e-06, |
|
"loss": 0.4611, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 24.01958762886598, |
|
"grad_norm": 0.03108271211385727, |
|
"learning_rate": 5.532646048109966e-06, |
|
"loss": 0.436, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 24.020103092783504, |
|
"eval_accuracy": 0.6888888888888889, |
|
"eval_loss": 1.5507913827896118, |
|
"eval_runtime": 12.8715, |
|
"eval_samples_per_second": 3.496, |
|
"eval_steps_per_second": 0.932, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 25.000515463917527, |
|
"grad_norm": 161.4268035888672, |
|
"learning_rate": 5.521191294387172e-06, |
|
"loss": 0.5077, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 25.001546391752576, |
|
"grad_norm": 0.3123011291027069, |
|
"learning_rate": 5.509736540664376e-06, |
|
"loss": 0.1336, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 25.00257731958763, |
|
"grad_norm": 138.39981079101562, |
|
"learning_rate": 5.4982817869415815e-06, |
|
"loss": 0.5236, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 25.00360824742268, |
|
"grad_norm": 0.1495334655046463, |
|
"learning_rate": 5.486827033218787e-06, |
|
"loss": 0.3321, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 25.004639175257733, |
|
"grad_norm": 122.72096252441406, |
|
"learning_rate": 5.475372279495992e-06, |
|
"loss": 0.7098, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 25.005670103092783, |
|
"grad_norm": 13.088062286376953, |
|
"learning_rate": 5.463917525773196e-06, |
|
"loss": 0.3201, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 25.006701030927836, |
|
"grad_norm": 0.04432636499404907, |
|
"learning_rate": 5.452462772050401e-06, |
|
"loss": 0.1142, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 25.007731958762886, |
|
"grad_norm": 0.874567985534668, |
|
"learning_rate": 5.441008018327606e-06, |
|
"loss": 0.1487, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 25.00876288659794, |
|
"grad_norm": 0.19093450903892517, |
|
"learning_rate": 5.429553264604811e-06, |
|
"loss": 0.1065, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 25.00979381443299, |
|
"grad_norm": 0.11419642716646194, |
|
"learning_rate": 5.418098510882016e-06, |
|
"loss": 0.199, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 25.010824742268042, |
|
"grad_norm": 4.453356742858887, |
|
"learning_rate": 5.406643757159221e-06, |
|
"loss": 0.0043, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 25.011855670103092, |
|
"grad_norm": 0.0850525051355362, |
|
"learning_rate": 5.395189003436427e-06, |
|
"loss": 0.0794, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 25.012886597938145, |
|
"grad_norm": 0.13754314184188843, |
|
"learning_rate": 5.383734249713631e-06, |
|
"loss": 0.2092, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 25.013917525773195, |
|
"grad_norm": 157.50311279296875, |
|
"learning_rate": 5.372279495990836e-06, |
|
"loss": 0.4186, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 25.01494845360825, |
|
"grad_norm": 0.975379228591919, |
|
"learning_rate": 5.360824742268042e-06, |
|
"loss": 0.1117, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 25.015979381443298, |
|
"grad_norm": 0.030048053711652756, |
|
"learning_rate": 5.349369988545247e-06, |
|
"loss": 0.1822, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 25.01701030927835, |
|
"grad_norm": 0.09299586713314056, |
|
"learning_rate": 5.337915234822451e-06, |
|
"loss": 0.2461, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 25.0180412371134, |
|
"grad_norm": 0.050019096583127975, |
|
"learning_rate": 5.326460481099657e-06, |
|
"loss": 0.024, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 25.019072164948454, |
|
"grad_norm": 152.73289489746094, |
|
"learning_rate": 5.315005727376862e-06, |
|
"loss": 0.2815, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 25.020103092783504, |
|
"grad_norm": 0.0027058201376348734, |
|
"learning_rate": 5.303550973654067e-06, |
|
"loss": 0.379, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 25.020103092783504, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 1.6197640895843506, |
|
"eval_runtime": 12.8821, |
|
"eval_samples_per_second": 3.493, |
|
"eval_steps_per_second": 0.932, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 26.001030927835053, |
|
"grad_norm": 0.036444660276174545, |
|
"learning_rate": 5.292096219931272e-06, |
|
"loss": 0.2972, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 26.002061855670103, |
|
"grad_norm": 8.088865280151367, |
|
"learning_rate": 5.280641466208477e-06, |
|
"loss": 0.0962, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 26.003092783505156, |
|
"grad_norm": 0.029521504417061806, |
|
"learning_rate": 5.269186712485682e-06, |
|
"loss": 0.0095, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 26.004123711340206, |
|
"grad_norm": 0.20875123143196106, |
|
"learning_rate": 5.257731958762888e-06, |
|
"loss": 0.4124, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 26.00515463917526, |
|
"grad_norm": 148.48397827148438, |
|
"learning_rate": 5.246277205040092e-06, |
|
"loss": 0.3919, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 26.00618556701031, |
|
"grad_norm": 0.03706370294094086, |
|
"learning_rate": 5.234822451317297e-06, |
|
"loss": 0.1514, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 26.007216494845363, |
|
"grad_norm": 0.0603543259203434, |
|
"learning_rate": 5.223367697594503e-06, |
|
"loss": 0.085, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 26.008247422680412, |
|
"grad_norm": 268.0271301269531, |
|
"learning_rate": 5.211912943871708e-06, |
|
"loss": 0.1849, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 26.009278350515466, |
|
"grad_norm": 0.05716723948717117, |
|
"learning_rate": 5.200458190148912e-06, |
|
"loss": 0.0813, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 26.010309278350515, |
|
"grad_norm": 36.733333587646484, |
|
"learning_rate": 5.189003436426118e-06, |
|
"loss": 0.1493, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 26.01134020618557, |
|
"grad_norm": 73.86190032958984, |
|
"learning_rate": 5.177548682703323e-06, |
|
"loss": 0.1373, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 26.01237113402062, |
|
"grad_norm": 0.04079816862940788, |
|
"learning_rate": 5.166093928980528e-06, |
|
"loss": 0.2272, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 26.013402061855672, |
|
"grad_norm": 0.07667230069637299, |
|
"learning_rate": 5.154639175257732e-06, |
|
"loss": 0.0554, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 26.01443298969072, |
|
"grad_norm": 0.8772750496864319, |
|
"learning_rate": 5.143184421534937e-06, |
|
"loss": 0.2251, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 26.015463917525775, |
|
"grad_norm": 0.012073386460542679, |
|
"learning_rate": 5.131729667812142e-06, |
|
"loss": 0.098, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 26.016494845360825, |
|
"grad_norm": 0.23029600083827972, |
|
"learning_rate": 5.120274914089347e-06, |
|
"loss": 0.1361, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 26.017525773195878, |
|
"grad_norm": 152.81813049316406, |
|
"learning_rate": 5.108820160366552e-06, |
|
"loss": 0.1956, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 26.018556701030928, |
|
"grad_norm": 0.01020987518131733, |
|
"learning_rate": 5.0973654066437575e-06, |
|
"loss": 0.3316, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 26.01958762886598, |
|
"grad_norm": 0.03010565973818302, |
|
"learning_rate": 5.085910652920962e-06, |
|
"loss": 0.0726, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 26.020103092783504, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 2.329340696334839, |
|
"eval_runtime": 12.9666, |
|
"eval_samples_per_second": 3.47, |
|
"eval_steps_per_second": 0.925, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 27.000515463917527, |
|
"grad_norm": 301.4482116699219, |
|
"learning_rate": 5.074455899198167e-06, |
|
"loss": 0.3906, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 27.001546391752576, |
|
"grad_norm": 0.012246874161064625, |
|
"learning_rate": 5.0630011454753726e-06, |
|
"loss": 0.3566, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 27.00257731958763, |
|
"grad_norm": 0.036673251539468765, |
|
"learning_rate": 5.051546391752578e-06, |
|
"loss": 0.0058, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 27.00360824742268, |
|
"grad_norm": 176.9713592529297, |
|
"learning_rate": 5.040091638029782e-06, |
|
"loss": 0.0528, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 27.004639175257733, |
|
"grad_norm": 110.79297637939453, |
|
"learning_rate": 5.028636884306988e-06, |
|
"loss": 0.1249, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 27.005670103092783, |
|
"grad_norm": 37.024574279785156, |
|
"learning_rate": 5.017182130584193e-06, |
|
"loss": 0.2464, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 27.006701030927836, |
|
"grad_norm": 0.11262047290802002, |
|
"learning_rate": 5.005727376861398e-06, |
|
"loss": 0.1313, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 27.007731958762886, |
|
"grad_norm": 0.6237982511520386, |
|
"learning_rate": 4.994272623138603e-06, |
|
"loss": 0.2988, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 27.00876288659794, |
|
"grad_norm": 0.011754998005926609, |
|
"learning_rate": 4.982817869415808e-06, |
|
"loss": 0.5804, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 27.00979381443299, |
|
"grad_norm": 0.027967087924480438, |
|
"learning_rate": 4.971363115693013e-06, |
|
"loss": 0.4306, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 27.010824742268042, |
|
"grad_norm": 0.041461341083049774, |
|
"learning_rate": 4.9599083619702185e-06, |
|
"loss": 0.0075, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 27.011855670103092, |
|
"grad_norm": 0.10922248661518097, |
|
"learning_rate": 4.948453608247423e-06, |
|
"loss": 0.5134, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 27.012886597938145, |
|
"grad_norm": 0.01793413981795311, |
|
"learning_rate": 4.936998854524628e-06, |
|
"loss": 0.4059, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 27.013917525773195, |
|
"grad_norm": 0.12545382976531982, |
|
"learning_rate": 4.9255441008018336e-06, |
|
"loss": 0.0593, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 27.01494845360825, |
|
"grad_norm": 0.23186025023460388, |
|
"learning_rate": 4.914089347079038e-06, |
|
"loss": 0.1651, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 27.015979381443298, |
|
"grad_norm": 24.21805191040039, |
|
"learning_rate": 4.902634593356243e-06, |
|
"loss": 0.5709, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 27.01701030927835, |
|
"grad_norm": 0.19743561744689941, |
|
"learning_rate": 4.891179839633448e-06, |
|
"loss": 0.2359, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 27.0180412371134, |
|
"grad_norm": 99.03250122070312, |
|
"learning_rate": 4.879725085910653e-06, |
|
"loss": 0.3897, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 27.019072164948454, |
|
"grad_norm": 0.35776469111442566, |
|
"learning_rate": 4.868270332187858e-06, |
|
"loss": 0.1425, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 27.020103092783504, |
|
"grad_norm": 0.001978857209905982, |
|
"learning_rate": 4.856815578465064e-06, |
|
"loss": 0.0099, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 27.020103092783504, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.765753984451294, |
|
"eval_runtime": 12.8308, |
|
"eval_samples_per_second": 3.507, |
|
"eval_steps_per_second": 0.935, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 28.001030927835053, |
|
"grad_norm": 0.10768745839595795, |
|
"learning_rate": 4.845360824742268e-06, |
|
"loss": 0.2311, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 28.002061855670103, |
|
"grad_norm": 57.12821960449219, |
|
"learning_rate": 4.833906071019473e-06, |
|
"loss": 0.1598, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 28.003092783505156, |
|
"grad_norm": 0.11094794422388077, |
|
"learning_rate": 4.822451317296679e-06, |
|
"loss": 0.2435, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 28.004123711340206, |
|
"grad_norm": 0.04346878081560135, |
|
"learning_rate": 4.810996563573884e-06, |
|
"loss": 0.4259, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 28.00515463917526, |
|
"grad_norm": 99.23379516601562, |
|
"learning_rate": 4.7995418098510884e-06, |
|
"loss": 0.4067, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 28.00618556701031, |
|
"grad_norm": 0.011886666528880596, |
|
"learning_rate": 4.788087056128294e-06, |
|
"loss": 0.3366, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 28.007216494845363, |
|
"grad_norm": 17.010530471801758, |
|
"learning_rate": 4.776632302405499e-06, |
|
"loss": 0.5229, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 28.008247422680412, |
|
"grad_norm": 84.538818359375, |
|
"learning_rate": 4.7651775486827035e-06, |
|
"loss": 0.2171, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 28.009278350515466, |
|
"grad_norm": 0.029075944796204567, |
|
"learning_rate": 4.753722794959909e-06, |
|
"loss": 0.2936, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 28.010309278350515, |
|
"grad_norm": 0.5513918399810791, |
|
"learning_rate": 4.742268041237113e-06, |
|
"loss": 0.1806, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 28.01134020618557, |
|
"grad_norm": 0.14319252967834473, |
|
"learning_rate": 4.7308132875143185e-06, |
|
"loss": 0.5482, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 28.01237113402062, |
|
"grad_norm": 0.33188721537590027, |
|
"learning_rate": 4.719358533791524e-06, |
|
"loss": 0.2692, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 28.013402061855672, |
|
"grad_norm": 0.03212364390492439, |
|
"learning_rate": 4.707903780068729e-06, |
|
"loss": 0.0034, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 28.01443298969072, |
|
"grad_norm": 0.03373891860246658, |
|
"learning_rate": 4.6964490263459336e-06, |
|
"loss": 0.0255, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 28.015463917525775, |
|
"grad_norm": 200.98361206054688, |
|
"learning_rate": 4.684994272623139e-06, |
|
"loss": 0.3354, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 28.016494845360825, |
|
"grad_norm": 41.92877960205078, |
|
"learning_rate": 4.673539518900344e-06, |
|
"loss": 0.0269, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 28.017525773195878, |
|
"grad_norm": 0.11387676745653152, |
|
"learning_rate": 4.6620847651775494e-06, |
|
"loss": 0.0015, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 28.018556701030928, |
|
"grad_norm": 0.05513579770922661, |
|
"learning_rate": 4.650630011454754e-06, |
|
"loss": 0.4027, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 28.01958762886598, |
|
"grad_norm": 0.003919053822755814, |
|
"learning_rate": 4.639175257731959e-06, |
|
"loss": 0.0346, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 28.020103092783504, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 1.5937350988388062, |
|
"eval_runtime": 11.439, |
|
"eval_samples_per_second": 3.934, |
|
"eval_steps_per_second": 1.049, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 29.000515463917527, |
|
"grad_norm": 0.547856867313385, |
|
"learning_rate": 4.6277205040091645e-06, |
|
"loss": 0.1152, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 29.001546391752576, |
|
"grad_norm": 0.010174279101192951, |
|
"learning_rate": 4.61626575028637e-06, |
|
"loss": 0.1109, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 29.00257731958763, |
|
"grad_norm": 0.08245531469583511, |
|
"learning_rate": 4.604810996563574e-06, |
|
"loss": 0.3028, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 29.00360824742268, |
|
"grad_norm": 0.2131088227033615, |
|
"learning_rate": 4.593356242840779e-06, |
|
"loss": 0.1538, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 29.004639175257733, |
|
"grad_norm": 128.2372589111328, |
|
"learning_rate": 4.581901489117984e-06, |
|
"loss": 0.4187, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 29.005670103092783, |
|
"grad_norm": 3.6024224758148193, |
|
"learning_rate": 4.570446735395189e-06, |
|
"loss": 0.1509, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 29.006701030927836, |
|
"grad_norm": 6.2508931159973145, |
|
"learning_rate": 4.5589919816723946e-06, |
|
"loss": 0.3448, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 29.007731958762886, |
|
"grad_norm": 25.978151321411133, |
|
"learning_rate": 4.547537227949599e-06, |
|
"loss": 0.378, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 29.00876288659794, |
|
"grad_norm": 97.8603286743164, |
|
"learning_rate": 4.536082474226804e-06, |
|
"loss": 0.3905, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 29.00979381443299, |
|
"grad_norm": 58.110443115234375, |
|
"learning_rate": 4.52462772050401e-06, |
|
"loss": 0.1606, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 29.010824742268042, |
|
"grad_norm": 0.03289901092648506, |
|
"learning_rate": 4.513172966781215e-06, |
|
"loss": 0.0116, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 29.011855670103092, |
|
"grad_norm": 0.4851415455341339, |
|
"learning_rate": 4.501718213058419e-06, |
|
"loss": 0.0014, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 29.012886597938145, |
|
"grad_norm": 0.6099609732627869, |
|
"learning_rate": 4.490263459335625e-06, |
|
"loss": 0.2822, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 29.013917525773195, |
|
"grad_norm": 0.035203512758016586, |
|
"learning_rate": 4.47880870561283e-06, |
|
"loss": 0.1903, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 29.01494845360825, |
|
"grad_norm": 0.075258269906044, |
|
"learning_rate": 4.467353951890035e-06, |
|
"loss": 0.2484, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 29.015979381443298, |
|
"grad_norm": 0.014334428124129772, |
|
"learning_rate": 4.45589919816724e-06, |
|
"loss": 0.2231, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 29.01701030927835, |
|
"grad_norm": 0.03960711136460304, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.203, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 29.0180412371134, |
|
"grad_norm": 0.082605741918087, |
|
"learning_rate": 4.4329896907216494e-06, |
|
"loss": 0.009, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 29.019072164948454, |
|
"grad_norm": 1.0183836221694946, |
|
"learning_rate": 4.421534936998855e-06, |
|
"loss": 0.3987, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 29.020103092783504, |
|
"grad_norm": 0.003887833096086979, |
|
"learning_rate": 4.41008018327606e-06, |
|
"loss": 0.0058, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 29.020103092783504, |
|
"eval_accuracy": 0.6444444444444445, |
|
"eval_loss": 2.351128578186035, |
|
"eval_runtime": 12.9569, |
|
"eval_samples_per_second": 3.473, |
|
"eval_steps_per_second": 0.926, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 30.001030927835053, |
|
"grad_norm": 5.273438453674316, |
|
"learning_rate": 4.3986254295532645e-06, |
|
"loss": 0.4555, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 30.002061855670103, |
|
"grad_norm": 0.054419275373220444, |
|
"learning_rate": 4.38717067583047e-06, |
|
"loss": 0.0564, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 30.003092783505156, |
|
"grad_norm": 23.50123405456543, |
|
"learning_rate": 4.375715922107675e-06, |
|
"loss": 0.0966, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 30.004123711340206, |
|
"grad_norm": 196.958251953125, |
|
"learning_rate": 4.36426116838488e-06, |
|
"loss": 0.133, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 30.00515463917526, |
|
"grad_norm": 52.535770416259766, |
|
"learning_rate": 4.352806414662085e-06, |
|
"loss": 0.2876, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 30.00618556701031, |
|
"grad_norm": 0.10040858387947083, |
|
"learning_rate": 4.34135166093929e-06, |
|
"loss": 0.2648, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 30.007216494845363, |
|
"grad_norm": 0.025616025552153587, |
|
"learning_rate": 4.329896907216495e-06, |
|
"loss": 0.3765, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 30.008247422680412, |
|
"grad_norm": 0.006266108714044094, |
|
"learning_rate": 4.318442153493701e-06, |
|
"loss": 0.1254, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 30.009278350515466, |
|
"grad_norm": 0.8512455224990845, |
|
"learning_rate": 4.306987399770905e-06, |
|
"loss": 0.0057, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 30.010309278350515, |
|
"grad_norm": 0.033568281680345535, |
|
"learning_rate": 4.2955326460481105e-06, |
|
"loss": 0.1417, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 30.01134020618557, |
|
"grad_norm": 30.20100212097168, |
|
"learning_rate": 4.284077892325315e-06, |
|
"loss": 0.0088, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 30.01237113402062, |
|
"grad_norm": 0.2173480987548828, |
|
"learning_rate": 4.27262313860252e-06, |
|
"loss": 0.1447, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 30.013402061855672, |
|
"grad_norm": 0.009785875678062439, |
|
"learning_rate": 4.2611683848797255e-06, |
|
"loss": 0.002, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 30.01443298969072, |
|
"grad_norm": 0.0599403902888298, |
|
"learning_rate": 4.24971363115693e-06, |
|
"loss": 0.0011, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 30.015463917525775, |
|
"grad_norm": 0.23365233838558197, |
|
"learning_rate": 4.238258877434135e-06, |
|
"loss": 0.2509, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 30.016494845360825, |
|
"grad_norm": 0.14375662803649902, |
|
"learning_rate": 4.2268041237113405e-06, |
|
"loss": 0.4676, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 30.017525773195878, |
|
"grad_norm": 0.03053143061697483, |
|
"learning_rate": 4.215349369988546e-06, |
|
"loss": 0.1415, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 30.018556701030928, |
|
"grad_norm": 0.08184775710105896, |
|
"learning_rate": 4.20389461626575e-06, |
|
"loss": 0.0848, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 30.01958762886598, |
|
"grad_norm": 0.6414709091186523, |
|
"learning_rate": 4.192439862542956e-06, |
|
"loss": 0.1163, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 30.020103092783504, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.7067806720733643, |
|
"eval_runtime": 13.9465, |
|
"eval_samples_per_second": 3.227, |
|
"eval_steps_per_second": 0.86, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 31.000515463917527, |
|
"grad_norm": 0.016645895317196846, |
|
"learning_rate": 4.180985108820161e-06, |
|
"loss": 0.4294, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 31.001546391752576, |
|
"grad_norm": 0.7525324821472168, |
|
"learning_rate": 4.169530355097366e-06, |
|
"loss": 0.4195, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 31.00257731958763, |
|
"grad_norm": 55.71934127807617, |
|
"learning_rate": 4.158075601374571e-06, |
|
"loss": 0.2149, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 31.00360824742268, |
|
"grad_norm": 0.006537964567542076, |
|
"learning_rate": 4.146620847651776e-06, |
|
"loss": 0.4038, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 31.004639175257733, |
|
"grad_norm": 2.2747881412506104, |
|
"learning_rate": 4.13516609392898e-06, |
|
"loss": 0.33, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 31.005670103092783, |
|
"grad_norm": 0.04077708348631859, |
|
"learning_rate": 4.123711340206186e-06, |
|
"loss": 0.0016, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 31.006701030927836, |
|
"grad_norm": 2.354952573776245, |
|
"learning_rate": 4.112256586483391e-06, |
|
"loss": 0.4812, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 31.007731958762886, |
|
"grad_norm": 137.84132385253906, |
|
"learning_rate": 4.100801832760596e-06, |
|
"loss": 0.2663, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 31.00876288659794, |
|
"grad_norm": 0.071143738925457, |
|
"learning_rate": 4.089347079037801e-06, |
|
"loss": 0.0608, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 31.00979381443299, |
|
"grad_norm": 0.9687093496322632, |
|
"learning_rate": 4.077892325315006e-06, |
|
"loss": 0.2852, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 31.010824742268042, |
|
"grad_norm": 0.061937566846609116, |
|
"learning_rate": 4.066437571592211e-06, |
|
"loss": 0.2822, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 31.011855670103092, |
|
"grad_norm": 51.97724914550781, |
|
"learning_rate": 4.054982817869416e-06, |
|
"loss": 0.4565, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 31.012886597938145, |
|
"grad_norm": 0.051208335906267166, |
|
"learning_rate": 4.043528064146621e-06, |
|
"loss": 0.0235, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 31.013917525773195, |
|
"grad_norm": 0.028698451817035675, |
|
"learning_rate": 4.032073310423826e-06, |
|
"loss": 0.0208, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 31.01494845360825, |
|
"grad_norm": 0.02642163261771202, |
|
"learning_rate": 4.020618556701032e-06, |
|
"loss": 0.0016, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 31.015979381443298, |
|
"grad_norm": 0.019867481663823128, |
|
"learning_rate": 4.009163802978236e-06, |
|
"loss": 0.3667, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 31.01701030927835, |
|
"grad_norm": 0.12207566946744919, |
|
"learning_rate": 3.997709049255441e-06, |
|
"loss": 0.0483, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 31.0180412371134, |
|
"grad_norm": 0.0024308476131409407, |
|
"learning_rate": 3.986254295532647e-06, |
|
"loss": 0.2048, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 31.019072164948454, |
|
"grad_norm": 0.3703158497810364, |
|
"learning_rate": 3.974799541809851e-06, |
|
"loss": 0.4534, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 31.020103092783504, |
|
"grad_norm": 0.07688494771718979, |
|
"learning_rate": 3.9633447880870564e-06, |
|
"loss": 0.0962, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 31.020103092783504, |
|
"eval_accuracy": 0.6888888888888889, |
|
"eval_loss": 1.8766827583312988, |
|
"eval_runtime": 12.7435, |
|
"eval_samples_per_second": 3.531, |
|
"eval_steps_per_second": 0.942, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 32.00103092783505, |
|
"grad_norm": 3.0947821140289307, |
|
"learning_rate": 3.951890034364262e-06, |
|
"loss": 0.003, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 32.00206185567011, |
|
"grad_norm": 0.021548155695199966, |
|
"learning_rate": 3.940435280641466e-06, |
|
"loss": 0.2464, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 32.00309278350515, |
|
"grad_norm": 0.010396094061434269, |
|
"learning_rate": 3.9289805269186715e-06, |
|
"loss": 0.1475, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 32.004123711340206, |
|
"grad_norm": 0.26135310530662537, |
|
"learning_rate": 3.917525773195877e-06, |
|
"loss": 0.0456, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 32.00515463917526, |
|
"grad_norm": 0.04791492596268654, |
|
"learning_rate": 3.906071019473082e-06, |
|
"loss": 0.0836, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 32.00618556701031, |
|
"grad_norm": 0.00865848921239376, |
|
"learning_rate": 3.8946162657502865e-06, |
|
"loss": 0.0589, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 32.00721649484536, |
|
"grad_norm": 0.04501219838857651, |
|
"learning_rate": 3.883161512027492e-06, |
|
"loss": 0.1143, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 32.00824742268041, |
|
"grad_norm": 0.049783822149038315, |
|
"learning_rate": 3.871706758304697e-06, |
|
"loss": 0.1852, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 32.009278350515466, |
|
"grad_norm": 0.031185345724225044, |
|
"learning_rate": 3.8602520045819016e-06, |
|
"loss": 0.3198, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 32.01030927835052, |
|
"grad_norm": 139.4656219482422, |
|
"learning_rate": 3.848797250859107e-06, |
|
"loss": 0.0602, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 32.011340206185565, |
|
"grad_norm": 155.99598693847656, |
|
"learning_rate": 3.837342497136312e-06, |
|
"loss": 0.133, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 32.01237113402062, |
|
"grad_norm": 0.06643953174352646, |
|
"learning_rate": 3.825887743413517e-06, |
|
"loss": 0.2172, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 32.01340206185567, |
|
"grad_norm": 149.49154663085938, |
|
"learning_rate": 3.814432989690722e-06, |
|
"loss": 0.2295, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 32.014432989690725, |
|
"grad_norm": 0.12383408844470978, |
|
"learning_rate": 3.8029782359679268e-06, |
|
"loss": 0.0736, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 32.01546391752577, |
|
"grad_norm": 46.82889938354492, |
|
"learning_rate": 3.791523482245132e-06, |
|
"loss": 0.2792, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 32.016494845360825, |
|
"grad_norm": 0.019881825894117355, |
|
"learning_rate": 3.780068728522337e-06, |
|
"loss": 0.1028, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 32.01752577319588, |
|
"grad_norm": 53.818206787109375, |
|
"learning_rate": 3.7686139747995422e-06, |
|
"loss": 0.5382, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 32.01855670103093, |
|
"grad_norm": 10.200157165527344, |
|
"learning_rate": 3.757159221076747e-06, |
|
"loss": 0.0028, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 32.01958762886598, |
|
"grad_norm": 0.9711626768112183, |
|
"learning_rate": 3.7457044673539524e-06, |
|
"loss": 0.2826, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 32.02010309278351, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 2.165703773498535, |
|
"eval_runtime": 14.5293, |
|
"eval_samples_per_second": 3.097, |
|
"eval_steps_per_second": 0.826, |
|
"step": 6435 |
|
}, |
|
{ |
|
"epoch": 33.00051546391752, |
|
"grad_norm": 0.02483387477695942, |
|
"learning_rate": 3.7342497136311573e-06, |
|
"loss": 0.0014, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 33.001546391752576, |
|
"grad_norm": 65.55487060546875, |
|
"learning_rate": 3.7227949599083626e-06, |
|
"loss": 0.1579, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 33.00257731958763, |
|
"grad_norm": 0.09028110653162003, |
|
"learning_rate": 3.7113402061855674e-06, |
|
"loss": 0.2578, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 33.00360824742268, |
|
"grad_norm": 0.028494760394096375, |
|
"learning_rate": 3.6998854524627727e-06, |
|
"loss": 0.0175, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 33.00463917525773, |
|
"grad_norm": 0.015268037095665932, |
|
"learning_rate": 3.6884306987399776e-06, |
|
"loss": 0.1281, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 33.00567010309278, |
|
"grad_norm": 0.022972600534558296, |
|
"learning_rate": 3.6769759450171825e-06, |
|
"loss": 0.2622, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 33.006701030927836, |
|
"grad_norm": 0.022758277133107185, |
|
"learning_rate": 3.6655211912943874e-06, |
|
"loss": 0.0093, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 33.00773195876289, |
|
"grad_norm": 8.829773902893066, |
|
"learning_rate": 3.6540664375715922e-06, |
|
"loss": 0.597, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 33.008762886597935, |
|
"grad_norm": 0.006442221347242594, |
|
"learning_rate": 3.6426116838487975e-06, |
|
"loss": 0.1217, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 33.00979381443299, |
|
"grad_norm": 0.15540894865989685, |
|
"learning_rate": 3.6311569301260024e-06, |
|
"loss": 0.2546, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 33.01082474226804, |
|
"grad_norm": 1.448900580406189, |
|
"learning_rate": 3.6197021764032077e-06, |
|
"loss": 0.0054, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 33.011855670103095, |
|
"grad_norm": 0.1175546869635582, |
|
"learning_rate": 3.6082474226804126e-06, |
|
"loss": 0.0229, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 33.01288659793814, |
|
"grad_norm": 0.04827328771352768, |
|
"learning_rate": 3.596792668957618e-06, |
|
"loss": 0.2278, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 33.013917525773195, |
|
"grad_norm": 0.011951892636716366, |
|
"learning_rate": 3.5853379152348227e-06, |
|
"loss": 0.1539, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 33.01494845360825, |
|
"grad_norm": 0.05575776845216751, |
|
"learning_rate": 3.573883161512028e-06, |
|
"loss": 0.3902, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 33.0159793814433, |
|
"grad_norm": 43.80656051635742, |
|
"learning_rate": 3.562428407789233e-06, |
|
"loss": 0.011, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 33.01701030927835, |
|
"grad_norm": 1.3099844455718994, |
|
"learning_rate": 3.550973654066438e-06, |
|
"loss": 0.1933, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 33.0180412371134, |
|
"grad_norm": 0.03854267671704292, |
|
"learning_rate": 3.539518900343643e-06, |
|
"loss": 0.0416, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 33.019072164948454, |
|
"grad_norm": 0.01258633378893137, |
|
"learning_rate": 3.5280641466208484e-06, |
|
"loss": 0.2978, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 33.02010309278351, |
|
"grad_norm": 0.0009200758067891002, |
|
"learning_rate": 3.5166093928980532e-06, |
|
"loss": 0.1249, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 33.02010309278351, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 1.7385255098342896, |
|
"eval_runtime": 12.3632, |
|
"eval_samples_per_second": 3.64, |
|
"eval_steps_per_second": 0.971, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 34.00103092783505, |
|
"grad_norm": 0.1539161652326584, |
|
"learning_rate": 3.5051546391752577e-06, |
|
"loss": 0.1841, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 34.00206185567011, |
|
"grad_norm": 0.02649785578250885, |
|
"learning_rate": 3.493699885452463e-06, |
|
"loss": 0.2675, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 34.00309278350515, |
|
"grad_norm": 0.025071190670132637, |
|
"learning_rate": 3.482245131729668e-06, |
|
"loss": 0.0023, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 34.004123711340206, |
|
"grad_norm": 0.019204530864953995, |
|
"learning_rate": 3.470790378006873e-06, |
|
"loss": 0.4407, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 34.00515463917526, |
|
"grad_norm": 0.02421189844608307, |
|
"learning_rate": 3.459335624284078e-06, |
|
"loss": 0.0031, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 34.00618556701031, |
|
"grad_norm": 0.02116827294230461, |
|
"learning_rate": 3.4478808705612833e-06, |
|
"loss": 0.2766, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 34.00721649484536, |
|
"grad_norm": 0.0051218606531620026, |
|
"learning_rate": 3.436426116838488e-06, |
|
"loss": 0.6209, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 34.00824742268041, |
|
"grad_norm": 0.015079047530889511, |
|
"learning_rate": 3.4249713631156935e-06, |
|
"loss": 0.0015, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 34.009278350515466, |
|
"grad_norm": 1.001089096069336, |
|
"learning_rate": 3.4135166093928984e-06, |
|
"loss": 0.1645, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 34.01030927835052, |
|
"grad_norm": 0.0209248848259449, |
|
"learning_rate": 3.4020618556701037e-06, |
|
"loss": 0.0015, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 34.011340206185565, |
|
"grad_norm": 0.010426868684589863, |
|
"learning_rate": 3.3906071019473085e-06, |
|
"loss": 0.0104, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 34.01237113402062, |
|
"grad_norm": 0.06406555324792862, |
|
"learning_rate": 3.379152348224514e-06, |
|
"loss": 0.0415, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 34.01340206185567, |
|
"grad_norm": 222.54000854492188, |
|
"learning_rate": 3.3676975945017187e-06, |
|
"loss": 0.2297, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 34.014432989690725, |
|
"grad_norm": 0.2206907570362091, |
|
"learning_rate": 3.356242840778923e-06, |
|
"loss": 0.0033, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 34.01546391752577, |
|
"grad_norm": 0.0022467290982604027, |
|
"learning_rate": 3.3447880870561285e-06, |
|
"loss": 0.0908, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 34.016494845360825, |
|
"grad_norm": 281.9080505371094, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.2918, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 34.01752577319588, |
|
"grad_norm": 0.13616985082626343, |
|
"learning_rate": 3.3218785796105386e-06, |
|
"loss": 0.1153, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 34.01855670103093, |
|
"grad_norm": 1.4510610103607178, |
|
"learning_rate": 3.3104238258877435e-06, |
|
"loss": 0.0018, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 34.01958762886598, |
|
"grad_norm": 0.008665881119668484, |
|
"learning_rate": 3.298969072164949e-06, |
|
"loss": 0.2191, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 34.02010309278351, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 2.178891181945801, |
|
"eval_runtime": 12.7411, |
|
"eval_samples_per_second": 3.532, |
|
"eval_steps_per_second": 0.942, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 35.00051546391752, |
|
"grad_norm": 0.05293448269367218, |
|
"learning_rate": 3.2875143184421537e-06, |
|
"loss": 0.1271, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 35.001546391752576, |
|
"grad_norm": 0.0611007995903492, |
|
"learning_rate": 3.276059564719359e-06, |
|
"loss": 0.1393, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 35.00257731958763, |
|
"grad_norm": 0.024918990209698677, |
|
"learning_rate": 3.264604810996564e-06, |
|
"loss": 0.1459, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 35.00360824742268, |
|
"grad_norm": 0.28028416633605957, |
|
"learning_rate": 3.253150057273769e-06, |
|
"loss": 0.1449, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 35.00463917525773, |
|
"grad_norm": 0.0019702534191310406, |
|
"learning_rate": 3.241695303550974e-06, |
|
"loss": 0.0093, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 35.00567010309278, |
|
"grad_norm": 7.490848064422607, |
|
"learning_rate": 3.2302405498281793e-06, |
|
"loss": 0.122, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 35.006701030927836, |
|
"grad_norm": 0.013970672152936459, |
|
"learning_rate": 3.218785796105384e-06, |
|
"loss": 0.0232, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 35.00773195876289, |
|
"grad_norm": 0.031982772052288055, |
|
"learning_rate": 3.2073310423825895e-06, |
|
"loss": 0.086, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 35.008762886597935, |
|
"grad_norm": 0.010435467585921288, |
|
"learning_rate": 3.195876288659794e-06, |
|
"loss": 0.3166, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 35.00979381443299, |
|
"grad_norm": 86.65909576416016, |
|
"learning_rate": 3.1844215349369988e-06, |
|
"loss": 0.3312, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 35.01082474226804, |
|
"grad_norm": 122.04566955566406, |
|
"learning_rate": 3.172966781214204e-06, |
|
"loss": 0.2556, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 35.011855670103095, |
|
"grad_norm": 0.12888219952583313, |
|
"learning_rate": 3.161512027491409e-06, |
|
"loss": 0.3073, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 35.01288659793814, |
|
"grad_norm": 0.025842690840363503, |
|
"learning_rate": 3.1500572737686143e-06, |
|
"loss": 0.1798, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 35.013917525773195, |
|
"grad_norm": 0.011454744264483452, |
|
"learning_rate": 3.138602520045819e-06, |
|
"loss": 0.0072, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 35.01494845360825, |
|
"grad_norm": 167.35546875, |
|
"learning_rate": 3.1271477663230244e-06, |
|
"loss": 0.071, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 35.0159793814433, |
|
"grad_norm": 0.024680141359567642, |
|
"learning_rate": 3.1156930126002293e-06, |
|
"loss": 0.4104, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 35.01701030927835, |
|
"grad_norm": 0.008994595147669315, |
|
"learning_rate": 3.1042382588774346e-06, |
|
"loss": 0.1758, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 35.0180412371134, |
|
"grad_norm": 46.581077575683594, |
|
"learning_rate": 3.0927835051546395e-06, |
|
"loss": 0.1648, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 35.019072164948454, |
|
"grad_norm": 199.21475219726562, |
|
"learning_rate": 3.0813287514318448e-06, |
|
"loss": 0.264, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 35.02010309278351, |
|
"grad_norm": 0.001545836334116757, |
|
"learning_rate": 3.0698739977090496e-06, |
|
"loss": 0.0958, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 35.02010309278351, |
|
"eval_accuracy": 0.6444444444444445, |
|
"eval_loss": 2.472182035446167, |
|
"eval_runtime": 12.7933, |
|
"eval_samples_per_second": 3.517, |
|
"eval_steps_per_second": 0.938, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 36.00103092783505, |
|
"grad_norm": 0.045767877250909805, |
|
"learning_rate": 3.058419243986255e-06, |
|
"loss": 0.2266, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 36.00206185567011, |
|
"grad_norm": 0.014738555997610092, |
|
"learning_rate": 3.04696449026346e-06, |
|
"loss": 0.0054, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 36.00309278350515, |
|
"grad_norm": 0.00678382720798254, |
|
"learning_rate": 3.0355097365406643e-06, |
|
"loss": 0.3692, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 36.004123711340206, |
|
"grad_norm": 0.00942118652164936, |
|
"learning_rate": 3.0240549828178695e-06, |
|
"loss": 0.0014, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 36.00515463917526, |
|
"grad_norm": 0.024564573541283607, |
|
"learning_rate": 3.0126002290950744e-06, |
|
"loss": 0.1198, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 36.00618556701031, |
|
"grad_norm": 271.1005859375, |
|
"learning_rate": 3.0011454753722797e-06, |
|
"loss": 0.2814, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 36.00721649484536, |
|
"grad_norm": 0.043836403638124466, |
|
"learning_rate": 2.9896907216494846e-06, |
|
"loss": 0.0023, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 36.00824742268041, |
|
"grad_norm": 0.008552830666303635, |
|
"learning_rate": 2.97823596792669e-06, |
|
"loss": 0.0138, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 36.009278350515466, |
|
"grad_norm": 0.2735336124897003, |
|
"learning_rate": 2.9667812142038948e-06, |
|
"loss": 0.0027, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 36.01030927835052, |
|
"grad_norm": 0.0645265206694603, |
|
"learning_rate": 2.9553264604811e-06, |
|
"loss": 0.1183, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 36.011340206185565, |
|
"grad_norm": 0.035903312265872955, |
|
"learning_rate": 2.943871706758305e-06, |
|
"loss": 0.014, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 36.01237113402062, |
|
"grad_norm": 4.987162113189697, |
|
"learning_rate": 2.9324169530355102e-06, |
|
"loss": 0.1061, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 36.01340206185567, |
|
"grad_norm": 0.45441123843193054, |
|
"learning_rate": 2.920962199312715e-06, |
|
"loss": 0.0013, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 36.014432989690725, |
|
"grad_norm": 1.8816462755203247, |
|
"learning_rate": 2.9095074455899204e-06, |
|
"loss": 0.0026, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 36.01546391752577, |
|
"grad_norm": 0.005839156918227673, |
|
"learning_rate": 2.8980526918671253e-06, |
|
"loss": 0.0038, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 36.016494845360825, |
|
"grad_norm": 0.5239657163619995, |
|
"learning_rate": 2.8865979381443297e-06, |
|
"loss": 0.0005, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 36.01752577319588, |
|
"grad_norm": 60.700618743896484, |
|
"learning_rate": 2.875143184421535e-06, |
|
"loss": 0.1786, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 36.01855670103093, |
|
"grad_norm": 0.0037163153756409883, |
|
"learning_rate": 2.86368843069874e-06, |
|
"loss": 0.4697, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 36.01958762886598, |
|
"grad_norm": 0.007669605780392885, |
|
"learning_rate": 2.852233676975945e-06, |
|
"loss": 0.0006, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 36.02010309278351, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 1.917656421661377, |
|
"eval_runtime": 12.6565, |
|
"eval_samples_per_second": 3.555, |
|
"eval_steps_per_second": 0.948, |
|
"step": 7215 |
|
}, |
|
{ |
|
"epoch": 37.00051546391752, |
|
"grad_norm": 0.019232606515288353, |
|
"learning_rate": 2.84077892325315e-06, |
|
"loss": 0.1498, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 37.001546391752576, |
|
"grad_norm": 0.006618044804781675, |
|
"learning_rate": 2.8293241695303553e-06, |
|
"loss": 0.0006, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 37.00257731958763, |
|
"grad_norm": 32.33464813232422, |
|
"learning_rate": 2.8178694158075602e-06, |
|
"loss": 0.1678, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 37.00360824742268, |
|
"grad_norm": 0.02818481996655464, |
|
"learning_rate": 2.8064146620847655e-06, |
|
"loss": 0.2838, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 37.00463917525773, |
|
"grad_norm": 136.29965209960938, |
|
"learning_rate": 2.7949599083619704e-06, |
|
"loss": 0.0919, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 37.00567010309278, |
|
"grad_norm": 0.004848357755690813, |
|
"learning_rate": 2.7835051546391757e-06, |
|
"loss": 0.1393, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 37.006701030927836, |
|
"grad_norm": 91.26074981689453, |
|
"learning_rate": 2.7720504009163806e-06, |
|
"loss": 0.293, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 37.00773195876289, |
|
"grad_norm": 0.008552628569304943, |
|
"learning_rate": 2.760595647193586e-06, |
|
"loss": 0.0011, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 37.008762886597935, |
|
"grad_norm": 0.011269760318100452, |
|
"learning_rate": 2.7491408934707907e-06, |
|
"loss": 0.0008, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 37.00979381443299, |
|
"grad_norm": 0.07644051313400269, |
|
"learning_rate": 2.737686139747996e-06, |
|
"loss": 0.2102, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 37.01082474226804, |
|
"grad_norm": 0.0022292486391961575, |
|
"learning_rate": 2.7262313860252005e-06, |
|
"loss": 0.1402, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 37.011855670103095, |
|
"grad_norm": 7.599360466003418, |
|
"learning_rate": 2.7147766323024053e-06, |
|
"loss": 0.0044, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 37.01288659793814, |
|
"grad_norm": 220.58099365234375, |
|
"learning_rate": 2.7033218785796106e-06, |
|
"loss": 0.4214, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 37.013917525773195, |
|
"grad_norm": 83.0787353515625, |
|
"learning_rate": 2.6918671248568155e-06, |
|
"loss": 0.0265, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 37.01494845360825, |
|
"grad_norm": 0.015676092356443405, |
|
"learning_rate": 2.680412371134021e-06, |
|
"loss": 0.0018, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 37.0159793814433, |
|
"grad_norm": 0.008032367564737797, |
|
"learning_rate": 2.6689576174112257e-06, |
|
"loss": 0.0151, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 37.01701030927835, |
|
"grad_norm": 0.36304762959480286, |
|
"learning_rate": 2.657502863688431e-06, |
|
"loss": 0.1789, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 37.0180412371134, |
|
"grad_norm": 0.010707261972129345, |
|
"learning_rate": 2.646048109965636e-06, |
|
"loss": 0.0672, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 37.019072164948454, |
|
"grad_norm": 110.2496566772461, |
|
"learning_rate": 2.634593356242841e-06, |
|
"loss": 0.664, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 37.02010309278351, |
|
"grad_norm": 0.13198626041412354, |
|
"learning_rate": 2.623138602520046e-06, |
|
"loss": 0.0036, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 37.02010309278351, |
|
"eval_accuracy": 0.6888888888888889, |
|
"eval_loss": 1.9590771198272705, |
|
"eval_runtime": 14.573, |
|
"eval_samples_per_second": 3.088, |
|
"eval_steps_per_second": 0.823, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 38.00103092783505, |
|
"grad_norm": 64.50994110107422, |
|
"learning_rate": 2.6116838487972513e-06, |
|
"loss": 0.3479, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 38.00206185567011, |
|
"grad_norm": 139.82247924804688, |
|
"learning_rate": 2.600229095074456e-06, |
|
"loss": 0.2922, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 38.00309278350515, |
|
"grad_norm": 0.011880365200340748, |
|
"learning_rate": 2.5887743413516615e-06, |
|
"loss": 0.0738, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 38.004123711340206, |
|
"grad_norm": 0.0019393068505451083, |
|
"learning_rate": 2.577319587628866e-06, |
|
"loss": 0.0053, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 38.00515463917526, |
|
"grad_norm": 0.007837596349418163, |
|
"learning_rate": 2.565864833906071e-06, |
|
"loss": 0.1809, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 38.00618556701031, |
|
"grad_norm": 0.051794491708278656, |
|
"learning_rate": 2.554410080183276e-06, |
|
"loss": 0.1372, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 38.00721649484536, |
|
"grad_norm": 0.11213437467813492, |
|
"learning_rate": 2.542955326460481e-06, |
|
"loss": 0.0021, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 38.00824742268041, |
|
"grad_norm": 0.008571717888116837, |
|
"learning_rate": 2.5315005727376863e-06, |
|
"loss": 0.2099, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 38.009278350515466, |
|
"grad_norm": 0.021522628143429756, |
|
"learning_rate": 2.520045819014891e-06, |
|
"loss": 0.214, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 38.01030927835052, |
|
"grad_norm": 0.24400968849658966, |
|
"learning_rate": 2.5085910652920964e-06, |
|
"loss": 0.0008, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 38.011340206185565, |
|
"grad_norm": 0.008205863647162914, |
|
"learning_rate": 2.4971363115693013e-06, |
|
"loss": 0.1451, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 38.01237113402062, |
|
"grad_norm": 0.0307331420481205, |
|
"learning_rate": 2.4856815578465066e-06, |
|
"loss": 0.1482, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 38.01340206185567, |
|
"grad_norm": 0.06798528879880905, |
|
"learning_rate": 2.4742268041237115e-06, |
|
"loss": 0.0751, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 38.014432989690725, |
|
"grad_norm": 0.0034336706157773733, |
|
"learning_rate": 2.4627720504009168e-06, |
|
"loss": 0.0006, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 38.01546391752577, |
|
"grad_norm": 0.14595551788806915, |
|
"learning_rate": 2.4513172966781217e-06, |
|
"loss": 0.0006, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 38.016494845360825, |
|
"grad_norm": 0.00959043949842453, |
|
"learning_rate": 2.4398625429553265e-06, |
|
"loss": 0.0011, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 38.01752577319588, |
|
"grad_norm": 77.62090301513672, |
|
"learning_rate": 2.428407789232532e-06, |
|
"loss": 0.5068, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 38.01855670103093, |
|
"grad_norm": 0.020476188510656357, |
|
"learning_rate": 2.4169530355097367e-06, |
|
"loss": 0.1332, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 38.01958762886598, |
|
"grad_norm": 0.06416749209165573, |
|
"learning_rate": 2.405498281786942e-06, |
|
"loss": 0.0009, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 38.02010309278351, |
|
"eval_accuracy": 0.6222222222222222, |
|
"eval_loss": 2.399278402328491, |
|
"eval_runtime": 12.9659, |
|
"eval_samples_per_second": 3.471, |
|
"eval_steps_per_second": 0.926, |
|
"step": 7605 |
|
}, |
|
{ |
|
"epoch": 39.00051546391752, |
|
"grad_norm": 5.2809295654296875, |
|
"learning_rate": 2.394043528064147e-06, |
|
"loss": 0.0652, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 39.001546391752576, |
|
"grad_norm": 0.008623295463621616, |
|
"learning_rate": 2.3825887743413517e-06, |
|
"loss": 0.1178, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 39.00257731958763, |
|
"grad_norm": 0.03899654373526573, |
|
"learning_rate": 2.3711340206185566e-06, |
|
"loss": 0.0365, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 39.00360824742268, |
|
"grad_norm": 0.01230724435299635, |
|
"learning_rate": 2.359679266895762e-06, |
|
"loss": 0.1324, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 39.00463917525773, |
|
"grad_norm": 0.5138551592826843, |
|
"learning_rate": 2.3482245131729668e-06, |
|
"loss": 0.0991, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 39.00567010309278, |
|
"grad_norm": 0.004986596293747425, |
|
"learning_rate": 2.336769759450172e-06, |
|
"loss": 0.002, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 39.006701030927836, |
|
"grad_norm": 0.04998844861984253, |
|
"learning_rate": 2.325315005727377e-06, |
|
"loss": 0.0007, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 39.00773195876289, |
|
"grad_norm": 0.0053374143317341805, |
|
"learning_rate": 2.3138602520045822e-06, |
|
"loss": 0.0531, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 39.008762886597935, |
|
"grad_norm": 326.64031982421875, |
|
"learning_rate": 2.302405498281787e-06, |
|
"loss": 0.307, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 39.00979381443299, |
|
"grad_norm": 0.18461164832115173, |
|
"learning_rate": 2.290950744558992e-06, |
|
"loss": 0.0038, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 39.01082474226804, |
|
"grad_norm": 0.01692032255232334, |
|
"learning_rate": 2.2794959908361973e-06, |
|
"loss": 0.001, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 39.011855670103095, |
|
"grad_norm": 0.012526489794254303, |
|
"learning_rate": 2.268041237113402e-06, |
|
"loss": 0.3061, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 39.01288659793814, |
|
"grad_norm": 0.009198295883834362, |
|
"learning_rate": 2.2565864833906075e-06, |
|
"loss": 0.1932, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 39.013917525773195, |
|
"grad_norm": 0.1381886750459671, |
|
"learning_rate": 2.2451317296678123e-06, |
|
"loss": 0.0878, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 39.01494845360825, |
|
"grad_norm": 0.03150659054517746, |
|
"learning_rate": 2.2336769759450176e-06, |
|
"loss": 0.1654, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 39.0159793814433, |
|
"grad_norm": 0.011908908374607563, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0004, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 39.01701030927835, |
|
"grad_norm": 16.51814842224121, |
|
"learning_rate": 2.2107674684994274e-06, |
|
"loss": 0.0024, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 39.0180412371134, |
|
"grad_norm": 0.05224655196070671, |
|
"learning_rate": 2.1993127147766322e-06, |
|
"loss": 0.0003, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 39.019072164948454, |
|
"grad_norm": 0.10637877881526947, |
|
"learning_rate": 2.1878579610538375e-06, |
|
"loss": 0.0116, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 39.02010309278351, |
|
"grad_norm": 0.08694580942392349, |
|
"learning_rate": 2.1764032073310424e-06, |
|
"loss": 0.0005, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 39.02010309278351, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 1.7377581596374512, |
|
"eval_runtime": 12.9146, |
|
"eval_samples_per_second": 3.484, |
|
"eval_steps_per_second": 0.929, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 40.00103092783505, |
|
"grad_norm": 0.0032964874990284443, |
|
"learning_rate": 2.1649484536082477e-06, |
|
"loss": 0.057, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 40.00206185567011, |
|
"grad_norm": 0.01317548006772995, |
|
"learning_rate": 2.1534936998854526e-06, |
|
"loss": 0.0008, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 40.00309278350515, |
|
"grad_norm": 0.017642924562096596, |
|
"learning_rate": 2.1420389461626575e-06, |
|
"loss": 0.0005, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 40.004123711340206, |
|
"grad_norm": 0.012752017937600613, |
|
"learning_rate": 2.1305841924398628e-06, |
|
"loss": 0.0004, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 40.00515463917526, |
|
"grad_norm": 0.06961321830749512, |
|
"learning_rate": 2.1191294387170676e-06, |
|
"loss": 0.0003, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 40.00618556701031, |
|
"grad_norm": 0.09094371646642685, |
|
"learning_rate": 2.107674684994273e-06, |
|
"loss": 0.1611, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 40.00721649484536, |
|
"grad_norm": 0.008556496351957321, |
|
"learning_rate": 2.096219931271478e-06, |
|
"loss": 0.0007, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 40.00824742268041, |
|
"grad_norm": 309.84698486328125, |
|
"learning_rate": 2.084765177548683e-06, |
|
"loss": 0.2056, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 40.009278350515466, |
|
"grad_norm": 0.03041210025548935, |
|
"learning_rate": 2.073310423825888e-06, |
|
"loss": 0.0025, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 40.01030927835052, |
|
"grad_norm": 82.68245697021484, |
|
"learning_rate": 2.061855670103093e-06, |
|
"loss": 0.026, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 40.011340206185565, |
|
"grad_norm": 0.02870158664882183, |
|
"learning_rate": 2.050400916380298e-06, |
|
"loss": 0.0072, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 40.01237113402062, |
|
"grad_norm": 0.02945534512400627, |
|
"learning_rate": 2.038946162657503e-06, |
|
"loss": 0.1894, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 40.01340206185567, |
|
"grad_norm": 0.007657837588340044, |
|
"learning_rate": 2.027491408934708e-06, |
|
"loss": 0.0024, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 40.014432989690725, |
|
"grad_norm": 0.05909576267004013, |
|
"learning_rate": 2.016036655211913e-06, |
|
"loss": 0.1906, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 40.01546391752577, |
|
"grad_norm": 0.5790134072303772, |
|
"learning_rate": 2.004581901489118e-06, |
|
"loss": 0.0005, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 40.016494845360825, |
|
"grad_norm": 0.0051000709645450115, |
|
"learning_rate": 1.9931271477663233e-06, |
|
"loss": 0.1721, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 40.01752577319588, |
|
"grad_norm": 0.011354904621839523, |
|
"learning_rate": 1.9816723940435282e-06, |
|
"loss": 0.0046, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 40.01855670103093, |
|
"grad_norm": 327.5389099121094, |
|
"learning_rate": 1.970217640320733e-06, |
|
"loss": 0.4001, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 40.01958762886598, |
|
"grad_norm": 0.006849356461316347, |
|
"learning_rate": 1.9587628865979384e-06, |
|
"loss": 0.0014, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 40.02010309278351, |
|
"eval_accuracy": 0.6888888888888889, |
|
"eval_loss": 2.4453516006469727, |
|
"eval_runtime": 13.0464, |
|
"eval_samples_per_second": 3.449, |
|
"eval_steps_per_second": 0.92, |
|
"step": 7995 |
|
}, |
|
{ |
|
"epoch": 41.00051546391752, |
|
"grad_norm": 0.013001556508243084, |
|
"learning_rate": 1.9473081328751433e-06, |
|
"loss": 0.2103, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 41.001546391752576, |
|
"grad_norm": 107.24781799316406, |
|
"learning_rate": 1.9358533791523486e-06, |
|
"loss": 0.3106, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 41.00257731958763, |
|
"grad_norm": 0.008363613858819008, |
|
"learning_rate": 1.9243986254295534e-06, |
|
"loss": 0.028, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 41.00360824742268, |
|
"grad_norm": 0.04029637202620506, |
|
"learning_rate": 1.9129438717067583e-06, |
|
"loss": 0.0264, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 41.00463917525773, |
|
"grad_norm": 0.06103358045220375, |
|
"learning_rate": 1.9014891179839634e-06, |
|
"loss": 0.0334, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 41.00567010309278, |
|
"grad_norm": 0.00733905378729105, |
|
"learning_rate": 1.8900343642611685e-06, |
|
"loss": 0.0014, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 41.006701030927836, |
|
"grad_norm": 338.4005432128906, |
|
"learning_rate": 1.8785796105383736e-06, |
|
"loss": 0.0489, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 41.00773195876289, |
|
"grad_norm": 0.006053756456822157, |
|
"learning_rate": 1.8671248568155786e-06, |
|
"loss": 0.1368, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 41.008762886597935, |
|
"grad_norm": 0.0014053594786673784, |
|
"learning_rate": 1.8556701030927837e-06, |
|
"loss": 0.1025, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 41.00979381443299, |
|
"grad_norm": 0.0035459971986711025, |
|
"learning_rate": 1.8442153493699888e-06, |
|
"loss": 0.0006, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 41.01082474226804, |
|
"grad_norm": 384.0328369140625, |
|
"learning_rate": 1.8327605956471937e-06, |
|
"loss": 0.3021, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 41.011855670103095, |
|
"grad_norm": 0.5014728903770447, |
|
"learning_rate": 1.8213058419243988e-06, |
|
"loss": 0.1137, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 41.01288659793814, |
|
"grad_norm": 0.0056852176785469055, |
|
"learning_rate": 1.8098510882016038e-06, |
|
"loss": 0.0004, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 41.013917525773195, |
|
"grad_norm": 0.013269063085317612, |
|
"learning_rate": 1.798396334478809e-06, |
|
"loss": 0.0002, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 41.01494845360825, |
|
"grad_norm": 56.177669525146484, |
|
"learning_rate": 1.786941580756014e-06, |
|
"loss": 0.1576, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 41.0159793814433, |
|
"grad_norm": 0.43153733015060425, |
|
"learning_rate": 1.775486827033219e-06, |
|
"loss": 0.1699, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 41.01701030927835, |
|
"grad_norm": 0.010211652144789696, |
|
"learning_rate": 1.7640320733104242e-06, |
|
"loss": 0.0006, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 41.0180412371134, |
|
"grad_norm": 0.004664810374379158, |
|
"learning_rate": 1.7525773195876288e-06, |
|
"loss": 0.087, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 41.019072164948454, |
|
"grad_norm": 124.0995864868164, |
|
"learning_rate": 1.741122565864834e-06, |
|
"loss": 0.1361, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 41.02010309278351, |
|
"grad_norm": 0.04815516620874405, |
|
"learning_rate": 1.729667812142039e-06, |
|
"loss": 0.1203, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 41.02010309278351, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 2.113752841949463, |
|
"eval_runtime": 12.9032, |
|
"eval_samples_per_second": 3.488, |
|
"eval_steps_per_second": 0.93, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 42.00103092783505, |
|
"grad_norm": 0.0071434988640248775, |
|
"learning_rate": 1.718213058419244e-06, |
|
"loss": 0.0003, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 42.00206185567011, |
|
"grad_norm": 0.020918577909469604, |
|
"learning_rate": 1.7067583046964492e-06, |
|
"loss": 0.0011, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 42.00309278350515, |
|
"grad_norm": 0.03154708817601204, |
|
"learning_rate": 1.6953035509736543e-06, |
|
"loss": 0.0815, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 42.004123711340206, |
|
"grad_norm": 0.013127969577908516, |
|
"learning_rate": 1.6838487972508594e-06, |
|
"loss": 0.0354, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 42.00515463917526, |
|
"grad_norm": 0.11450503766536713, |
|
"learning_rate": 1.6723940435280642e-06, |
|
"loss": 0.6204, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 42.00618556701031, |
|
"grad_norm": 0.007757487706840038, |
|
"learning_rate": 1.6609392898052693e-06, |
|
"loss": 0.1229, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 42.00721649484536, |
|
"grad_norm": 0.015443161129951477, |
|
"learning_rate": 1.6494845360824744e-06, |
|
"loss": 0.0002, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 42.00824742268041, |
|
"grad_norm": 0.002430438296869397, |
|
"learning_rate": 1.6380297823596795e-06, |
|
"loss": 0.0002, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 42.009278350515466, |
|
"grad_norm": 0.10335277765989304, |
|
"learning_rate": 1.6265750286368846e-06, |
|
"loss": 0.0185, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 42.01030927835052, |
|
"grad_norm": 0.002046901499852538, |
|
"learning_rate": 1.6151202749140896e-06, |
|
"loss": 0.0508, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 42.011340206185565, |
|
"grad_norm": 0.005286916624754667, |
|
"learning_rate": 1.6036655211912947e-06, |
|
"loss": 0.0702, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 42.01237113402062, |
|
"grad_norm": 0.007195398211479187, |
|
"learning_rate": 1.5922107674684994e-06, |
|
"loss": 0.3414, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 42.01340206185567, |
|
"grad_norm": 0.002511281054466963, |
|
"learning_rate": 1.5807560137457045e-06, |
|
"loss": 0.2097, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 42.014432989690725, |
|
"grad_norm": 201.3845977783203, |
|
"learning_rate": 1.5693012600229096e-06, |
|
"loss": 0.2369, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 42.01546391752577, |
|
"grad_norm": 0.04441210627555847, |
|
"learning_rate": 1.5578465063001146e-06, |
|
"loss": 0.0554, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 42.016494845360825, |
|
"grad_norm": 12.391925811767578, |
|
"learning_rate": 1.5463917525773197e-06, |
|
"loss": 0.0019, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 42.01752577319588, |
|
"grad_norm": 0.1355491429567337, |
|
"learning_rate": 1.5349369988545248e-06, |
|
"loss": 0.0007, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 42.01855670103093, |
|
"grad_norm": 0.0035951670724898577, |
|
"learning_rate": 1.52348224513173e-06, |
|
"loss": 0.1423, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 42.01958762886598, |
|
"grad_norm": 0.007604878395795822, |
|
"learning_rate": 1.5120274914089348e-06, |
|
"loss": 0.0138, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 42.02010309278351, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 2.1768667697906494, |
|
"eval_runtime": 12.9363, |
|
"eval_samples_per_second": 3.479, |
|
"eval_steps_per_second": 0.928, |
|
"step": 8385 |
|
}, |
|
{ |
|
"epoch": 43.00051546391752, |
|
"grad_norm": 0.026959659531712532, |
|
"learning_rate": 1.5005727376861399e-06, |
|
"loss": 0.0004, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 43.001546391752576, |
|
"grad_norm": 0.09655001759529114, |
|
"learning_rate": 1.489117983963345e-06, |
|
"loss": 0.2277, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 43.00257731958763, |
|
"grad_norm": 1.289469838142395, |
|
"learning_rate": 1.47766323024055e-06, |
|
"loss": 0.1235, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 43.00360824742268, |
|
"grad_norm": 0.0012255634646862745, |
|
"learning_rate": 1.4662084765177551e-06, |
|
"loss": 0.0079, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 43.00463917525773, |
|
"grad_norm": 0.0007930409628897905, |
|
"learning_rate": 1.4547537227949602e-06, |
|
"loss": 0.0002, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 43.00567010309278, |
|
"grad_norm": 0.024416925385594368, |
|
"learning_rate": 1.4432989690721649e-06, |
|
"loss": 0.3317, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 43.006701030927836, |
|
"grad_norm": 0.0007845965446904302, |
|
"learning_rate": 1.43184421534937e-06, |
|
"loss": 0.0002, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 43.00773195876289, |
|
"grad_norm": 0.001673469552770257, |
|
"learning_rate": 1.420389461626575e-06, |
|
"loss": 0.0028, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 43.008762886597935, |
|
"grad_norm": 0.00685878423973918, |
|
"learning_rate": 1.4089347079037801e-06, |
|
"loss": 0.0008, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 43.00979381443299, |
|
"grad_norm": 0.0035130823962390423, |
|
"learning_rate": 1.3974799541809852e-06, |
|
"loss": 0.0007, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 43.01082474226804, |
|
"grad_norm": 198.6305694580078, |
|
"learning_rate": 1.3860252004581903e-06, |
|
"loss": 0.1222, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 43.011855670103095, |
|
"grad_norm": 0.0031229022424668074, |
|
"learning_rate": 1.3745704467353954e-06, |
|
"loss": 0.0002, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 43.01288659793814, |
|
"grad_norm": 0.004219813738018274, |
|
"learning_rate": 1.3631156930126002e-06, |
|
"loss": 0.0001, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 43.013917525773195, |
|
"grad_norm": 0.0015190464910119772, |
|
"learning_rate": 1.3516609392898053e-06, |
|
"loss": 0.014, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 43.01494845360825, |
|
"grad_norm": 0.003646484576165676, |
|
"learning_rate": 1.3402061855670104e-06, |
|
"loss": 0.1198, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 43.0159793814433, |
|
"grad_norm": 0.0039468565955758095, |
|
"learning_rate": 1.3287514318442155e-06, |
|
"loss": 0.0002, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 43.01701030927835, |
|
"grad_norm": 0.0018218251643702388, |
|
"learning_rate": 1.3172966781214206e-06, |
|
"loss": 0.0013, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 43.0180412371134, |
|
"grad_norm": 104.12612915039062, |
|
"learning_rate": 1.3058419243986257e-06, |
|
"loss": 0.2044, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 43.019072164948454, |
|
"grad_norm": 0.0026801032945513725, |
|
"learning_rate": 1.2943871706758307e-06, |
|
"loss": 0.0003, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 43.02010309278351, |
|
"grad_norm": 0.0005498857935890555, |
|
"learning_rate": 1.2829324169530354e-06, |
|
"loss": 0.3569, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 43.02010309278351, |
|
"eval_accuracy": 0.6222222222222222, |
|
"eval_loss": 2.6946287155151367, |
|
"eval_runtime": 13.5653, |
|
"eval_samples_per_second": 3.317, |
|
"eval_steps_per_second": 0.885, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 44.00103092783505, |
|
"grad_norm": 0.0034866807982325554, |
|
"learning_rate": 1.2714776632302405e-06, |
|
"loss": 0.0771, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 44.00206185567011, |
|
"grad_norm": 0.0050008767284452915, |
|
"learning_rate": 1.2600229095074456e-06, |
|
"loss": 0.1379, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 44.00309278350515, |
|
"grad_norm": 0.003171220887452364, |
|
"learning_rate": 1.2485681557846507e-06, |
|
"loss": 0.0002, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 44.004123711340206, |
|
"grad_norm": 0.00555449491366744, |
|
"learning_rate": 1.2371134020618557e-06, |
|
"loss": 0.2303, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 44.00515463917526, |
|
"grad_norm": 0.00758061558008194, |
|
"learning_rate": 1.2256586483390608e-06, |
|
"loss": 0.0019, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 44.00618556701031, |
|
"grad_norm": 35.78764724731445, |
|
"learning_rate": 1.214203894616266e-06, |
|
"loss": 0.2258, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 44.00721649484536, |
|
"grad_norm": 0.002628314308822155, |
|
"learning_rate": 1.202749140893471e-06, |
|
"loss": 0.1071, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 44.00824742268041, |
|
"grad_norm": 0.0065179308876395226, |
|
"learning_rate": 1.1912943871706759e-06, |
|
"loss": 0.0002, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 44.009278350515466, |
|
"grad_norm": 0.00975864939391613, |
|
"learning_rate": 1.179839633447881e-06, |
|
"loss": 0.0002, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 44.01030927835052, |
|
"grad_norm": 0.008498983457684517, |
|
"learning_rate": 1.168384879725086e-06, |
|
"loss": 0.0009, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 44.011340206185565, |
|
"grad_norm": 0.0022127425763756037, |
|
"learning_rate": 1.1569301260022911e-06, |
|
"loss": 0.0182, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 44.01237113402062, |
|
"grad_norm": 0.015520663000643253, |
|
"learning_rate": 1.145475372279496e-06, |
|
"loss": 0.1718, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 44.01340206185567, |
|
"grad_norm": 0.00714871846139431, |
|
"learning_rate": 1.134020618556701e-06, |
|
"loss": 0.0001, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 44.014432989690725, |
|
"grad_norm": 98.3997573852539, |
|
"learning_rate": 1.1225658648339062e-06, |
|
"loss": 0.3241, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 44.01546391752577, |
|
"grad_norm": 0.004079570062458515, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.0002, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 44.016494845360825, |
|
"grad_norm": 0.002131837885826826, |
|
"learning_rate": 1.0996563573883161e-06, |
|
"loss": 0.0002, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 44.01752577319588, |
|
"grad_norm": 0.027215346693992615, |
|
"learning_rate": 1.0882016036655212e-06, |
|
"loss": 0.388, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 44.01855670103093, |
|
"grad_norm": 0.025429489091038704, |
|
"learning_rate": 1.0767468499427263e-06, |
|
"loss": 0.1207, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 44.01958762886598, |
|
"grad_norm": 0.5874564051628113, |
|
"learning_rate": 1.0652920962199314e-06, |
|
"loss": 0.0002, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 44.02010309278351, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 2.156606435775757, |
|
"eval_runtime": 13.2361, |
|
"eval_samples_per_second": 3.4, |
|
"eval_steps_per_second": 0.907, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 45.00051546391752, |
|
"grad_norm": 0.01159296091645956, |
|
"learning_rate": 1.0538373424971365e-06, |
|
"loss": 0.0002, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 45.001546391752576, |
|
"grad_norm": 0.002699700416997075, |
|
"learning_rate": 1.0423825887743415e-06, |
|
"loss": 0.1022, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 45.00257731958763, |
|
"grad_norm": 0.002690671943128109, |
|
"learning_rate": 1.0309278350515464e-06, |
|
"loss": 0.0003, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 45.00360824742268, |
|
"grad_norm": 0.011441020295023918, |
|
"learning_rate": 1.0194730813287515e-06, |
|
"loss": 0.0027, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 45.00463917525773, |
|
"grad_norm": 0.3645267188549042, |
|
"learning_rate": 1.0080183276059566e-06, |
|
"loss": 0.0002, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 45.00567010309278, |
|
"grad_norm": 0.014001118950545788, |
|
"learning_rate": 9.965635738831617e-07, |
|
"loss": 0.0002, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 45.006701030927836, |
|
"grad_norm": 0.0017673630500212312, |
|
"learning_rate": 9.851088201603665e-07, |
|
"loss": 0.0006, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 45.00773195876289, |
|
"grad_norm": 44.564510345458984, |
|
"learning_rate": 9.736540664375716e-07, |
|
"loss": 0.4445, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 45.008762886597935, |
|
"grad_norm": 0.09444551169872284, |
|
"learning_rate": 9.621993127147767e-07, |
|
"loss": 0.0002, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 45.00979381443299, |
|
"grad_norm": 0.001571907545439899, |
|
"learning_rate": 9.507445589919817e-07, |
|
"loss": 0.2272, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 45.01082474226804, |
|
"grad_norm": 117.25080108642578, |
|
"learning_rate": 9.392898052691868e-07, |
|
"loss": 0.0253, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 45.011855670103095, |
|
"grad_norm": 0.02719367854297161, |
|
"learning_rate": 9.278350515463919e-07, |
|
"loss": 0.2072, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 45.01288659793814, |
|
"grad_norm": 0.011369436047971249, |
|
"learning_rate": 9.163802978235968e-07, |
|
"loss": 0.0002, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 45.013917525773195, |
|
"grad_norm": 0.009269513189792633, |
|
"learning_rate": 9.049255441008019e-07, |
|
"loss": 0.0184, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 45.01494845360825, |
|
"grad_norm": 0.003293866291642189, |
|
"learning_rate": 8.93470790378007e-07, |
|
"loss": 0.0084, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 45.0159793814433, |
|
"grad_norm": 0.012929639779031277, |
|
"learning_rate": 8.820160366552121e-07, |
|
"loss": 0.0005, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 45.01701030927835, |
|
"grad_norm": 0.012119736522436142, |
|
"learning_rate": 8.70561282932417e-07, |
|
"loss": 0.2072, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 45.0180412371134, |
|
"grad_norm": 0.1019548624753952, |
|
"learning_rate": 8.59106529209622e-07, |
|
"loss": 0.0002, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 45.019072164948454, |
|
"grad_norm": 0.005744592752307653, |
|
"learning_rate": 8.476517754868271e-07, |
|
"loss": 0.1167, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 45.02010309278351, |
|
"grad_norm": 0.08731329441070557, |
|
"learning_rate": 8.361970217640321e-07, |
|
"loss": 0.0924, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 45.02010309278351, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 2.4635937213897705, |
|
"eval_runtime": 12.8852, |
|
"eval_samples_per_second": 3.492, |
|
"eval_steps_per_second": 0.931, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 46.00103092783505, |
|
"grad_norm": 0.006049647461622953, |
|
"learning_rate": 8.247422680412372e-07, |
|
"loss": 0.3291, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 46.00206185567011, |
|
"grad_norm": 206.76785278320312, |
|
"learning_rate": 8.132875143184423e-07, |
|
"loss": 0.1158, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 46.00309278350515, |
|
"grad_norm": 0.002335500903427601, |
|
"learning_rate": 8.018327605956474e-07, |
|
"loss": 0.0002, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 46.004123711340206, |
|
"grad_norm": 0.02907603606581688, |
|
"learning_rate": 7.903780068728522e-07, |
|
"loss": 0.0002, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 46.00515463917526, |
|
"grad_norm": 0.004002581350505352, |
|
"learning_rate": 7.789232531500573e-07, |
|
"loss": 0.3073, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 46.00618556701031, |
|
"grad_norm": 0.005749888252466917, |
|
"learning_rate": 7.674684994272624e-07, |
|
"loss": 0.3535, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 46.00721649484536, |
|
"grad_norm": 0.008823657408356667, |
|
"learning_rate": 7.560137457044674e-07, |
|
"loss": 0.0001, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 46.00824742268041, |
|
"grad_norm": 0.005698314867913723, |
|
"learning_rate": 7.445589919816725e-07, |
|
"loss": 0.0002, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 46.009278350515466, |
|
"grad_norm": 161.3025665283203, |
|
"learning_rate": 7.331042382588776e-07, |
|
"loss": 0.1556, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 46.01030927835052, |
|
"grad_norm": 0.010940884239971638, |
|
"learning_rate": 7.216494845360824e-07, |
|
"loss": 0.0938, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 46.011340206185565, |
|
"grad_norm": 0.004464665427803993, |
|
"learning_rate": 7.101947308132875e-07, |
|
"loss": 0.0002, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 46.01237113402062, |
|
"grad_norm": 0.0026364498771727085, |
|
"learning_rate": 6.987399770904926e-07, |
|
"loss": 0.0002, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 46.01340206185567, |
|
"grad_norm": 0.009777194820344448, |
|
"learning_rate": 6.872852233676977e-07, |
|
"loss": 0.0002, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 46.014432989690725, |
|
"grad_norm": 0.008579901419579983, |
|
"learning_rate": 6.758304696449027e-07, |
|
"loss": 0.0001, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 46.01546391752577, |
|
"grad_norm": 0.6798976063728333, |
|
"learning_rate": 6.643757159221077e-07, |
|
"loss": 0.015, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 46.016494845360825, |
|
"grad_norm": 0.032176949083805084, |
|
"learning_rate": 6.529209621993128e-07, |
|
"loss": 0.0002, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 46.01752577319588, |
|
"grad_norm": 0.010497340932488441, |
|
"learning_rate": 6.414662084765177e-07, |
|
"loss": 0.0002, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 46.01855670103093, |
|
"grad_norm": 0.022227320820093155, |
|
"learning_rate": 6.300114547537228e-07, |
|
"loss": 0.0006, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 46.01958762886598, |
|
"grad_norm": 0.02829979918897152, |
|
"learning_rate": 6.185567010309279e-07, |
|
"loss": 0.0004, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 46.02010309278351, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 2.1075925827026367, |
|
"eval_runtime": 12.8105, |
|
"eval_samples_per_second": 3.513, |
|
"eval_steps_per_second": 0.937, |
|
"step": 9165 |
|
}, |
|
{ |
|
"epoch": 47.00051546391752, |
|
"grad_norm": 0.004454383160918951, |
|
"learning_rate": 6.07101947308133e-07, |
|
"loss": 0.0002, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 47.001546391752576, |
|
"grad_norm": 0.007718723267316818, |
|
"learning_rate": 5.956471935853379e-07, |
|
"loss": 0.0004, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 47.00257731958763, |
|
"grad_norm": 0.0052038333378732204, |
|
"learning_rate": 5.84192439862543e-07, |
|
"loss": 0.1885, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 47.00360824742268, |
|
"grad_norm": 150.0712127685547, |
|
"learning_rate": 5.72737686139748e-07, |
|
"loss": 0.0726, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 47.00463917525773, |
|
"grad_norm": 0.002846953459084034, |
|
"learning_rate": 5.612829324169531e-07, |
|
"loss": 0.0002, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 47.00567010309278, |
|
"grad_norm": 0.0032731653191149235, |
|
"learning_rate": 5.498281786941581e-07, |
|
"loss": 0.0014, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 47.006701030927836, |
|
"grad_norm": 0.04309961199760437, |
|
"learning_rate": 5.383734249713631e-07, |
|
"loss": 0.0002, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 47.00773195876289, |
|
"grad_norm": 0.01399976946413517, |
|
"learning_rate": 5.269186712485682e-07, |
|
"loss": 0.0002, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 47.008762886597935, |
|
"grad_norm": 0.007444604765623808, |
|
"learning_rate": 5.154639175257732e-07, |
|
"loss": 0.0002, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 47.00979381443299, |
|
"grad_norm": 0.0034925006330013275, |
|
"learning_rate": 5.040091638029783e-07, |
|
"loss": 0.04, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 47.01082474226804, |
|
"grad_norm": 0.0037354633677750826, |
|
"learning_rate": 4.925544100801833e-07, |
|
"loss": 0.0002, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 47.011855670103095, |
|
"grad_norm": 0.0180855430662632, |
|
"learning_rate": 4.810996563573884e-07, |
|
"loss": 0.0004, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 47.01288659793814, |
|
"grad_norm": 0.007617347873747349, |
|
"learning_rate": 4.696449026345934e-07, |
|
"loss": 0.183, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 47.013917525773195, |
|
"grad_norm": 0.011123509146273136, |
|
"learning_rate": 4.581901489117984e-07, |
|
"loss": 0.0002, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 47.01494845360825, |
|
"grad_norm": 0.019915536046028137, |
|
"learning_rate": 4.467353951890035e-07, |
|
"loss": 0.0002, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 47.0159793814433, |
|
"grad_norm": 0.003200843231752515, |
|
"learning_rate": 4.352806414662085e-07, |
|
"loss": 0.0004, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 47.01701030927835, |
|
"grad_norm": 0.018746715039014816, |
|
"learning_rate": 4.2382588774341357e-07, |
|
"loss": 0.1612, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 47.0180412371134, |
|
"grad_norm": 0.004534402396529913, |
|
"learning_rate": 4.123711340206186e-07, |
|
"loss": 0.1429, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 47.019072164948454, |
|
"grad_norm": 0.014469140209257603, |
|
"learning_rate": 4.009163802978237e-07, |
|
"loss": 0.0001, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 47.02010309278351, |
|
"grad_norm": 0.0009225418325513601, |
|
"learning_rate": 3.8946162657502866e-07, |
|
"loss": 0.0645, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 47.02010309278351, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 2.1502678394317627, |
|
"eval_runtime": 12.792, |
|
"eval_samples_per_second": 3.518, |
|
"eval_steps_per_second": 0.938, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 48.00103092783505, |
|
"grad_norm": 0.008684216067194939, |
|
"learning_rate": 3.780068728522337e-07, |
|
"loss": 0.0002, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 48.00206185567011, |
|
"grad_norm": 0.002957036718726158, |
|
"learning_rate": 3.665521191294388e-07, |
|
"loss": 0.0059, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 48.00309278350515, |
|
"grad_norm": 0.0021803213749080896, |
|
"learning_rate": 3.5509736540664376e-07, |
|
"loss": 0.0002, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 48.004123711340206, |
|
"grad_norm": 0.00486848596483469, |
|
"learning_rate": 3.4364261168384884e-07, |
|
"loss": 0.0001, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 48.00515463917526, |
|
"grad_norm": 0.0031498922035098076, |
|
"learning_rate": 3.3218785796105387e-07, |
|
"loss": 0.1926, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 48.00618556701031, |
|
"grad_norm": 0.004292685072869062, |
|
"learning_rate": 3.2073310423825885e-07, |
|
"loss": 0.0026, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 48.00721649484536, |
|
"grad_norm": 0.003267282620072365, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"loss": 0.0006, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 48.00824742268041, |
|
"grad_norm": 0.022121211513876915, |
|
"learning_rate": 2.9782359679266897e-07, |
|
"loss": 0.0001, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 48.009278350515466, |
|
"grad_norm": 375.67889404296875, |
|
"learning_rate": 2.86368843069874e-07, |
|
"loss": 0.0801, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 48.01030927835052, |
|
"grad_norm": 23.7979736328125, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"loss": 0.0009, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 48.011340206185565, |
|
"grad_norm": 0.0010315030813217163, |
|
"learning_rate": 2.634593356242841e-07, |
|
"loss": 0.0671, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 48.01237113402062, |
|
"grad_norm": 0.01132703386247158, |
|
"learning_rate": 2.5200458190148915e-07, |
|
"loss": 0.0056, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 48.01340206185567, |
|
"grad_norm": 0.0034062955528497696, |
|
"learning_rate": 2.405498281786942e-07, |
|
"loss": 0.0002, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 48.014432989690725, |
|
"grad_norm": 0.23974573612213135, |
|
"learning_rate": 2.290950744558992e-07, |
|
"loss": 0.0002, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 48.01546391752577, |
|
"grad_norm": 0.006117957644164562, |
|
"learning_rate": 2.1764032073310424e-07, |
|
"loss": 0.3933, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 48.016494845360825, |
|
"grad_norm": 0.23375919461250305, |
|
"learning_rate": 2.061855670103093e-07, |
|
"loss": 0.0328, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 48.01752577319588, |
|
"grad_norm": 0.003966511692851782, |
|
"learning_rate": 1.9473081328751433e-07, |
|
"loss": 0.0002, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 48.01855670103093, |
|
"grad_norm": 0.015851015225052834, |
|
"learning_rate": 1.832760595647194e-07, |
|
"loss": 0.0002, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 48.01958762886598, |
|
"grad_norm": 0.07893943041563034, |
|
"learning_rate": 1.7182130584192442e-07, |
|
"loss": 0.1121, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 48.02010309278351, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 2.2610628604888916, |
|
"eval_runtime": 12.1107, |
|
"eval_samples_per_second": 3.716, |
|
"eval_steps_per_second": 0.991, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 49.00051546391752, |
|
"grad_norm": 85.26998901367188, |
|
"learning_rate": 1.6036655211912943e-07, |
|
"loss": 0.1761, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 49.001546391752576, |
|
"grad_norm": 0.005270041059702635, |
|
"learning_rate": 1.4891179839633448e-07, |
|
"loss": 0.1176, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 49.00257731958763, |
|
"grad_norm": 0.004510932136327028, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"loss": 0.0033, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 49.00360824742268, |
|
"grad_norm": 0.005600025877356529, |
|
"learning_rate": 1.2600229095074457e-07, |
|
"loss": 0.2291, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 49.00463917525773, |
|
"grad_norm": 0.03189009428024292, |
|
"learning_rate": 1.145475372279496e-07, |
|
"loss": 0.0002, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 49.00567010309278, |
|
"grad_norm": 0.06988941878080368, |
|
"learning_rate": 1.0309278350515465e-07, |
|
"loss": 0.0002, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 49.006701030927836, |
|
"grad_norm": 0.0760478675365448, |
|
"learning_rate": 9.16380297823597e-08, |
|
"loss": 0.0005, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 49.00773195876289, |
|
"grad_norm": 113.97074890136719, |
|
"learning_rate": 8.018327605956471e-08, |
|
"loss": 0.1573, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 49.008762886597935, |
|
"grad_norm": 0.008412045426666737, |
|
"learning_rate": 6.872852233676976e-08, |
|
"loss": 0.2028, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 49.00979381443299, |
|
"grad_norm": 0.014376115053892136, |
|
"learning_rate": 5.72737686139748e-08, |
|
"loss": 0.0003, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 49.01082474226804, |
|
"grad_norm": 0.02352799102663994, |
|
"learning_rate": 4.581901489117985e-08, |
|
"loss": 0.1283, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 49.011855670103095, |
|
"grad_norm": 0.0021438777912408113, |
|
"learning_rate": 3.436426116838488e-08, |
|
"loss": 0.1895, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 49.01288659793814, |
|
"grad_norm": 44.23455810546875, |
|
"learning_rate": 2.2909507445589924e-08, |
|
"loss": 0.1931, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 49.013917525773195, |
|
"grad_norm": 0.00817310530692339, |
|
"learning_rate": 1.1454753722794962e-08, |
|
"loss": 0.0002, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 49.01494845360825, |
|
"grad_norm": 0.005044811405241489, |
|
"learning_rate": 0.0, |
|
"loss": 0.1268, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 49.01494845360825, |
|
"eval_accuracy": 0.7111111111111111, |
|
"eval_loss": 2.172813653945923, |
|
"eval_runtime": 13.8199, |
|
"eval_samples_per_second": 3.256, |
|
"eval_steps_per_second": 0.868, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 49.01494845360825, |
|
"step": 9700, |
|
"total_flos": 1.6972634016516597e+20, |
|
"train_loss": 0.2964441666583838, |
|
"train_runtime": 22323.5867, |
|
"train_samples_per_second": 1.738, |
|
"train_steps_per_second": 0.435 |
|
}, |
|
{ |
|
"epoch": 49.01494845360825, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.36465370655059814, |
|
"eval_runtime": 12.1222, |
|
"eval_samples_per_second": 3.712, |
|
"eval_steps_per_second": 0.99, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 49.01494845360825, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.36465373635292053, |
|
"eval_runtime": 12.3017, |
|
"eval_samples_per_second": 3.658, |
|
"eval_steps_per_second": 0.975, |
|
"step": 9700 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6972634016516597e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |