{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999163179916318, "eval_steps": 500, "global_step": 420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07140864714086471, "grad_norm": 77.45325469970703, "learning_rate": 3.0769230769230774e-05, "loss": 0.6702, "step": 10 }, { "epoch": 0.14281729428172943, "grad_norm": 59.097469329833984, "learning_rate": 3.997081220115612e-05, "loss": 0.5793, "step": 20 }, { "epoch": 0.21422594142259413, "grad_norm": 52.32804870605469, "learning_rate": 3.982805660299152e-05, "loss": 0.5313, "step": 30 }, { "epoch": 0.28563458856345886, "grad_norm": 42.44932174682617, "learning_rate": 3.956722125241571e-05, "loss": 0.4916, "step": 40 }, { "epoch": 0.35704323570432356, "grad_norm": 44.81378936767578, "learning_rate": 3.9189859472289956e-05, "loss": 0.4705, "step": 50 }, { "epoch": 0.42845188284518826, "grad_norm": 47.092281341552734, "learning_rate": 3.869821852196291e-05, "loss": 0.4503, "step": 60 }, { "epoch": 0.499860529986053, "grad_norm": 49.76475143432617, "learning_rate": 3.809522621442463e-05, "loss": 0.4432, "step": 70 }, { "epoch": 0.5712691771269177, "grad_norm": 47.245887756347656, "learning_rate": 3.738447348063752e-05, "loss": 0.4199, "step": 80 }, { "epoch": 0.6426778242677824, "grad_norm": 53.50367736816406, "learning_rate": 3.657019298487685e-05, "loss": 0.4037, "step": 90 }, { "epoch": 0.7140864714086471, "grad_norm": 45.266780853271484, "learning_rate": 3.565723391843037e-05, "loss": 0.3886, "step": 100 }, { "epoch": 0.7854951185495118, "grad_norm": 42.24121856689453, "learning_rate": 3.465103312176541e-05, "loss": 0.382, "step": 110 }, { "epoch": 0.8569037656903765, "grad_norm": 46.39073181152344, "learning_rate": 3.35575827071361e-05, "loss": 0.366, "step": 120 }, { "epoch": 0.9283124128312413, "grad_norm": 44.36848831176758, "learning_rate": 3.238339437444418e-05, "loss": 0.3539, "step": 130 }, { "epoch": 0.999721059972106, "grad_norm": 43.62345504760742, "learning_rate": 3.113546063285907e-05, "loss": 0.348, "step": 140 }, { "epoch": 1.0711297071129706, "grad_norm": 42.84613037109375, "learning_rate": 2.9821213159129655e-05, "loss": 0.2955, "step": 150 }, { "epoch": 1.1425383542538354, "grad_norm": 47.24495315551758, "learning_rate": 2.8448478540571694e-05, "loss": 0.2896, "step": 160 }, { "epoch": 1.2139470013947, "grad_norm": 42.732933044433594, "learning_rate": 2.70254316662896e-05, "loss": 0.2889, "step": 170 }, { "epoch": 1.2853556485355648, "grad_norm": 44.07963180541992, "learning_rate": 2.5560547044196552e-05, "loss": 0.288, "step": 180 }, { "epoch": 1.3567642956764296, "grad_norm": 43.4594612121582, "learning_rate": 2.4062548333748996e-05, "loss": 0.2808, "step": 190 }, { "epoch": 1.4281729428172942, "grad_norm": 42.07709884643555, "learning_rate": 2.2540356394937577e-05, "loss": 0.2762, "step": 200 }, { "epoch": 1.499581589958159, "grad_norm": 41.83521270751953, "learning_rate": 2.1003036162912327e-05, "loss": 0.2798, "step": 210 }, { "epoch": 1.5709902370990236, "grad_norm": 40.76252365112305, "learning_rate": 1.945974266461355e-05, "loss": 0.2747, "step": 220 }, { "epoch": 1.6423988842398884, "grad_norm": 44.36031723022461, "learning_rate": 1.791966649888943e-05, "loss": 0.2585, "step": 230 }, { "epoch": 1.7138075313807533, "grad_norm": 40.18080139160156, "learning_rate": 1.639197910477628e-05, "loss": 0.2593, "step": 240 }, { "epoch": 1.7852161785216178, "grad_norm": 42.134490966796875, "learning_rate": 1.4885778143879096e-05, "loss": 0.2474, "step": 250 }, { "epoch": 1.8566248256624824, "grad_norm": 47.6600456237793, "learning_rate": 1.3410033322110323e-05, "loss": 0.2593, "step": 260 }, { "epoch": 1.9280334728033472, "grad_norm": 43.97560119628906, "learning_rate": 1.1973532973428536e-05, "loss": 0.2482, "step": 270 }, { "epoch": 1.999442119944212, "grad_norm": 41.20925521850586, "learning_rate": 1.05848317236807e-05, "loss": 0.2488, "step": 280 }, { "epoch": 2.070850767085077, "grad_norm": 44.23745346069336, "learning_rate": 9.25219954621956e-06, "loss": 0.196, "step": 290 }, { "epoch": 2.1422594142259412, "grad_norm": 45.39742660522461, "learning_rate": 7.983572512679384e-06, "loss": 0.1993, "step": 300 }, { "epoch": 2.213668061366806, "grad_norm": 43.84931182861328, "learning_rate": 6.7865055321983754e-06, "loss": 0.1937, "step": 310 }, { "epoch": 2.285076708507671, "grad_norm": 44.85445022583008, "learning_rate": 5.668127360534343e-06, "loss": 0.191, "step": 320 }, { "epoch": 2.3564853556485357, "grad_norm": 39.72176742553711, "learning_rate": 4.635098147002792e-06, "loss": 0.1931, "step": 330 }, { "epoch": 2.4278940027894, "grad_norm": 44.86882400512695, "learning_rate": 3.6935697720532095e-06, "loss": 0.1827, "step": 340 }, { "epoch": 2.499302649930265, "grad_norm": 42.152137756347656, "learning_rate": 2.849149211680693e-06, "loss": 0.1886, "step": 350 }, { "epoch": 2.5707112970711297, "grad_norm": 39.35033416748047, "learning_rate": 2.1068651468445546e-06, "loss": 0.19, "step": 360 }, { "epoch": 2.6421199442119945, "grad_norm": 40.978153228759766, "learning_rate": 1.4711380167411094e-06, "loss": 0.1912, "step": 370 }, { "epoch": 2.7135285913528593, "grad_norm": 44.58827209472656, "learning_rate": 9.45753694268885e-07, "loss": 0.1828, "step": 380 }, { "epoch": 2.7849372384937237, "grad_norm": 41.021331787109375, "learning_rate": 5.338409404537537e-07, "loss": 0.1808, "step": 390 }, { "epoch": 2.8563458856345885, "grad_norm": 42.61368179321289, "learning_rate": 2.3785277209707802e-07, "loss": 0.1946, "step": 400 }, { "epoch": 2.9277545327754533, "grad_norm": 39.83855438232422, "learning_rate": 5.9551853605968044e-08, "loss": 0.1858, "step": 410 }, { "epoch": 2.999163179916318, "grad_norm": 39.88948059082031, "learning_rate": 0.0, "loss": 0.1823, "step": 420 }, { "epoch": 2.999163179916318, "step": 420, "total_flos": 4.006659812993925e+17, "train_loss": 0.2970780080273038, "train_runtime": 9700.7388, "train_samples_per_second": 5.543, "train_steps_per_second": 0.043 } ], "logging_steps": 10, "max_steps": 420, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.006659812993925e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }