diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6153 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.04694358968884695, + "eval_steps": 500, + "global_step": 5110000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0004999999998149024, + "loss": 8.5332, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999990629436, + "loss": 8.1061, + "step": 10000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499999997732555, + "loss": 8.1201, + "step": 15000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999958237364, + "loss": 8.1658, + "step": 20000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499999993336488, + "loss": 8.2813, + "step": 25000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999902708098, + "loss": 8.3271, + "step": 30000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999866267018, + "loss": 8.3799, + "step": 35000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999824041639, + "loss": 8.4484, + "step": 40000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999776031961, + "loss": 8.4779, + "step": 45000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999722237985, + "loss": 8.4402, + "step": 50000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999662659712, + "loss": 8.4371, + "step": 55000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999597297141, + "loss": 8.4231, + "step": 60000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999526150273, + "loss": 8.3955, + "step": 65000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999449219107, + "loss": 8.4, + "step": 70000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999366503643, + "loss": 8.414, + "step": 75000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999278003882, + "loss": 8.4149, + "step": 80000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999183719824, + "loss": 8.4205, + "step": 85000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499999908365147, + "loss": 8.4157, + "step": 90000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999998977798819, + "loss": 8.4017, + "step": 95000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999998866161871, + "loss": 8.4086, + "step": 100000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999998748740628, + "loss": 8.3796, + "step": 105000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499999862553509, + "loss": 8.4095, + "step": 110000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999998496545254, + "loss": 8.4944, + "step": 115000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999998361771126, + "loss": 8.4752, + "step": 120000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999998221212701, + "loss": 8.4344, + "step": 125000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999998074869983, + "loss": 8.4137, + "step": 130000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999997922742969, + "loss": 8.468, + "step": 135000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999997764831663, + "loss": 8.5349, + "step": 140000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999997601136063, + "loss": 8.5042, + "step": 145000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999997431656169, + "loss": 8.5147, + "step": 150000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999997256391984, + "loss": 8.4722, + "step": 155000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999997075343505, + "loss": 8.4144, + "step": 160000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999996888510735, + "loss": 8.3935, + "step": 165000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999996695893673, + "loss": 8.4244, + "step": 170000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999996497492322, + "loss": 8.4991, + "step": 175000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999996293306679, + "loss": 8.5739, + "step": 180000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999996083336746, + "loss": 8.5794, + "step": 185000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999995867582523, + "loss": 8.5878, + "step": 190000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999995646044011, + "loss": 8.6133, + "step": 195000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999995418721212, + "loss": 8.5593, + "step": 200000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999995185614123, + "loss": 8.5519, + "step": 205000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999994946722748, + "loss": 8.5628, + "step": 210000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999994702047085, + "loss": 8.5084, + "step": 215000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999994451587136, + "loss": 8.5437, + "step": 220000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999994195342902, + "loss": 8.5508, + "step": 225000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999993933314382, + "loss": 8.5281, + "step": 230000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999993665501577, + "loss": 8.5282, + "step": 235000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999993391904488, + "loss": 8.5707, + "step": 240000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999993112523117, + "loss": 8.5596, + "step": 245000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999992827357463, + "loss": 8.554, + "step": 250000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999992536407527, + "loss": 8.5556, + "step": 255000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999992239673309, + "loss": 8.5767, + "step": 260000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499999193715481, + "loss": 8.579, + "step": 265000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999991628852031, + "loss": 8.5437, + "step": 270000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999991314764974, + "loss": 8.5226, + "step": 275000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999990994893638, + "loss": 8.5095, + "step": 280000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999990669238024, + "loss": 8.521, + "step": 285000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999990337798134, + "loss": 8.5687, + "step": 290000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999990000573966, + "loss": 8.567, + "step": 295000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999989657565524, + "loss": 8.5399, + "step": 300000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999989308772806, + "loss": 8.5336, + "step": 305000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999988954195816, + "loss": 8.5427, + "step": 310000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999988593834551, + "loss": 8.5369, + "step": 315000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999988227689015, + "loss": 8.5183, + "step": 320000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999987855759207, + "loss": 8.5165, + "step": 325000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999987478045128, + "loss": 8.5009, + "step": 330000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499998709454678, + "loss": 8.4832, + "step": 335000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999986705264164, + "loss": 8.4453, + "step": 340000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999986310197279, + "loss": 8.4457, + "step": 345000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999985909346127, + "loss": 8.4958, + "step": 350000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499998550271071, + "loss": 8.5252, + "step": 355000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999985090291027, + "loss": 8.526, + "step": 360000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999984672087081, + "loss": 8.4962, + "step": 365000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499998424809887, + "loss": 8.471, + "step": 370000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999983818326398, + "loss": 8.4943, + "step": 375000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999983382769665, + "loss": 8.5109, + "step": 380000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999982941428673, + "loss": 8.5156, + "step": 385000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999982494303419, + "loss": 8.4645, + "step": 390000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999982041393909, + "loss": 8.4879, + "step": 395000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999981582700142, + "loss": 8.5108, + "step": 400000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999981118222117, + "loss": 8.545, + "step": 405000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999980647959839, + "loss": 8.5518, + "step": 410000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999980171913307, + "loss": 8.5067, + "step": 415000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999979690082521, + "loss": 8.537, + "step": 420000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999979202467483, + "loss": 8.4697, + "step": 425000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999978709068197, + "loss": 8.4782, + "step": 430000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499997820988466, + "loss": 8.4981, + "step": 435000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999977704916876, + "loss": 8.5571, + "step": 440000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999977194164844, + "loss": 8.514, + "step": 445000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999976677628566, + "loss": 8.4959, + "step": 450000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999976155308043, + "loss": 8.58, + "step": 455000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999975627203278, + "loss": 8.6084, + "step": 460000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999975093314269, + "loss": 8.5743, + "step": 465000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499997455364102, + "loss": 8.5643, + "step": 470000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999974008183532, + "loss": 8.4961, + "step": 475000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999973456941804, + "loss": 8.4412, + "step": 480000 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499997289991584, + "loss": 8.4485, + "step": 485000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999972337105639, + "loss": 8.4707, + "step": 490000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999971768511204, + "loss": 8.4722, + "step": 495000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999971194132537, + "loss": 8.486, + "step": 500000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999970613969636, + "loss": 8.438, + "step": 505000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999970028022505, + "loss": 8.4606, + "step": 510000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999969436291146, + "loss": 8.518, + "step": 515000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999968838775557, + "loss": 8.5148, + "step": 520000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999968235475743, + "loss": 8.5136, + "step": 525000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999967626391703, + "loss": 8.4632, + "step": 530000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999967011523439, + "loss": 8.4725, + "step": 535000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999966390870954, + "loss": 8.4696, + "step": 540000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999965764434247, + "loss": 8.4397, + "step": 545000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999965132213321, + "loss": 8.4486, + "step": 550000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999964494208178, + "loss": 8.4202, + "step": 555000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999963850418817, + "loss": 8.4795, + "step": 560000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999963200845243, + "loss": 8.5227, + "step": 565000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999962545487453, + "loss": 8.5248, + "step": 570000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999961884345453, + "loss": 8.5308, + "step": 575000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999961217419241, + "loss": 8.5287, + "step": 580000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999960544708822, + "loss": 8.5622, + "step": 585000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999959866214195, + "loss": 8.5303, + "step": 590000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999959181935361, + "loss": 8.4332, + "step": 595000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999958491872324, + "loss": 8.4176, + "step": 600000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999957796025085, + "loss": 8.3863, + "step": 605000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999957094393644, + "loss": 8.3405, + "step": 610000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999956386978003, + "loss": 8.3725, + "step": 615000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999955673778165, + "loss": 8.4165, + "step": 620000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999954954794132, + "loss": 8.3808, + "step": 625000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999954230025904, + "loss": 8.3515, + "step": 630000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999953499473482, + "loss": 8.3555, + "step": 635000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499995276313687, + "loss": 8.4179, + "step": 640000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999952021016069, + "loss": 8.4277, + "step": 645000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499995127311108, + "loss": 8.4402, + "step": 650000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999950519421905, + "loss": 8.4472, + "step": 655000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999949759948546, + "loss": 8.4497, + "step": 660000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999948994691005, + "loss": 8.4436, + "step": 665000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999948223649283, + "loss": 8.4619, + "step": 670000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999947446823382, + "loss": 8.4528, + "step": 675000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999946664213305, + "loss": 8.4476, + "step": 680000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999945875819051, + "loss": 8.5146, + "step": 685000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999945081640625, + "loss": 8.5345, + "step": 690000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999944281678027, + "loss": 8.5706, + "step": 695000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499994347593126, + "loss": 8.5314, + "step": 700000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999942664400324, + "loss": 8.5054, + "step": 705000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999941847085223, + "loss": 8.5103, + "step": 710000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999941023985958, + "loss": 8.5057, + "step": 715000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499994019510253, + "loss": 8.4851, + "step": 720000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999939360434942, + "loss": 8.4865, + "step": 725000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999938519983196, + "loss": 8.4787, + "step": 730000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999937673747293, + "loss": 8.4496, + "step": 735000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999936821727237, + "loss": 8.4326, + "step": 740000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999935963923027, + "loss": 8.387, + "step": 745000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999935100334667, + "loss": 8.4004, + "step": 750000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499993423096216, + "loss": 8.4077, + "step": 755000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999933355805504, + "loss": 8.3867, + "step": 760000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999932474864706, + "loss": 8.4258, + "step": 765000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999931588139764, + "loss": 8.4821, + "step": 770000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999930695630682, + "loss": 8.4397, + "step": 775000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999929797337462, + "loss": 8.5221, + "step": 780000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999928893260105, + "loss": 8.4864, + "step": 785000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999927983398616, + "loss": 8.4338, + "step": 790000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999927067752993, + "loss": 8.4292, + "step": 795000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999926146323241, + "loss": 8.3973, + "step": 800000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999925219109361, + "loss": 8.4031, + "step": 805000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999924286111355, + "loss": 8.3967, + "step": 810000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999923347329226, + "loss": 8.4021, + "step": 815000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999922402762977, + "loss": 8.414, + "step": 820000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999921452412606, + "loss": 8.4847, + "step": 825000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499992049627812, + "loss": 8.4377, + "step": 830000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499991953435952, + "loss": 8.4167, + "step": 835000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999918566656806, + "loss": 8.445, + "step": 840000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999917593169984, + "loss": 8.4625, + "step": 845000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999916613899052, + "loss": 8.459, + "step": 850000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999915628844015, + "loss": 8.4831, + "step": 855000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999914638004875, + "loss": 8.478, + "step": 860000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999913641381633, + "loss": 8.4598, + "step": 865000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999912638974292, + "loss": 8.4568, + "step": 870000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999911630782856, + "loss": 8.4423, + "step": 875000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999910616807323, + "loss": 8.4638, + "step": 880000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999909597047702, + "loss": 8.4405, + "step": 885000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999908571503989, + "loss": 8.4464, + "step": 890000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999907540176189, + "loss": 8.4808, + "step": 895000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999906503064305, + "loss": 8.437, + "step": 900000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999905460168339, + "loss": 8.4214, + "step": 905000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999904411488293, + "loss": 8.4418, + "step": 910000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999903357024169, + "loss": 8.5303, + "step": 915000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999902296775971, + "loss": 8.4865, + "step": 920000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00049999012307437, + "loss": 8.4779, + "step": 925000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999900158927358, + "loss": 8.4884, + "step": 930000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999899081326949, + "loss": 8.4641, + "step": 935000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999897997942475, + "loss": 8.485, + "step": 940000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999896908773939, + "loss": 8.4615, + "step": 945000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999895813821341, + "loss": 8.4412, + "step": 950000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999894713084688, + "loss": 8.4274, + "step": 955000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999893606563978, + "loss": 8.4331, + "step": 960000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999892494259216, + "loss": 8.4737, + "step": 965000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999891376170404, + "loss": 8.4471, + "step": 970000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999890252297545, + "loss": 8.4929, + "step": 975000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999889122640642, + "loss": 8.4602, + "step": 980000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999887987199697, + "loss": 8.4671, + "step": 985000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999886845974712, + "loss": 8.4589, + "step": 990000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999885698965689, + "loss": 8.47, + "step": 995000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999884546172634, + "loss": 8.5097, + "step": 1000000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999883387595546, + "loss": 8.5082, + "step": 1005000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499988222323443, + "loss": 8.4846, + "step": 1010000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999881053089287, + "loss": 8.4732, + "step": 1015000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999879877160121, + "loss": 8.443, + "step": 1020000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999878695446934, + "loss": 8.4313, + "step": 1025000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499987750794973, + "loss": 8.4374, + "step": 1030000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499987631466851, + "loss": 8.4242, + "step": 1035000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999875115603279, + "loss": 8.4282, + "step": 1040000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999873910754036, + "loss": 8.4442, + "step": 1045000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999872700120788, + "loss": 8.5036, + "step": 1050000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999871483703536, + "loss": 8.4339, + "step": 1055000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999870261502281, + "loss": 8.399, + "step": 1060000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999869033517028, + "loss": 8.4156, + "step": 1065000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499986779974778, + "loss": 8.478, + "step": 1070000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999866560194539, + "loss": 8.5151, + "step": 1075000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499986531485731, + "loss": 8.5111, + "step": 1080000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999864063736091, + "loss": 8.5124, + "step": 1085000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499986280683089, + "loss": 8.4821, + "step": 1090000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999861544141706, + "loss": 8.4595, + "step": 1095000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999860275668545, + "loss": 8.4339, + "step": 1100000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999859001411409, + "loss": 8.3923, + "step": 1105000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00049998577213703, + "loss": 8.3647, + "step": 1110000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999856435545222, + "loss": 8.3865, + "step": 1115000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999855143936176, + "loss": 8.4136, + "step": 1120000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999853846543169, + "loss": 8.4033, + "step": 1125000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00049998525433662, + "loss": 8.4242, + "step": 1130000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999851234405274, + "loss": 8.3723, + "step": 1135000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999849919660393, + "loss": 8.4118, + "step": 1140000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999848599131562, + "loss": 8.3804, + "step": 1145000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999847272818781, + "loss": 8.4146, + "step": 1150000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999845940722056, + "loss": 8.4656, + "step": 1155000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999844602841388, + "loss": 8.4474, + "step": 1160000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999843259176781, + "loss": 8.4551, + "step": 1165000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999841909728239, + "loss": 8.4472, + "step": 1170000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999840554495763, + "loss": 8.4341, + "step": 1175000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999839193479358, + "loss": 8.4335, + "step": 1180000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999837826679027, + "loss": 8.4043, + "step": 1185000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999836454094771, + "loss": 8.3666, + "step": 1190000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999835075726595, + "loss": 8.3444, + "step": 1195000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999833691574503, + "loss": 8.3216, + "step": 1200000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999832301638497, + "loss": 8.3572, + "step": 1205000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999830905918581, + "loss": 8.3965, + "step": 1210000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999829504414756, + "loss": 8.4237, + "step": 1215000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999828097127029, + "loss": 8.3765, + "step": 1220000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999826684055398, + "loss": 8.3266, + "step": 1225000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999825265199872, + "loss": 8.322, + "step": 1230000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499982384056045, + "loss": 8.3367, + "step": 1235000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999822410137139, + "loss": 8.3544, + "step": 1240000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999820973929939, + "loss": 8.3379, + "step": 1245000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999819531938854, + "loss": 8.359, + "step": 1250000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999818084163889, + "loss": 8.3699, + "step": 1255000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999816630605047, + "loss": 8.3825, + "step": 1260000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999815171262328, + "loss": 8.3895, + "step": 1265000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499981370613574, + "loss": 8.3835, + "step": 1270000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999812235225284, + "loss": 8.3684, + "step": 1275000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999810758530964, + "loss": 8.3881, + "step": 1280000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999809276052783, + "loss": 8.376, + "step": 1285000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999807787790746, + "loss": 8.4006, + "step": 1290000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999806293744853, + "loss": 8.3775, + "step": 1295000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499980479391511, + "loss": 8.3253, + "step": 1300000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999803288301521, + "loss": 8.3663, + "step": 1305000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999801776904088, + "loss": 8.4, + "step": 1310000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999800259722815, + "loss": 8.3802, + "step": 1315000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999798736757706, + "loss": 8.3608, + "step": 1320000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999797208008763, + "loss": 8.3663, + "step": 1325000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999795673475992, + "loss": 8.3449, + "step": 1330000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999794133159394, + "loss": 8.3615, + "step": 1335000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999792587058974, + "loss": 8.3849, + "step": 1340000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999791035174736, + "loss": 8.3867, + "step": 1345000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999789477506682, + "loss": 8.3461, + "step": 1350000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999787914054816, + "loss": 8.2994, + "step": 1355000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999786344819144, + "loss": 8.3118, + "step": 1360000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999784769799666, + "loss": 8.3385, + "step": 1365000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999783188996388, + "loss": 8.3052, + "step": 1370000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999781602409313, + "loss": 8.3561, + "step": 1375000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999780010038445, + "loss": 8.3534, + "step": 1380000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999778411883786, + "loss": 8.3714, + "step": 1385000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999776807945342, + "loss": 8.3757, + "step": 1390000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999775198223117, + "loss": 8.3769, + "step": 1395000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999773582717112, + "loss": 8.3468, + "step": 1400000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999771961427332, + "loss": 8.3378, + "step": 1405000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999770334353782, + "loss": 8.3782, + "step": 1410000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999768701496464, + "loss": 8.3934, + "step": 1415000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999767062855384, + "loss": 8.3977, + "step": 1420000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999765418430543, + "loss": 8.3509, + "step": 1425000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999763768221946, + "loss": 8.3453, + "step": 1430000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999762112229598, + "loss": 8.2951, + "step": 1435000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999760450453501, + "loss": 8.3644, + "step": 1440000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499975878289366, + "loss": 8.3911, + "step": 1445000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999757109550078, + "loss": 8.3925, + "step": 1450000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499975543042276, + "loss": 8.3537, + "step": 1455000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999753745511709, + "loss": 8.379, + "step": 1460000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999752054816929, + "loss": 8.3786, + "step": 1465000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999750358338425, + "loss": 8.3861, + "step": 1470000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999748656076198, + "loss": 8.4382, + "step": 1475000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999746948030256, + "loss": 8.4304, + "step": 1480000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999745234200599, + "loss": 8.4402, + "step": 1485000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999743514587234, + "loss": 8.3865, + "step": 1490000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999741789190165, + "loss": 8.4104, + "step": 1495000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999740058009392, + "loss": 8.4022, + "step": 1500000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999738321044923, + "loss": 8.4134, + "step": 1505000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999736578296762, + "loss": 8.4199, + "step": 1510000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999734829764911, + "loss": 8.4059, + "step": 1515000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999733075449375, + "loss": 8.3262, + "step": 1520000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999731315350158, + "loss": 8.2811, + "step": 1525000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999729549467263, + "loss": 8.2573, + "step": 1530000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999727777800696, + "loss": 8.3032, + "step": 1535000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999726000350461, + "loss": 8.3267, + "step": 1540000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499972421711656, + "loss": 8.3838, + "step": 1545000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999722428098999, + "loss": 8.38, + "step": 1550000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999720633297782, + "loss": 8.3361, + "step": 1555000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999718832712913, + "loss": 8.3276, + "step": 1560000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999717026344394, + "loss": 8.294, + "step": 1565000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999715214192233, + "loss": 8.3313, + "step": 1570000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999713396256432, + "loss": 8.37, + "step": 1575000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999711572536995, + "loss": 8.3763, + "step": 1580000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999709743033928, + "loss": 8.4021, + "step": 1585000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999707907747233, + "loss": 8.3807, + "step": 1590000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999706066676915, + "loss": 8.4022, + "step": 1595000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999704219822979, + "loss": 8.4175, + "step": 1600000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999702367185429, + "loss": 8.3805, + "step": 1605000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999700508764267, + "loss": 8.3644, + "step": 1610000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999698644559501, + "loss": 8.3482, + "step": 1615000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999696774571134, + "loss": 8.3087, + "step": 1620000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499969489879917, + "loss": 8.2912, + "step": 1625000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999693017243612, + "loss": 8.2988, + "step": 1630000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999691129904467, + "loss": 8.2964, + "step": 1635000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999689236781737, + "loss": 8.3291, + "step": 1640000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999687337875427, + "loss": 8.3241, + "step": 1645000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999685433185544, + "loss": 8.3738, + "step": 1650000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999683522712088, + "loss": 8.3744, + "step": 1655000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999681606455066, + "loss": 8.3795, + "step": 1660000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999679684414483, + "loss": 8.4056, + "step": 1665000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999677756590342, + "loss": 8.3822, + "step": 1670000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999675822982648, + "loss": 8.3657, + "step": 1675000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999673883591406, + "loss": 8.3292, + "step": 1680000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999671938416619, + "loss": 8.3594, + "step": 1685000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999669987458292, + "loss": 8.3728, + "step": 1690000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999668030716431, + "loss": 8.3581, + "step": 1695000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999666068191039, + "loss": 8.3038, + "step": 1700000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999664099882121, + "loss": 8.3271, + "step": 1705000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999662125789682, + "loss": 8.307, + "step": 1710000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999660145913726, + "loss": 8.3064, + "step": 1715000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999658160254258, + "loss": 8.3224, + "step": 1720000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999656168811282, + "loss": 8.3283, + "step": 1725000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999654171584802, + "loss": 8.2952, + "step": 1730000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999652168574825, + "loss": 8.263, + "step": 1735000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999650159781353, + "loss": 8.2434, + "step": 1740000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999648145204393, + "loss": 8.2752, + "step": 1745000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999646124843948, + "loss": 8.2715, + "step": 1750000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999644098700023, + "loss": 8.2639, + "step": 1755000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999642066772622, + "loss": 8.2642, + "step": 1760000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999640029061752, + "loss": 8.2577, + "step": 1765000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999637985567415, + "loss": 8.2835, + "step": 1770000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999635936289618, + "loss": 8.3218, + "step": 1775000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999633881228365, + "loss": 8.3525, + "step": 1780000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999631820383659, + "loss": 8.3198, + "step": 1785000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999629753755507, + "loss": 8.3218, + "step": 1790000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999627681343913, + "loss": 8.2785, + "step": 1795000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999625603148882, + "loss": 8.2772, + "step": 1800000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999623519170419, + "loss": 8.3014, + "step": 1805000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999621429408528, + "loss": 8.3073, + "step": 1810000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999619333863214, + "loss": 8.2848, + "step": 1815000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999617232534483, + "loss": 8.3051, + "step": 1820000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999615125422339, + "loss": 8.2821, + "step": 1825000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999613012526786, + "loss": 8.2864, + "step": 1830000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999610893847829, + "loss": 8.293, + "step": 1835000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999608769385475, + "loss": 8.261, + "step": 1840000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999606639139728, + "loss": 8.2727, + "step": 1845000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999604503110592, + "loss": 8.283, + "step": 1850000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999602361298073, + "loss": 8.2857, + "step": 1855000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999600213702174, + "loss": 8.3006, + "step": 1860000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999598060322902, + "loss": 8.3414, + "step": 1865000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999595901160262, + "loss": 8.3064, + "step": 1870000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999593736214257, + "loss": 8.2756, + "step": 1875000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999591565484895, + "loss": 8.2984, + "step": 1880000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999589388972178, + "loss": 8.3053, + "step": 1885000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999587206676113, + "loss": 8.2835, + "step": 1890000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999585018596705, + "loss": 8.289, + "step": 1895000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999582824733958, + "loss": 8.2592, + "step": 1900000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999580625087878, + "loss": 8.2264, + "step": 1905000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499957841965847, + "loss": 8.2443, + "step": 1910000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999576208445738, + "loss": 8.244, + "step": 1915000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999573991449687, + "loss": 8.2067, + "step": 1920000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999571768670325, + "loss": 8.2082, + "step": 1925000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999569540107654, + "loss": 8.2183, + "step": 1930000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999567305761681, + "loss": 8.2743, + "step": 1935000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499956506563241, + "loss": 8.2565, + "step": 1940000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999562819719847, + "loss": 8.2867, + "step": 1945000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999560568023997, + "loss": 8.3107, + "step": 1950000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999558310544865, + "loss": 8.3057, + "step": 1955000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999556047282456, + "loss": 8.3432, + "step": 1960000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999553778236776, + "loss": 8.3441, + "step": 1965000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499955150340783, + "loss": 8.3092, + "step": 1970000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999549222795622, + "loss": 8.2568, + "step": 1975000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999546936400159, + "loss": 8.2439, + "step": 1980000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999544644221446, + "loss": 8.2598, + "step": 1985000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999542346259488, + "loss": 8.2371, + "step": 1990000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499954004251429, + "loss": 8.2255, + "step": 1995000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999537732985857, + "loss": 8.2308, + "step": 2000000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999535417674196, + "loss": 8.2345, + "step": 2005000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499953309657931, + "loss": 8.2342, + "step": 2010000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999530769701207, + "loss": 8.2473, + "step": 2015000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999528437039891, + "loss": 8.2992, + "step": 2020000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999526098595367, + "loss": 8.2668, + "step": 2025000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499952375436764, + "loss": 8.2865, + "step": 2030000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999521404356718, + "loss": 8.2854, + "step": 2035000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999519048562603, + "loss": 8.2697, + "step": 2040000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999516686985304, + "loss": 8.2916, + "step": 2045000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999514319624823, + "loss": 8.2798, + "step": 2050000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999511946481167, + "loss": 8.299, + "step": 2055000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999509567554343, + "loss": 8.2762, + "step": 2060000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999507182844355, + "loss": 8.247, + "step": 2065000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999504792351209, + "loss": 8.2446, + "step": 2070000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999502396074908, + "loss": 8.2318, + "step": 2075000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999499994015462, + "loss": 8.266, + "step": 2080000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999497586172873, + "loss": 8.2625, + "step": 2085000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999495172547148, + "loss": 8.2462, + "step": 2090000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999492753138293, + "loss": 8.2909, + "step": 2095000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999490327946312, + "loss": 8.2519, + "step": 2100000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999487896971212, + "loss": 8.2138, + "step": 2105000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999485460212998, + "loss": 8.236, + "step": 2110000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999483017671676, + "loss": 8.2644, + "step": 2115000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999480569347252, + "loss": 8.317, + "step": 2120000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999478115239732, + "loss": 8.292, + "step": 2125000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999475655349119, + "loss": 8.2746, + "step": 2130000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999473189675422, + "loss": 8.2593, + "step": 2135000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999470718218645, + "loss": 8.2784, + "step": 2140000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999468240978794, + "loss": 8.2614, + "step": 2145000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999465757955875, + "loss": 8.2859, + "step": 2150000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999463269149892, + "loss": 8.2754, + "step": 2155000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999460774560853, + "loss": 8.2623, + "step": 2160000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999458274188764, + "loss": 8.2901, + "step": 2165000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999455768033628, + "loss": 8.2651, + "step": 2170000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999453256095454, + "loss": 8.2527, + "step": 2175000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999450738374246, + "loss": 8.2246, + "step": 2180000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499944821487001, + "loss": 8.222, + "step": 2185000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999445685582752, + "loss": 8.1857, + "step": 2190000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999443150512479, + "loss": 8.1593, + "step": 2195000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999440609659195, + "loss": 8.1482, + "step": 2200000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999438063022906, + "loss": 8.2034, + "step": 2205000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999435510603619, + "loss": 8.2233, + "step": 2210000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999432952401341, + "loss": 8.2332, + "step": 2215000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999430388416074, + "loss": 8.2519, + "step": 2220000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999427818647827, + "loss": 8.2316, + "step": 2225000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999425243096605, + "loss": 8.2477, + "step": 2230000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999422661762416, + "loss": 8.2251, + "step": 2235000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999420074645262, + "loss": 8.2782, + "step": 2240000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999417481745153, + "loss": 8.26, + "step": 2245000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999414883062092, + "loss": 8.2635, + "step": 2250000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999412278596087, + "loss": 8.2708, + "step": 2255000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999409668347142, + "loss": 8.2981, + "step": 2260000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999407052315265, + "loss": 8.2581, + "step": 2265000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999404430500461, + "loss": 8.2532, + "step": 2270000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999401802902737, + "loss": 8.2697, + "step": 2275000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999399169522098, + "loss": 8.2703, + "step": 2280000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999396530358551, + "loss": 8.2604, + "step": 2285000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999393885412101, + "loss": 8.2728, + "step": 2290000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999391234682756, + "loss": 8.2636, + "step": 2295000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499938857817052, + "loss": 8.2473, + "step": 2300000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999385915875399, + "loss": 8.2496, + "step": 2305000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999383247797401, + "loss": 8.2689, + "step": 2310000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999380573936532, + "loss": 8.303, + "step": 2315000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999377894292798, + "loss": 8.2291, + "step": 2320000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999375208866203, + "loss": 8.2713, + "step": 2325000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999372517656756, + "loss": 8.3408, + "step": 2330000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999369820664463, + "loss": 8.2794, + "step": 2335000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999367117889328, + "loss": 8.3002, + "step": 2340000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999364409331358, + "loss": 8.2982, + "step": 2345000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999361694990562, + "loss": 8.2878, + "step": 2350000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999358974866943, + "loss": 8.2482, + "step": 2355000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999356248960509, + "loss": 8.2382, + "step": 2360000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999353517271267, + "loss": 8.2243, + "step": 2365000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499935077979922, + "loss": 8.1924, + "step": 2370000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999348036544378, + "loss": 8.1962, + "step": 2375000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999345287506745, + "loss": 8.1667, + "step": 2380000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999342532686328, + "loss": 8.1605, + "step": 2385000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999339772083134, + "loss": 8.1692, + "step": 2390000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499933700569717, + "loss": 8.1645, + "step": 2395000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499933423352844, + "loss": 8.1408, + "step": 2400000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999331455576953, + "loss": 8.134, + "step": 2405000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999328671842712, + "loss": 8.1468, + "step": 2410000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999325882325728, + "loss": 8.113, + "step": 2415000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999323087026004, + "loss": 8.1044, + "step": 2420000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999320285943548, + "loss": 8.0701, + "step": 2425000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999317479078366, + "loss": 8.0466, + "step": 2430000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999314666430465, + "loss": 8.0082, + "step": 2435000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499931184799985, + "loss": 8.0217, + "step": 2440000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999309023786529, + "loss": 8.0033, + "step": 2445000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999306193790509, + "loss": 8.0105, + "step": 2450000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999303358011794, + "loss": 7.9985, + "step": 2455000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999300516450393, + "loss": 8.0164, + "step": 2460000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999297669106312, + "loss": 8.0093, + "step": 2465000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999294815979557, + "loss": 8.0111, + "step": 2470000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999291957070135, + "loss": 8.0337, + "step": 2475000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999289092378053, + "loss": 8.0641, + "step": 2480000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999286221903317, + "loss": 7.9939, + "step": 2485000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999283345645934, + "loss": 8.0228, + "step": 2490000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499928046360591, + "loss": 8.0458, + "step": 2495000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999277575783253, + "loss": 8.0302, + "step": 2500000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999274682177968, + "loss": 8.0451, + "step": 2505000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999271782790063, + "loss": 8.0334, + "step": 2510000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999268877619545, + "loss": 8.0034, + "step": 2515000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999265966666419, + "loss": 7.9877, + "step": 2520000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999263049930692, + "loss": 8.0222, + "step": 2525000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999260127412374, + "loss": 7.993, + "step": 2530000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999257199111468, + "loss": 7.9746, + "step": 2535000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999254265027982, + "loss": 7.9478, + "step": 2540000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999251325161922, + "loss": 7.9584, + "step": 2545000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999248379513296, + "loss": 7.9407, + "step": 2550000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499924542808211, + "loss": 7.8694, + "step": 2555000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999242470868372, + "loss": 7.8591, + "step": 2560000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999239507872088, + "loss": 7.8295, + "step": 2565000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999236539093266, + "loss": 7.7761, + "step": 2570000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999233564531911, + "loss": 7.7571, + "step": 2575000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499923058418803, + "loss": 7.7923, + "step": 2580000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999227598061631, + "loss": 7.7985, + "step": 2585000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499922460615272, + "loss": 7.7888, + "step": 2590000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999221608461306, + "loss": 7.7891, + "step": 2595000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999218604987393, + "loss": 7.7764, + "step": 2600000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499921559573099, + "loss": 7.7866, + "step": 2605000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999212580692103, + "loss": 7.779, + "step": 2610000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999209559870738, + "loss": 7.7859, + "step": 2615000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999206533266905, + "loss": 7.7553, + "step": 2620000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999203500880609, + "loss": 7.723, + "step": 2625000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999200462711857, + "loss": 7.6725, + "step": 2630000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999197418760656, + "loss": 7.7191, + "step": 2635000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999194369027014, + "loss": 7.7714, + "step": 2640000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999191313510937, + "loss": 7.7341, + "step": 2645000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999188252212432, + "loss": 7.7491, + "step": 2650000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999185185131507, + "loss": 7.7664, + "step": 2655000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999182112268169, + "loss": 7.7736, + "step": 2660000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999179033622424, + "loss": 7.7262, + "step": 2665000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999175949194281, + "loss": 7.6883, + "step": 2670000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999172858983745, + "loss": 7.7069, + "step": 2675000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999169762990825, + "loss": 7.6417, + "step": 2680000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999166661215527, + "loss": 7.6366, + "step": 2685000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999163553657858, + "loss": 7.6692, + "step": 2690000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999160440317825, + "loss": 7.6494, + "step": 2695000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999157321195438, + "loss": 7.6101, + "step": 2700000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999154196290701, + "loss": 7.6184, + "step": 2705000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999151065603622, + "loss": 7.6237, + "step": 2710000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999147929134209, + "loss": 7.6552, + "step": 2715000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999144786882469, + "loss": 7.6579, + "step": 2720000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999141638848409, + "loss": 7.6001, + "step": 2725000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999138485032035, + "loss": 7.5975, + "step": 2730000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999135325433358, + "loss": 7.6248, + "step": 2735000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999132160052382, + "loss": 7.6358, + "step": 2740000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999128988889115, + "loss": 7.5984, + "step": 2745000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999125811943565, + "loss": 7.5925, + "step": 2750000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999122629215739, + "loss": 7.6375, + "step": 2755000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999119440705644, + "loss": 7.6285, + "step": 2760000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999116246413288, + "loss": 7.6137, + "step": 2765000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999113046338678, + "loss": 7.6145, + "step": 2770000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999109840481822, + "loss": 7.6126, + "step": 2775000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999106628842726, + "loss": 7.5928, + "step": 2780000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999103411421399, + "loss": 7.5962, + "step": 2785000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999100188217848, + "loss": 7.5763, + "step": 2790000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999096959232082, + "loss": 7.5711, + "step": 2795000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999093724464104, + "loss": 7.5754, + "step": 2800000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999090483913926, + "loss": 7.5963, + "step": 2805000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999087237581553, + "loss": 7.581, + "step": 2810000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999083985466992, + "loss": 7.5872, + "step": 2815000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999080727570254, + "loss": 7.5772, + "step": 2820000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999077463891343, + "loss": 7.5629, + "step": 2825000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999074194430268, + "loss": 7.5575, + "step": 2830000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999070919187037, + "loss": 7.5157, + "step": 2835000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999067638161657, + "loss": 7.5362, + "step": 2840000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999064351354135, + "loss": 7.5166, + "step": 2845000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999061058764481, + "loss": 7.5412, + "step": 2850000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999057760392699, + "loss": 7.5056, + "step": 2855000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999054456238798, + "loss": 7.5058, + "step": 2860000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999051146302787, + "loss": 7.4933, + "step": 2865000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999047830584674, + "loss": 7.4638, + "step": 2870000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999044509084463, + "loss": 7.4749, + "step": 2875000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999041181802165, + "loss": 7.4796, + "step": 2880000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999037848737787, + "loss": 7.4789, + "step": 2885000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999034509891337, + "loss": 7.4779, + "step": 2890000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999031165262822, + "loss": 7.4957, + "step": 2895000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499902781485225, + "loss": 7.4947, + "step": 2900000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999024458659628, + "loss": 7.5077, + "step": 2905000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999021096684966, + "loss": 7.491, + "step": 2910000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499901772892827, + "loss": 7.4886, + "step": 2915000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999014355389547, + "loss": 7.4775, + "step": 2920000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999010976068807, + "loss": 7.4578, + "step": 2925000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999007590966056, + "loss": 7.4282, + "step": 2930000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999004200081303, + "loss": 7.4256, + "step": 2935000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999000803414556, + "loss": 7.4379, + "step": 2940000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499899740096582, + "loss": 7.4527, + "step": 2945000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998993992735107, + "loss": 7.4464, + "step": 2950000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998990578722422, + "loss": 7.4258, + "step": 2955000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998987158927775, + "loss": 7.4279, + "step": 2960000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998983733351172, + "loss": 7.4214, + "step": 2965000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998980301992622, + "loss": 7.3997, + "step": 2970000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998976864852133, + "loss": 7.3827, + "step": 2975000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998973421929711, + "loss": 7.3728, + "step": 2980000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998969973225368, + "loss": 7.3785, + "step": 2985000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998966518739109, + "loss": 7.3772, + "step": 2990000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998963058470941, + "loss": 7.371, + "step": 2995000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998959592420874, + "loss": 7.375, + "step": 3000000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998956120588916, + "loss": 7.3659, + "step": 3005000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998952642975076, + "loss": 7.3832, + "step": 3010000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499894915957936, + "loss": 7.3574, + "step": 3015000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998945670401776, + "loss": 7.3203, + "step": 3020000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998942175442332, + "loss": 7.3212, + "step": 3025000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499893867470104, + "loss": 7.3108, + "step": 3030000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998935168177901, + "loss": 7.3063, + "step": 3035000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499893165587293, + "loss": 7.2776, + "step": 3040000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998928137786131, + "loss": 7.2842, + "step": 3045000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998924613917513, + "loss": 7.2903, + "step": 3050000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998921084267086, + "loss": 7.2383, + "step": 3055000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998917548834856, + "loss": 7.2406, + "step": 3060000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998914007620832, + "loss": 7.2557, + "step": 3065000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998910460625021, + "loss": 7.2607, + "step": 3070000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998906907847433, + "loss": 7.2446, + "step": 3075000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998903349288077, + "loss": 7.2361, + "step": 3080000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998899784946957, + "loss": 7.2172, + "step": 3085000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998896214824086, + "loss": 7.2064, + "step": 3090000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499889263891947, + "loss": 7.2123, + "step": 3095000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998889057233117, + "loss": 7.165, + "step": 3100000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998885469765036, + "loss": 7.1333, + "step": 3105000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998881876515234, + "loss": 7.1097, + "step": 3110000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998878277483722, + "loss": 7.1224, + "step": 3115000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998874672670505, + "loss": 7.1403, + "step": 3120000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998871062075595, + "loss": 7.1434, + "step": 3125000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998867445698998, + "loss": 7.1053, + "step": 3130000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998863823540723, + "loss": 7.0769, + "step": 3135000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998860195600777, + "loss": 7.061, + "step": 3140000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998856561879171, + "loss": 7.0489, + "step": 3145000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499885292237591, + "loss": 7.0647, + "step": 3150000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998849277091006, + "loss": 7.0588, + "step": 3155000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998845626024465, + "loss": 7.0329, + "step": 3160000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998841969176297, + "loss": 7.0196, + "step": 3165000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499883830654651, + "loss": 7.0169, + "step": 3170000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998834638135112, + "loss": 7.0068, + "step": 3175000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499883096394211, + "loss": 6.9748, + "step": 3180000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998827283967517, + "loss": 6.9723, + "step": 3185000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998823598211337, + "loss": 6.9527, + "step": 3190000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499881990667358, + "loss": 6.9318, + "step": 3195000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998816209354256, + "loss": 6.9324, + "step": 3200000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998812506253371, + "loss": 6.9347, + "step": 3205000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998808797370936, + "loss": 6.9538, + "step": 3210000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998805082706958, + "loss": 6.93, + "step": 3215000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998801362261446, + "loss": 6.9095, + "step": 3220000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998797636034408, + "loss": 6.8771, + "step": 3225000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998793904025855, + "loss": 6.8535, + "step": 3230000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998790166235794, + "loss": 6.8399, + "step": 3235000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998786422664232, + "loss": 6.8154, + "step": 3240000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499878267331118, + "loss": 6.8082, + "step": 3245000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998778918176647, + "loss": 6.7983, + "step": 3250000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998775157260639, + "loss": 6.7802, + "step": 3255000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998771390563167, + "loss": 6.7625, + "step": 3260000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998767618084239, + "loss": 6.747, + "step": 3265000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998763839823864, + "loss": 6.7384, + "step": 3270000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499876005578205, + "loss": 6.7496, + "step": 3275000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998756265958806, + "loss": 6.7399, + "step": 3280000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998752470354143, + "loss": 6.7096, + "step": 3285000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998748668968066, + "loss": 6.6752, + "step": 3290000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998744861800585, + "loss": 6.6532, + "step": 3295000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998741048851711, + "loss": 6.6338, + "step": 3300000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499873723012145, + "loss": 6.5932, + "step": 3305000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998733405609813, + "loss": 6.5927, + "step": 3310000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998729575316808, + "loss": 6.5595, + "step": 3315000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998725739242443, + "loss": 6.5342, + "step": 3320000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998721897386729, + "loss": 6.5184, + "step": 3325000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998718049749673, + "loss": 6.5091, + "step": 3330000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998714196331284, + "loss": 6.4739, + "step": 3335000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998710337131571, + "loss": 6.44, + "step": 3340000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998706472150545, + "loss": 6.431, + "step": 3345000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998702601388211, + "loss": 6.3974, + "step": 3350000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998698724844582, + "loss": 6.3681, + "step": 3355000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998694842519664, + "loss": 6.3462, + "step": 3360000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998690954413468, + "loss": 6.3171, + "step": 3365000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998687060526002, + "loss": 6.3039, + "step": 3370000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998683160857275, + "loss": 6.2849, + "step": 3375000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998679255407295, + "loss": 6.2751, + "step": 3380000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998675344176074, + "loss": 6.2425, + "step": 3385000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998671427163619, + "loss": 6.2323, + "step": 3390000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998667504369939, + "loss": 6.2101, + "step": 3395000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998663575795043, + "loss": 6.206, + "step": 3400000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998659641438941, + "loss": 6.189, + "step": 3405000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998655701301642, + "loss": 6.1647, + "step": 3410000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998651755383154, + "loss": 6.159, + "step": 3415000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998647803683488, + "loss": 6.1456, + "step": 3420000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998643846202649, + "loss": 6.1323, + "step": 3425000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998639882940652, + "loss": 6.1335, + "step": 3430000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998635913897502, + "loss": 6.1191, + "step": 3435000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499863193907321, + "loss": 6.0935, + "step": 3440000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998627958467786, + "loss": 6.0872, + "step": 3445000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998623972081235, + "loss": 6.0752, + "step": 3450000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998619979913571, + "loss": 6.0658, + "step": 3455000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998615981964802, + "loss": 6.0556, + "step": 3460000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998611978234935, + "loss": 6.0453, + "step": 3465000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998607968723981, + "loss": 6.0446, + "step": 3470000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499860395343195, + "loss": 6.0357, + "step": 3475000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998599932358851, + "loss": 6.044, + "step": 3480000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998595905504692, + "loss": 6.0299, + "step": 3485000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998591872869483, + "loss": 6.0288, + "step": 3490000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998587834453233, + "loss": 6.0161, + "step": 3495000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998583790255952, + "loss": 6.0054, + "step": 3500000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499857974027765, + "loss": 5.9942, + "step": 3505000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998575684518334, + "loss": 5.9857, + "step": 3510000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998571622978016, + "loss": 5.9877, + "step": 3515000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998567555656704, + "loss": 5.9862, + "step": 3520000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998563482554407, + "loss": 5.9698, + "step": 3525000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998559403671136, + "loss": 5.9642, + "step": 3530000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998555319006898, + "loss": 5.9608, + "step": 3535000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998551228561707, + "loss": 5.9667, + "step": 3540000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998547132335566, + "loss": 5.9588, + "step": 3545000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998543030328489, + "loss": 5.946, + "step": 3550000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998538922540485, + "loss": 5.9504, + "step": 3555000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998534808971563, + "loss": 5.9391, + "step": 3560000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998530689621733, + "loss": 5.9356, + "step": 3565000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998526564491002, + "loss": 5.9244, + "step": 3570000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998522433579384, + "loss": 5.9272, + "step": 3575000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998518296886885, + "loss": 5.925, + "step": 3580000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998514154413515, + "loss": 5.9168, + "step": 3585000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998510006159284, + "loss": 5.9175, + "step": 3590000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998505852124202, + "loss": 5.9058, + "step": 3595000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499850169230828, + "loss": 5.8985, + "step": 3600000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998497526711524, + "loss": 5.9005, + "step": 3605000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998493355333946, + "loss": 5.9032, + "step": 3610000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998489178175557, + "loss": 5.8991, + "step": 3615000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998484995236363, + "loss": 5.8947, + "step": 3620000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998480806516377, + "loss": 5.8731, + "step": 3625000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998476612015606, + "loss": 5.8891, + "step": 3630000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998472411734063, + "loss": 5.8775, + "step": 3635000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998468205671755, + "loss": 5.8829, + "step": 3640000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998463993828692, + "loss": 5.8646, + "step": 3645000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998459776204884, + "loss": 5.8804, + "step": 3650000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998455552800343, + "loss": 5.8714, + "step": 3655000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998451323615075, + "loss": 5.8705, + "step": 3660000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998447088649092, + "loss": 5.866, + "step": 3665000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998442847902404, + "loss": 5.8661, + "step": 3670000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998438601375018, + "loss": 5.8625, + "step": 3675000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998434349066948, + "loss": 5.8624, + "step": 3680000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998430090978202, + "loss": 5.8611, + "step": 3685000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998425827108789, + "loss": 5.8612, + "step": 3690000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998421557458719, + "loss": 5.8462, + "step": 3695000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998417282028005, + "loss": 5.8616, + "step": 3700000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998413000816651, + "loss": 5.8545, + "step": 3705000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998408713824673, + "loss": 5.8612, + "step": 3710000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998404421052077, + "loss": 5.8517, + "step": 3715000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998400122498874, + "loss": 5.855, + "step": 3720000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998395818165075, + "loss": 5.842, + "step": 3725000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998391508050687, + "loss": 5.8499, + "step": 3730000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998387192155724, + "loss": 5.8411, + "step": 3735000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998382870480193, + "loss": 5.8442, + "step": 3740000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998378543024105, + "loss": 5.8376, + "step": 3745000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499837420978747, + "loss": 5.8465, + "step": 3750000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998369870770298, + "loss": 5.838, + "step": 3755000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00049983655259726, + "loss": 5.8371, + "step": 3760000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998361175394384, + "loss": 5.8365, + "step": 3765000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998356819035662, + "loss": 5.8392, + "step": 3770000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998352456896442, + "loss": 5.8358, + "step": 3775000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998348088976737, + "loss": 5.8399, + "step": 3780000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998343715276554, + "loss": 5.8297, + "step": 3785000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998339335795905, + "loss": 5.825, + "step": 3790000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00049983349505348, + "loss": 5.8235, + "step": 3795000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998330559493248, + "loss": 5.8283, + "step": 3800000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998326162671262, + "loss": 5.8185, + "step": 3805000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998321760068848, + "loss": 5.8234, + "step": 3810000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998317351686019, + "loss": 5.8218, + "step": 3815000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998312937522785, + "loss": 5.8146, + "step": 3820000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998308517579155, + "loss": 5.8176, + "step": 3825000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998304091855141, + "loss": 5.8182, + "step": 3830000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998299660350751, + "loss": 5.8208, + "step": 3835000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998295223065998, + "loss": 5.8194, + "step": 3840000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998290780000891, + "loss": 5.829, + "step": 3845000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998286331155438, + "loss": 5.8147, + "step": 3850000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998281876529652, + "loss": 5.8143, + "step": 3855000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998277416123543, + "loss": 5.8108, + "step": 3860000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998272949937122, + "loss": 5.8082, + "step": 3865000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998268477970397, + "loss": 5.8181, + "step": 3870000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998264000223379, + "loss": 5.8044, + "step": 3875000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998259516696081, + "loss": 5.8118, + "step": 3880000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499825502738851, + "loss": 5.8059, + "step": 3885000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998250532300678, + "loss": 5.8096, + "step": 3890000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998246031432595, + "loss": 5.8124, + "step": 3895000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998241524784272, + "loss": 5.8068, + "step": 3900000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998237012355719, + "loss": 5.8133, + "step": 3905000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998232494146947, + "loss": 5.8056, + "step": 3910000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998227970157965, + "loss": 5.8104, + "step": 3915000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998223440388784, + "loss": 5.8119, + "step": 3920000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998218904839416, + "loss": 5.8128, + "step": 3925000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998214363509869, + "loss": 5.8021, + "step": 3930000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998209816400156, + "loss": 5.8091, + "step": 3935000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998205263510286, + "loss": 5.7921, + "step": 3940000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499820070484027, + "loss": 5.8137, + "step": 3945000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998196140390118, + "loss": 5.8, + "step": 3950000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998191570159842, + "loss": 5.8085, + "step": 3955000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998186994149451, + "loss": 5.8102, + "step": 3960000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998182412358955, + "loss": 5.801, + "step": 3965000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998177824788367, + "loss": 5.7999, + "step": 3970000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998173231437696, + "loss": 5.8047, + "step": 3975000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998168632306954, + "loss": 5.804, + "step": 3980000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499816402739615, + "loss": 5.8068, + "step": 3985000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998159416705294, + "loss": 5.7973, + "step": 3990000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00049981548002344, + "loss": 5.7941, + "step": 3995000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998150177983476, + "loss": 5.7839, + "step": 4000000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998145549952533, + "loss": 5.8023, + "step": 4005000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998140916141582, + "loss": 5.7993, + "step": 4010000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998136276550634, + "loss": 5.7915, + "step": 4015000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998131631179701, + "loss": 5.8068, + "step": 4020000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499812698002879, + "loss": 5.7889, + "step": 4025000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998122323097916, + "loss": 5.7873, + "step": 4030000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998117660387086, + "loss": 5.7947, + "step": 4035000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998112991896313, + "loss": 5.7878, + "step": 4040000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998108317625608, + "loss": 5.8034, + "step": 4045000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998103637574981, + "loss": 5.7911, + "step": 4050000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998098951744443, + "loss": 5.7927, + "step": 4055000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998094260134005, + "loss": 5.7973, + "step": 4060000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998089562743678, + "loss": 5.8046, + "step": 4065000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998084859573472, + "loss": 5.7947, + "step": 4070000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998080150623399, + "loss": 5.7934, + "step": 4075000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998075435893467, + "loss": 5.7906, + "step": 4080000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998070715383692, + "loss": 5.7772, + "step": 4085000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998065989094082, + "loss": 5.7962, + "step": 4090000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998061257024647, + "loss": 5.802, + "step": 4095000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00049980565191754, + "loss": 5.7967, + "step": 4100000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998051775546351, + "loss": 5.7827, + "step": 4105000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998047026137511, + "loss": 5.7844, + "step": 4110000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998042270948891, + "loss": 5.7964, + "step": 4115000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998037509980502, + "loss": 5.8, + "step": 4120000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998032743232355, + "loss": 5.7951, + "step": 4125000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499802797070446, + "loss": 5.8, + "step": 4130000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998023192396831, + "loss": 5.7958, + "step": 4135000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998018408309476, + "loss": 5.8041, + "step": 4140000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998013618442408, + "loss": 5.783, + "step": 4145000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998008822795636, + "loss": 5.7961, + "step": 4150000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998004021369174, + "loss": 5.7924, + "step": 4155000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997999214163031, + "loss": 5.7976, + "step": 4160000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997994401177219, + "loss": 5.798, + "step": 4165000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997989582411748, + "loss": 5.7874, + "step": 4170000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499798475786663, + "loss": 5.7848, + "step": 4175000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997979927541876, + "loss": 5.787, + "step": 4180000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997975091437497, + "loss": 5.7947, + "step": 4185000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997970249553505, + "loss": 5.7894, + "step": 4190000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997965401889911, + "loss": 5.7835, + "step": 4195000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997960548446725, + "loss": 5.7828, + "step": 4200000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499795568922396, + "loss": 5.7876, + "step": 4205000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997950824221626, + "loss": 5.7814, + "step": 4210000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997945953439735, + "loss": 5.7835, + "step": 4215000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997941076878297, + "loss": 5.7961, + "step": 4220000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997936194537325, + "loss": 5.7924, + "step": 4225000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997931306416828, + "loss": 5.7789, + "step": 4230000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499792641251682, + "loss": 5.7848, + "step": 4235000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997921512837311, + "loss": 5.7747, + "step": 4240000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997916607378312, + "loss": 5.7827, + "step": 4245000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997911696139835, + "loss": 5.7856, + "step": 4250000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997906779121892, + "loss": 5.783, + "step": 4255000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997901856324493, + "loss": 5.7864, + "step": 4260000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997896927747649, + "loss": 5.7805, + "step": 4265000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997891993391373, + "loss": 5.7904, + "step": 4270000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997887053255676, + "loss": 5.7943, + "step": 4275000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997882107340569, + "loss": 5.7962, + "step": 4280000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997877155646063, + "loss": 5.7891, + "step": 4285000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997872198172171, + "loss": 5.7867, + "step": 4290000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997867234918902, + "loss": 5.7902, + "step": 4295000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997862265886271, + "loss": 5.7774, + "step": 4300000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997857291074286, + "loss": 5.7809, + "step": 4305000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499785231048296, + "loss": 5.7785, + "step": 4310000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997847324112306, + "loss": 5.7871, + "step": 4315000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997842331962332, + "loss": 5.7785, + "step": 4320000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997837334033054, + "loss": 5.7766, + "step": 4325000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997832330324479, + "loss": 5.7901, + "step": 4330000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997827320836622, + "loss": 5.7853, + "step": 4335000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997822305569493, + "loss": 5.7874, + "step": 4340000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997817284523104, + "loss": 5.7914, + "step": 4345000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997812257697467, + "loss": 5.7902, + "step": 4350000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997807225092593, + "loss": 5.7816, + "step": 4355000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997802186708493, + "loss": 5.775, + "step": 4360000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499779714254518, + "loss": 5.7831, + "step": 4365000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997792092602666, + "loss": 5.7777, + "step": 4370000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997787036880961, + "loss": 5.779, + "step": 4375000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997781975380077, + "loss": 5.7796, + "step": 4380000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997776908100028, + "loss": 5.7781, + "step": 4385000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997771835040823, + "loss": 5.7771, + "step": 4390000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997766756202475, + "loss": 5.7827, + "step": 4395000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997761671584995, + "loss": 5.783, + "step": 4400000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997756581188395, + "loss": 5.7835, + "step": 4405000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997751485012688, + "loss": 5.7873, + "step": 4410000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997746383057885, + "loss": 5.7744, + "step": 4415000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997741275323997, + "loss": 5.7752, + "step": 4420000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997736161811037, + "loss": 5.7843, + "step": 4425000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997731042519015, + "loss": 5.7788, + "step": 4430000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997725917447945, + "loss": 5.7817, + "step": 4435000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997720786597838, + "loss": 5.775, + "step": 4440000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997715649968706, + "loss": 5.7807, + "step": 4445000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499771050756056, + "loss": 5.7893, + "step": 4450000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997705359373412, + "loss": 5.7742, + "step": 4455000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997700205407276, + "loss": 5.7769, + "step": 4460000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997695045662162, + "loss": 5.7797, + "step": 4465000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997689880138083, + "loss": 5.7801, + "step": 4470000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499768470883505, + "loss": 5.777, + "step": 4475000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997679531753075, + "loss": 5.7898, + "step": 4480000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499767434889217, + "loss": 5.7771, + "step": 4485000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997669160252348, + "loss": 5.7716, + "step": 4490000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499766396583362, + "loss": 5.7714, + "step": 4495000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997658765635998, + "loss": 5.7832, + "step": 4500000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997653559659495, + "loss": 5.7738, + "step": 4505000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997648347904122, + "loss": 5.7686, + "step": 4510000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997643130369891, + "loss": 5.7733, + "step": 4515000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997637907056816, + "loss": 5.7771, + "step": 4520000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997632677964906, + "loss": 5.7837, + "step": 4525000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997627443094175, + "loss": 5.7774, + "step": 4530000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997622202444636, + "loss": 5.7821, + "step": 4535000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997616956016298, + "loss": 5.761, + "step": 4540000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997611703809177, + "loss": 5.7795, + "step": 4545000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997606445823283, + "loss": 5.7794, + "step": 4550000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997601182058628, + "loss": 5.7741, + "step": 4555000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997595912515224, + "loss": 5.7792, + "step": 4560000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997590637193084, + "loss": 5.7692, + "step": 4565000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499758535609222, + "loss": 5.7858, + "step": 4570000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997580069212644, + "loss": 5.7781, + "step": 4575000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499757477655437, + "loss": 5.7855, + "step": 4580000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997569478117407, + "loss": 5.7668, + "step": 4585000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499756417390177, + "loss": 5.7681, + "step": 4590000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997558863907469, + "loss": 5.7772, + "step": 4595000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997553548134518, + "loss": 5.7633, + "step": 4600000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997548226582929, + "loss": 5.7828, + "step": 4605000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997542899252714, + "loss": 5.7726, + "step": 4610000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997537566143886, + "loss": 5.7748, + "step": 4615000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997532227256456, + "loss": 5.7676, + "step": 4620000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997526882590437, + "loss": 5.784, + "step": 4625000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997521532145841, + "loss": 5.7698, + "step": 4630000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997516175922682, + "loss": 5.7794, + "step": 4635000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997510813920972, + "loss": 5.7775, + "step": 4640000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997505446140721, + "loss": 5.7744, + "step": 4645000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997500072581943, + "loss": 5.7749, + "step": 4650000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997494693244652, + "loss": 5.7874, + "step": 4655000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997489308128857, + "loss": 5.7749, + "step": 4660000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997483917234574, + "loss": 5.776, + "step": 4665000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997478520561814, + "loss": 5.7645, + "step": 4670000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997473118110589, + "loss": 5.7727, + "step": 4675000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997467709880912, + "loss": 5.7834, + "step": 4680000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997462295872794, + "loss": 5.7788, + "step": 4685000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499745687608625, + "loss": 5.7777, + "step": 4690000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997451450521292, + "loss": 5.7823, + "step": 4695000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997446019177932, + "loss": 5.7738, + "step": 4700000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997440582056182, + "loss": 5.7861, + "step": 4705000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997435139156055, + "loss": 5.7768, + "step": 4710000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997429690477564, + "loss": 5.7802, + "step": 4715000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997424236020722, + "loss": 5.7791, + "step": 4720000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499741877578554, + "loss": 5.7771, + "step": 4725000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997413309772032, + "loss": 5.7744, + "step": 4730000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499740783798021, + "loss": 5.7682, + "step": 4735000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997402360410087, + "loss": 5.771, + "step": 4740000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997396877061676, + "loss": 5.7762, + "step": 4745000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499739138793499, + "loss": 5.7693, + "step": 4750000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499738589303004, + "loss": 5.7779, + "step": 4755000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997380392346839, + "loss": 5.7696, + "step": 4760000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997374885885402, + "loss": 5.7817, + "step": 4765000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997369373645738, + "loss": 5.775, + "step": 4770000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997363855627864, + "loss": 5.777, + "step": 4775000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499735833183179, + "loss": 5.7665, + "step": 4780000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997352802257529, + "loss": 5.7802, + "step": 4785000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997347266905095, + "loss": 5.7794, + "step": 4790000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00049973417257745, + "loss": 5.7793, + "step": 4795000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997336178865756, + "loss": 5.7804, + "step": 4800000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997330626178878, + "loss": 5.7776, + "step": 4805000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997325067713877, + "loss": 5.7697, + "step": 4810000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997319503470765, + "loss": 5.7728, + "step": 4815000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997313933449559, + "loss": 5.7885, + "step": 4820000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997308357650267, + "loss": 5.7713, + "step": 4825000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997302776072905, + "loss": 5.792, + "step": 4830000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997297188717486, + "loss": 5.7741, + "step": 4835000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499729159558402, + "loss": 5.7726, + "step": 4840000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997285996672522, + "loss": 5.7735, + "step": 4845000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997280391983007, + "loss": 5.7674, + "step": 4850000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997274781515484, + "loss": 5.7837, + "step": 4855000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997269165269968, + "loss": 5.7831, + "step": 4860000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997263543246472, + "loss": 5.7744, + "step": 4865000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997257915445009, + "loss": 5.7695, + "step": 4870000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997252281865592, + "loss": 5.7722, + "step": 4875000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997246642508233, + "loss": 5.7746, + "step": 4880000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997240997372946, + "loss": 5.7707, + "step": 4885000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997235346459744, + "loss": 5.789, + "step": 4890000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499722968976864, + "loss": 5.7864, + "step": 4895000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997224027299647, + "loss": 5.7804, + "step": 4900000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997218359052779, + "loss": 5.7705, + "step": 4905000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997212685028048, + "loss": 5.7645, + "step": 4910000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997207005225467, + "loss": 5.7834, + "step": 4915000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499720131964505, + "loss": 5.7885, + "step": 4920000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997195628286809, + "loss": 5.7779, + "step": 4925000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499718993115076, + "loss": 5.7758, + "step": 4930000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997184228236912, + "loss": 5.7711, + "step": 4935000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997178519545281, + "loss": 5.7807, + "step": 4940000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997172805075879, + "loss": 5.7835, + "step": 4945000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997167084828721, + "loss": 5.7817, + "step": 4950000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997161358803818, + "loss": 5.7753, + "step": 4955000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997155627001185, + "loss": 5.7697, + "step": 4960000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997149889420832, + "loss": 5.7705, + "step": 4965000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997144146062778, + "loss": 5.7725, + "step": 4970000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997138396927031, + "loss": 5.7804, + "step": 4975000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997132642013607, + "loss": 5.7711, + "step": 4980000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997126881322518, + "loss": 5.7751, + "step": 4985000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997121114853779, + "loss": 5.7851, + "step": 4990000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997115342607402, + "loss": 5.7743, + "step": 4995000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00049971095645834, + "loss": 5.7762, + "step": 5000000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997103780781788, + "loss": 5.7669, + "step": 5005000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997097991202578, + "loss": 5.7694, + "step": 5010000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997092195845784, + "loss": 5.7716, + "step": 5015000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997086394711419, + "loss": 5.7884, + "step": 5020000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997080587799496, + "loss": 5.7788, + "step": 5025000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499707477511003, + "loss": 5.7753, + "step": 5030000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997068956643034, + "loss": 5.7691, + "step": 5035000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499706313239852, + "loss": 5.781, + "step": 5040000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997057302376502, + "loss": 5.7799, + "step": 5045000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997051466576995, + "loss": 5.7671, + "step": 5050000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997045625000012, + "loss": 5.7771, + "step": 5055000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997039777645565, + "loss": 5.7738, + "step": 5060000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997033924513669, + "loss": 5.7746, + "step": 5065000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997028065604337, + "loss": 5.7758, + "step": 5070000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997022200917582, + "loss": 5.7802, + "step": 5075000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997016330453418, + "loss": 5.7693, + "step": 5080000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499701045421186, + "loss": 5.7806, + "step": 5085000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997004572192919, + "loss": 5.7844, + "step": 5090000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499699868439661, + "loss": 5.7856, + "step": 5095000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996992790822949, + "loss": 5.7773, + "step": 5100000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996986891471945, + "loss": 5.7725, + "step": 5105000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996980986343614, + "loss": 5.779, + "step": 5110000 + } + ], + "logging_steps": 5000, + "max_steps": 326562159, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 5000, + "total_flos": 8.278749111273246e+19, + "train_batch_size": 12, + "trial_name": null, + "trial_params": null +}