{ "best_metric": 37.15599055412812, "best_model_checkpoint": "./whisper-small-finetuned/checkpoint-6000", "epoch": 1.20825, "eval_steps": 2000, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00625, "grad_norm": 11.4181547164917, "learning_rate": 4.7000000000000005e-07, "loss": 1.6163, "step": 50 }, { "epoch": 0.0125, "grad_norm": 7.794241428375244, "learning_rate": 9.7e-07, "loss": 1.3889, "step": 100 }, { "epoch": 0.01875, "grad_norm": 8.827476501464844, "learning_rate": 1.4700000000000001e-06, "loss": 1.2801, "step": 150 }, { "epoch": 0.025, "grad_norm": 6.476517200469971, "learning_rate": 1.97e-06, "loss": 1.2292, "step": 200 }, { "epoch": 0.03125, "grad_norm": 7.033958435058594, "learning_rate": 2.47e-06, "loss": 1.0831, "step": 250 }, { "epoch": 0.0375, "grad_norm": 6.324102401733398, "learning_rate": 2.97e-06, "loss": 1.0518, "step": 300 }, { "epoch": 0.04375, "grad_norm": 6.920722961425781, "learning_rate": 3.4700000000000002e-06, "loss": 0.9768, "step": 350 }, { "epoch": 0.05, "grad_norm": 6.655612468719482, "learning_rate": 3.97e-06, "loss": 0.9377, "step": 400 }, { "epoch": 0.05625, "grad_norm": 8.39956283569336, "learning_rate": 4.47e-06, "loss": 0.9107, "step": 450 }, { "epoch": 0.0625, "grad_norm": 6.610105514526367, "learning_rate": 4.970000000000001e-06, "loss": 0.9365, "step": 500 }, { "epoch": 0.06875, "grad_norm": 7.250374794006348, "learning_rate": 5.470000000000001e-06, "loss": 0.8301, "step": 550 }, { "epoch": 0.075, "grad_norm": 5.3105669021606445, "learning_rate": 5.9700000000000004e-06, "loss": 0.8703, "step": 600 }, { "epoch": 0.08125, "grad_norm": 5.701041221618652, "learning_rate": 6.470000000000001e-06, "loss": 0.8235, "step": 650 }, { "epoch": 0.0875, "grad_norm": 5.628653049468994, "learning_rate": 6.97e-06, "loss": 0.7949, "step": 700 }, { "epoch": 0.09375, "grad_norm": 6.777932643890381, "learning_rate": 7.4700000000000005e-06, "loss": 0.7607, "step": 750 }, { "epoch": 0.1, "grad_norm": 5.994093418121338, "learning_rate": 7.970000000000002e-06, "loss": 0.7144, "step": 800 }, { "epoch": 0.10625, "grad_norm": 7.069298267364502, "learning_rate": 8.47e-06, "loss": 0.807, "step": 850 }, { "epoch": 0.1125, "grad_norm": 4.62778902053833, "learning_rate": 8.97e-06, "loss": 0.7103, "step": 900 }, { "epoch": 0.11875, "grad_norm": 5.539309501647949, "learning_rate": 9.47e-06, "loss": 0.7273, "step": 950 }, { "epoch": 0.125, "grad_norm": 4.9156036376953125, "learning_rate": 9.970000000000001e-06, "loss": 0.6556, "step": 1000 }, { "epoch": 0.13125, "grad_norm": 5.073310375213623, "learning_rate": 9.932857142857145e-06, "loss": 0.7311, "step": 1050 }, { "epoch": 0.1375, "grad_norm": 6.0137553215026855, "learning_rate": 9.861428571428572e-06, "loss": 0.6972, "step": 1100 }, { "epoch": 0.14375, "grad_norm": 6.463374137878418, "learning_rate": 9.790000000000001e-06, "loss": 0.6923, "step": 1150 }, { "epoch": 0.15, "grad_norm": 4.6144185066223145, "learning_rate": 9.71857142857143e-06, "loss": 0.5852, "step": 1200 }, { "epoch": 0.15625, "grad_norm": 6.625977516174316, "learning_rate": 9.647142857142857e-06, "loss": 0.6973, "step": 1250 }, { "epoch": 0.1625, "grad_norm": 5.229981899261475, "learning_rate": 9.575714285714286e-06, "loss": 0.6833, "step": 1300 }, { "epoch": 0.16875, "grad_norm": 4.967216491699219, "learning_rate": 9.504285714285715e-06, "loss": 0.7299, "step": 1350 }, { "epoch": 0.175, "grad_norm": 5.4233551025390625, "learning_rate": 9.432857142857143e-06, "loss": 0.7372, "step": 1400 }, { "epoch": 0.18125, "grad_norm": 5.350655555725098, "learning_rate": 9.361428571428572e-06, "loss": 0.6854, "step": 1450 }, { "epoch": 0.1875, "grad_norm": 4.796384811401367, "learning_rate": 9.29e-06, "loss": 0.5997, "step": 1500 }, { "epoch": 0.19375, "grad_norm": 4.787758827209473, "learning_rate": 9.218571428571428e-06, "loss": 0.6864, "step": 1550 }, { "epoch": 0.2, "grad_norm": 4.423880100250244, "learning_rate": 9.147142857142857e-06, "loss": 0.6138, "step": 1600 }, { "epoch": 0.20625, "grad_norm": 4.9922895431518555, "learning_rate": 9.075714285714286e-06, "loss": 0.6438, "step": 1650 }, { "epoch": 0.2125, "grad_norm": 5.809079170227051, "learning_rate": 9.004285714285715e-06, "loss": 0.6138, "step": 1700 }, { "epoch": 0.21875, "grad_norm": 4.862271785736084, "learning_rate": 8.932857142857143e-06, "loss": 0.6552, "step": 1750 }, { "epoch": 0.225, "grad_norm": 4.329502105712891, "learning_rate": 8.861428571428572e-06, "loss": 0.6638, "step": 1800 }, { "epoch": 0.23125, "grad_norm": 4.74673318862915, "learning_rate": 8.79e-06, "loss": 0.7205, "step": 1850 }, { "epoch": 0.2375, "grad_norm": 5.6587114334106445, "learning_rate": 8.71857142857143e-06, "loss": 0.6283, "step": 1900 }, { "epoch": 0.24375, "grad_norm": 4.025678634643555, "learning_rate": 8.647142857142859e-06, "loss": 0.5727, "step": 1950 }, { "epoch": 0.25, "grad_norm": 5.050192356109619, "learning_rate": 8.575714285714286e-06, "loss": 0.67, "step": 2000 }, { "epoch": 0.25, "eval_loss": 0.6608728766441345, "eval_runtime": 3982.4181, "eval_samples_per_second": 1.089, "eval_steps_per_second": 0.136, "eval_wer": 40.78878971053452, "step": 2000 }, { "epoch": 0.25625, "grad_norm": 5.174773216247559, "learning_rate": 8.504285714285715e-06, "loss": 0.7296, "step": 2050 }, { "epoch": 0.2625, "grad_norm": 4.204230308532715, "learning_rate": 8.432857142857144e-06, "loss": 0.6267, "step": 2100 }, { "epoch": 0.26875, "grad_norm": 4.041508674621582, "learning_rate": 8.361428571428573e-06, "loss": 0.6318, "step": 2150 }, { "epoch": 0.275, "grad_norm": 4.373353958129883, "learning_rate": 8.29e-06, "loss": 0.6095, "step": 2200 }, { "epoch": 0.28125, "grad_norm": 5.110273361206055, "learning_rate": 8.21857142857143e-06, "loss": 0.6114, "step": 2250 }, { "epoch": 0.2875, "grad_norm": 7.183779239654541, "learning_rate": 8.147142857142858e-06, "loss": 0.6142, "step": 2300 }, { "epoch": 0.29375, "grad_norm": 4.0610551834106445, "learning_rate": 8.075714285714287e-06, "loss": 0.5791, "step": 2350 }, { "epoch": 0.3, "grad_norm": 4.765223503112793, "learning_rate": 8.004285714285715e-06, "loss": 0.6245, "step": 2400 }, { "epoch": 0.30625, "grad_norm": 5.459305763244629, "learning_rate": 7.932857142857144e-06, "loss": 0.6329, "step": 2450 }, { "epoch": 0.3125, "grad_norm": 4.936705112457275, "learning_rate": 7.861428571428573e-06, "loss": 0.6021, "step": 2500 }, { "epoch": 0.31875, "grad_norm": 4.05385684967041, "learning_rate": 7.790000000000002e-06, "loss": 0.6026, "step": 2550 }, { "epoch": 0.325, "grad_norm": 4.764458179473877, "learning_rate": 7.72e-06, "loss": 0.6191, "step": 2600 }, { "epoch": 0.33125, "grad_norm": 4.8262434005737305, "learning_rate": 7.64857142857143e-06, "loss": 0.6451, "step": 2650 }, { "epoch": 0.3375, "grad_norm": 5.038938522338867, "learning_rate": 7.577142857142857e-06, "loss": 0.6515, "step": 2700 }, { "epoch": 0.34375, "grad_norm": 6.03196907043457, "learning_rate": 7.505714285714286e-06, "loss": 0.6328, "step": 2750 }, { "epoch": 0.35, "grad_norm": 4.219414710998535, "learning_rate": 7.434285714285715e-06, "loss": 0.6202, "step": 2800 }, { "epoch": 0.35625, "grad_norm": 4.451348781585693, "learning_rate": 7.362857142857144e-06, "loss": 0.6193, "step": 2850 }, { "epoch": 0.3625, "grad_norm": 4.656250953674316, "learning_rate": 7.291428571428571e-06, "loss": 0.5295, "step": 2900 }, { "epoch": 0.36875, "grad_norm": 14.434591293334961, "learning_rate": 7.22e-06, "loss": 0.6179, "step": 2950 }, { "epoch": 0.375, "grad_norm": 4.388996124267578, "learning_rate": 7.148571428571429e-06, "loss": 0.5695, "step": 3000 }, { "epoch": 0.38125, "grad_norm": 4.346401214599609, "learning_rate": 7.077142857142858e-06, "loss": 0.5755, "step": 3050 }, { "epoch": 0.3875, "grad_norm": 4.0902252197265625, "learning_rate": 7.0057142857142865e-06, "loss": 0.6107, "step": 3100 }, { "epoch": 0.39375, "grad_norm": 4.089328289031982, "learning_rate": 6.934285714285715e-06, "loss": 0.5957, "step": 3150 }, { "epoch": 0.4, "grad_norm": 5.215518951416016, "learning_rate": 6.862857142857144e-06, "loss": 0.5947, "step": 3200 }, { "epoch": 0.40625, "grad_norm": 4.511919975280762, "learning_rate": 6.791428571428572e-06, "loss": 0.6286, "step": 3250 }, { "epoch": 0.4125, "grad_norm": 4.459268569946289, "learning_rate": 6.720000000000001e-06, "loss": 0.6103, "step": 3300 }, { "epoch": 0.41875, "grad_norm": 4.5081868171691895, "learning_rate": 6.648571428571429e-06, "loss": 0.6093, "step": 3350 }, { "epoch": 0.425, "grad_norm": 5.564323425292969, "learning_rate": 6.577142857142857e-06, "loss": 0.5998, "step": 3400 }, { "epoch": 0.43125, "grad_norm": 4.530568599700928, "learning_rate": 6.505714285714286e-06, "loss": 0.5917, "step": 3450 }, { "epoch": 0.4375, "grad_norm": 5.0652079582214355, "learning_rate": 6.434285714285715e-06, "loss": 0.5956, "step": 3500 }, { "epoch": 0.44375, "grad_norm": 5.318765163421631, "learning_rate": 6.3628571428571426e-06, "loss": 0.6545, "step": 3550 }, { "epoch": 0.45, "grad_norm": 4.156691074371338, "learning_rate": 6.2914285714285716e-06, "loss": 0.6077, "step": 3600 }, { "epoch": 0.45625, "grad_norm": 4.188990116119385, "learning_rate": 6.220000000000001e-06, "loss": 0.6151, "step": 3650 }, { "epoch": 0.4625, "grad_norm": 4.593184471130371, "learning_rate": 6.14857142857143e-06, "loss": 0.6372, "step": 3700 }, { "epoch": 0.46875, "grad_norm": 6.544657230377197, "learning_rate": 6.077142857142858e-06, "loss": 0.633, "step": 3750 }, { "epoch": 0.475, "grad_norm": 4.2101521492004395, "learning_rate": 6.005714285714286e-06, "loss": 0.564, "step": 3800 }, { "epoch": 0.48125, "grad_norm": 4.94673490524292, "learning_rate": 5.934285714285715e-06, "loss": 0.631, "step": 3850 }, { "epoch": 0.4875, "grad_norm": 5.893890380859375, "learning_rate": 5.862857142857143e-06, "loss": 0.5977, "step": 3900 }, { "epoch": 0.49375, "grad_norm": 4.555218696594238, "learning_rate": 5.791428571428572e-06, "loss": 0.6037, "step": 3950 }, { "epoch": 0.5, "grad_norm": 4.622659683227539, "learning_rate": 5.72e-06, "loss": 0.6431, "step": 4000 }, { "epoch": 0.5, "eval_loss": 0.5986924767494202, "eval_runtime": 3981.5379, "eval_samples_per_second": 1.09, "eval_steps_per_second": 0.136, "eval_wer": 40.029852519641764, "step": 4000 }, { "epoch": 0.50625, "grad_norm": 5.098015785217285, "learning_rate": 5.6485714285714285e-06, "loss": 0.6914, "step": 4050 }, { "epoch": 0.5125, "grad_norm": 5.3102312088012695, "learning_rate": 5.5771428571428575e-06, "loss": 0.6189, "step": 4100 }, { "epoch": 0.51875, "grad_norm": 4.711716651916504, "learning_rate": 5.5057142857142865e-06, "loss": 0.6726, "step": 4150 }, { "epoch": 0.525, "grad_norm": 4.459558963775635, "learning_rate": 5.4342857142857155e-06, "loss": 0.6484, "step": 4200 }, { "epoch": 0.53125, "grad_norm": 5.451053619384766, "learning_rate": 5.362857142857143e-06, "loss": 0.5951, "step": 4250 }, { "epoch": 0.5375, "grad_norm": 3.955195665359497, "learning_rate": 5.291428571428572e-06, "loss": 0.5998, "step": 4300 }, { "epoch": 1.002, "grad_norm": 4.236627578735352, "learning_rate": 5.220000000000001e-06, "loss": 0.572, "step": 4350 }, { "epoch": 1.00825, "grad_norm": 5.792555332183838, "learning_rate": 5.14857142857143e-06, "loss": 0.5485, "step": 4400 }, { "epoch": 1.0145, "grad_norm": 4.863269329071045, "learning_rate": 5.077142857142857e-06, "loss": 0.538, "step": 4450 }, { "epoch": 1.02075, "grad_norm": 5.243031024932861, "learning_rate": 5.005714285714286e-06, "loss": 0.6561, "step": 4500 }, { "epoch": 1.027, "grad_norm": 4.527797698974609, "learning_rate": 4.934285714285715e-06, "loss": 0.5889, "step": 4550 }, { "epoch": 1.03325, "grad_norm": 4.516623020172119, "learning_rate": 4.862857142857143e-06, "loss": 0.5596, "step": 4600 }, { "epoch": 1.0395, "grad_norm": 4.004039287567139, "learning_rate": 4.7914285714285715e-06, "loss": 0.5699, "step": 4650 }, { "epoch": 1.04575, "grad_norm": 4.805927276611328, "learning_rate": 4.7200000000000005e-06, "loss": 0.5761, "step": 4700 }, { "epoch": 1.052, "grad_norm": 4.1281962394714355, "learning_rate": 4.648571428571429e-06, "loss": 0.5827, "step": 4750 }, { "epoch": 1.05825, "grad_norm": 4.829611778259277, "learning_rate": 4.577142857142858e-06, "loss": 0.5865, "step": 4800 }, { "epoch": 1.0645, "grad_norm": 3.7667906284332275, "learning_rate": 4.505714285714286e-06, "loss": 0.5711, "step": 4850 }, { "epoch": 1.07075, "grad_norm": 4.786715984344482, "learning_rate": 4.434285714285715e-06, "loss": 0.5728, "step": 4900 }, { "epoch": 1.077, "grad_norm": 4.958069801330566, "learning_rate": 4.362857142857143e-06, "loss": 0.5611, "step": 4950 }, { "epoch": 1.08325, "grad_norm": 3.8642232418060303, "learning_rate": 4.291428571428572e-06, "loss": 0.5695, "step": 5000 }, { "epoch": 1.0895, "grad_norm": 4.423571586608887, "learning_rate": 4.22e-06, "loss": 0.5213, "step": 5050 }, { "epoch": 1.09575, "grad_norm": 4.207879066467285, "learning_rate": 4.148571428571429e-06, "loss": 0.5132, "step": 5100 }, { "epoch": 1.102, "grad_norm": 4.761569499969482, "learning_rate": 4.0771428571428574e-06, "loss": 0.5234, "step": 5150 }, { "epoch": 1.10825, "grad_norm": 4.476335525512695, "learning_rate": 4.0057142857142864e-06, "loss": 0.5797, "step": 5200 }, { "epoch": 1.1145, "grad_norm": 3.8417840003967285, "learning_rate": 3.934285714285715e-06, "loss": 0.4683, "step": 5250 }, { "epoch": 1.12075, "grad_norm": 3.265429735183716, "learning_rate": 3.862857142857143e-06, "loss": 0.4727, "step": 5300 }, { "epoch": 1.127, "grad_norm": 4.57379674911499, "learning_rate": 3.7914285714285722e-06, "loss": 0.5032, "step": 5350 }, { "epoch": 1.13325, "grad_norm": 5.103521823883057, "learning_rate": 3.7200000000000004e-06, "loss": 0.5116, "step": 5400 }, { "epoch": 1.1395, "grad_norm": 4.07943058013916, "learning_rate": 3.648571428571429e-06, "loss": 0.4916, "step": 5450 }, { "epoch": 1.14575, "grad_norm": 4.684885501861572, "learning_rate": 3.5771428571428576e-06, "loss": 0.4923, "step": 5500 }, { "epoch": 1.152, "grad_norm": 3.8068909645080566, "learning_rate": 3.505714285714286e-06, "loss": 0.4273, "step": 5550 }, { "epoch": 1.15825, "grad_norm": 4.139249324798584, "learning_rate": 3.4342857142857143e-06, "loss": 0.5505, "step": 5600 }, { "epoch": 1.1645, "grad_norm": 5.06072998046875, "learning_rate": 3.3628571428571433e-06, "loss": 0.4919, "step": 5650 }, { "epoch": 1.17075, "grad_norm": 4.353974342346191, "learning_rate": 3.2914285714285715e-06, "loss": 0.5165, "step": 5700 }, { "epoch": 1.177, "grad_norm": 4.245412826538086, "learning_rate": 3.2200000000000005e-06, "loss": 0.543, "step": 5750 }, { "epoch": 1.18325, "grad_norm": 3.9986581802368164, "learning_rate": 3.1485714285714287e-06, "loss": 0.5065, "step": 5800 }, { "epoch": 1.1895, "grad_norm": 4.012422561645508, "learning_rate": 3.0771428571428573e-06, "loss": 0.4563, "step": 5850 }, { "epoch": 1.19575, "grad_norm": 3.4883952140808105, "learning_rate": 3.005714285714286e-06, "loss": 0.517, "step": 5900 }, { "epoch": 1.202, "grad_norm": 3.5395171642303467, "learning_rate": 2.9342857142857144e-06, "loss": 0.4525, "step": 5950 }, { "epoch": 1.20825, "grad_norm": 4.436487674713135, "learning_rate": 2.8628571428571435e-06, "loss": 0.476, "step": 6000 }, { "epoch": 1.20825, "eval_loss": 0.577191948890686, "eval_runtime": 3956.9016, "eval_samples_per_second": 1.096, "eval_steps_per_second": 0.137, "eval_wer": 37.15599055412812, "step": 6000 } ], "logging_steps": 50, "max_steps": 8000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.385036770295808e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }