{ "best_metric": 0.8237398652434835, "best_model_checkpoint": "output/eurobert_simce_EuroBERT-EuroBERT-210m_32_bs_1_e/checkpoint-4500", "epoch": 1.0, "eval_steps": 500, "global_step": 7813, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02559836170485089, "grad_norm": 21741072.0, "learning_rate": 1.2787723785166241e-05, "loss": 8.8816, "step": 200 }, { "epoch": 0.05119672340970178, "grad_norm": 11201852.0, "learning_rate": 2.5575447570332482e-05, "loss": 5.1404, "step": 400 }, { "epoch": 0.06399590426212723, "eval_loss": 6.53036642074585, "eval_runtime": 185.969, "eval_samples_per_second": 35.538, "eval_sequential_score": 0.785466694337272, "eval_steps_per_second": 0.28, "eval_sts-dev-128_pearson_cosine": 0.763999536812149, "eval_sts-dev-128_spearman_cosine": 0.7711764958651725, "eval_sts-dev-256_pearson_cosine": 0.7720362664289375, "eval_sts-dev-256_spearman_cosine": 0.7765855658716928, "eval_sts-dev-512_pearson_cosine": 0.7788408953704133, "eval_sts-dev-512_spearman_cosine": 0.7817962751922598, "eval_sts-dev-64_pearson_cosine": 0.7530255624130506, "eval_sts-dev-64_spearman_cosine": 0.7635133266587258, "eval_sts-dev-768_pearson_cosine": 0.7833066834602731, "eval_sts-dev-768_spearman_cosine": 0.785466694337272, "step": 500 }, { "epoch": 0.07679508511455267, "grad_norm": 97525040.0, "learning_rate": 3.8363171355498725e-05, "loss": 4.7789, "step": 600 }, { "epoch": 0.10239344681940356, "grad_norm": 16968314.0, "learning_rate": 4.987199544872707e-05, "loss": 4.6845, "step": 800 }, { "epoch": 0.12799180852425446, "grad_norm": 8517995.0, "learning_rate": 4.844972265680558e-05, "loss": 4.6628, "step": 1000 }, { "epoch": 0.12799180852425446, "eval_loss": 6.329844951629639, "eval_runtime": 443.2691, "eval_samples_per_second": 14.91, "eval_sequential_score": 0.7900474282041248, "eval_steps_per_second": 0.117, "eval_sts-dev-128_pearson_cosine": 0.7796253891599777, "eval_sts-dev-128_spearman_cosine": 0.7828728829847589, "eval_sts-dev-256_pearson_cosine": 0.7844919892349245, "eval_sts-dev-256_spearman_cosine": 0.7854868395314357, "eval_sts-dev-512_pearson_cosine": 0.7895438861374281, "eval_sts-dev-512_spearman_cosine": 0.7887806711839089, "eval_sts-dev-64_pearson_cosine": 0.7711864442090859, "eval_sts-dev-64_spearman_cosine": 0.7756594477323392, "eval_sts-dev-768_pearson_cosine": 0.7912711696792909, "eval_sts-dev-768_spearman_cosine": 0.7900474282041248, "step": 1000 }, { "epoch": 0.15359017022910534, "grad_norm": 9523596.0, "learning_rate": 4.702744986488409e-05, "loss": 4.2947, "step": 1200 }, { "epoch": 0.17918853193395623, "grad_norm": 12880760.0, "learning_rate": 4.56051770729626e-05, "loss": 4.0669, "step": 1400 }, { "epoch": 0.19198771278638166, "eval_loss": 6.11918830871582, "eval_runtime": 442.8341, "eval_samples_per_second": 14.924, "eval_sequential_score": 0.776235475618058, "eval_steps_per_second": 0.117, "eval_sts-dev-128_pearson_cosine": 0.7556070600058153, "eval_sts-dev-128_spearman_cosine": 0.7650883993782764, "eval_sts-dev-256_pearson_cosine": 0.7615982531224779, "eval_sts-dev-256_spearman_cosine": 0.7688372980643342, "eval_sts-dev-512_pearson_cosine": 0.7658162533685451, "eval_sts-dev-512_spearman_cosine": 0.7717076619559236, "eval_sts-dev-64_pearson_cosine": 0.7412214467196323, "eval_sts-dev-64_spearman_cosine": 0.7546465937668674, "eval_sts-dev-768_pearson_cosine": 0.7709585212331336, "eval_sts-dev-768_spearman_cosine": 0.776235475618058, "step": 1500 }, { "epoch": 0.20478689363880712, "grad_norm": 5121008.5, "learning_rate": 4.4182904281041105e-05, "loss": 3.7798, "step": 1600 }, { "epoch": 0.230385255343658, "grad_norm": 8532118.0, "learning_rate": 4.276063148911961e-05, "loss": 3.6295, "step": 1800 }, { "epoch": 0.2559836170485089, "grad_norm": 5177615.0, "learning_rate": 4.133835869719813e-05, "loss": 3.4326, "step": 2000 }, { "epoch": 0.2559836170485089, "eval_loss": 5.5250773429870605, "eval_runtime": 129.0233, "eval_samples_per_second": 51.223, "eval_sequential_score": 0.7967876172773333, "eval_steps_per_second": 0.403, "eval_sts-dev-128_pearson_cosine": 0.784502590249166, "eval_sts-dev-128_spearman_cosine": 0.7904922950886314, "eval_sts-dev-256_pearson_cosine": 0.7879728763645151, "eval_sts-dev-256_spearman_cosine": 0.7926398088881677, "eval_sts-dev-512_pearson_cosine": 0.79034320657993, "eval_sts-dev-512_spearman_cosine": 0.7941246347219933, "eval_sts-dev-64_pearson_cosine": 0.7732918356795019, "eval_sts-dev-64_spearman_cosine": 0.7822318891319547, "eval_sts-dev-768_pearson_cosine": 0.7932888301272122, "eval_sts-dev-768_spearman_cosine": 0.7967876172773333, "step": 2000 }, { "epoch": 0.2815819787533598, "grad_norm": 3656165.0, "learning_rate": 3.9916085905276635e-05, "loss": 3.5024, "step": 2200 }, { "epoch": 0.3071803404582107, "grad_norm": 4165362.5, "learning_rate": 3.849381311335514e-05, "loss": 3.2039, "step": 2400 }, { "epoch": 0.3199795213106361, "eval_loss": 5.417263507843018, "eval_runtime": 190.9081, "eval_samples_per_second": 34.619, "eval_sequential_score": 0.7985143359688982, "eval_steps_per_second": 0.272, "eval_sts-dev-128_pearson_cosine": 0.7806598252859374, "eval_sts-dev-128_spearman_cosine": 0.7903599418037432, "eval_sts-dev-256_pearson_cosine": 0.7884086400168866, "eval_sts-dev-256_spearman_cosine": 0.7946240645451771, "eval_sts-dev-512_pearson_cosine": 0.7915267570052114, "eval_sts-dev-512_spearman_cosine": 0.7957194058029897, "eval_sts-dev-64_pearson_cosine": 0.7654385410326767, "eval_sts-dev-64_spearman_cosine": 0.7805870680928543, "eval_sts-dev-768_pearson_cosine": 0.7950747299618343, "eval_sts-dev-768_spearman_cosine": 0.7985143359688982, "step": 2500 }, { "epoch": 0.33277870216306155, "grad_norm": 3111441.75, "learning_rate": 3.707154032143365e-05, "loss": 3.1517, "step": 2600 }, { "epoch": 0.35837706386791246, "grad_norm": 4315246.0, "learning_rate": 3.5649267529512165e-05, "loss": 3.0409, "step": 2800 }, { "epoch": 0.3839754255727633, "grad_norm": 4406995.0, "learning_rate": 3.422699473759067e-05, "loss": 2.9611, "step": 3000 }, { "epoch": 0.3839754255727633, "eval_loss": 5.039449691772461, "eval_runtime": 186.7714, "eval_samples_per_second": 35.385, "eval_sequential_score": 0.7922583016547031, "eval_steps_per_second": 0.278, "eval_sts-dev-128_pearson_cosine": 0.782435760922896, "eval_sts-dev-128_spearman_cosine": 0.7847986500402634, "eval_sts-dev-256_pearson_cosine": 0.785603571035357, "eval_sts-dev-256_spearman_cosine": 0.7871271821843686, "eval_sts-dev-512_pearson_cosine": 0.7882585155923556, "eval_sts-dev-512_spearman_cosine": 0.7893899811664858, "eval_sts-dev-64_pearson_cosine": 0.7731641525494573, "eval_sts-dev-64_spearman_cosine": 0.7789396674425884, "eval_sts-dev-768_pearson_cosine": 0.7914921308293312, "eval_sts-dev-768_spearman_cosine": 0.7922583016547031, "step": 3000 }, { "epoch": 0.40957378727761423, "grad_norm": 4295917.5, "learning_rate": 3.280472194566918e-05, "loss": 2.8913, "step": 3200 }, { "epoch": 0.43517214898246515, "grad_norm": 3956720.0, "learning_rate": 3.138244915374769e-05, "loss": 2.6737, "step": 3400 }, { "epoch": 0.4479713298348906, "eval_loss": 4.8449788093566895, "eval_runtime": 381.2673, "eval_samples_per_second": 17.334, "eval_sequential_score": 0.8124001874236463, "eval_steps_per_second": 0.136, "eval_sts-dev-128_pearson_cosine": 0.7982139278806232, "eval_sts-dev-128_spearman_cosine": 0.8076074750236868, "eval_sts-dev-256_pearson_cosine": 0.7999244325189871, "eval_sts-dev-256_spearman_cosine": 0.8075358060747592, "eval_sts-dev-512_pearson_cosine": 0.8054012343851129, "eval_sts-dev-512_spearman_cosine": 0.8110777459628828, "eval_sts-dev-64_pearson_cosine": 0.7826221833277923, "eval_sts-dev-64_spearman_cosine": 0.7968397814525646, "eval_sts-dev-768_pearson_cosine": 0.8074395668796857, "eval_sts-dev-768_spearman_cosine": 0.8124001874236463, "step": 3500 }, { "epoch": 0.460770510687316, "grad_norm": 3219552.5, "learning_rate": 2.99601763618262e-05, "loss": 2.6488, "step": 3600 }, { "epoch": 0.4863688723921669, "grad_norm": 3358243.75, "learning_rate": 2.853790356990471e-05, "loss": 2.6208, "step": 3800 }, { "epoch": 0.5119672340970178, "grad_norm": 4434459.5, "learning_rate": 2.7115630777983218e-05, "loss": 2.4823, "step": 4000 }, { "epoch": 0.5119672340970178, "eval_loss": 4.5710768699646, "eval_runtime": 216.5327, "eval_samples_per_second": 30.522, "eval_sequential_score": 0.811115353774689, "eval_steps_per_second": 0.24, "eval_sts-dev-128_pearson_cosine": 0.7996500780619267, "eval_sts-dev-128_spearman_cosine": 0.8075174124731305, "eval_sts-dev-256_pearson_cosine": 0.8022455109638521, "eval_sts-dev-256_spearman_cosine": 0.8081526580763048, "eval_sts-dev-512_pearson_cosine": 0.8053149776225357, "eval_sts-dev-512_spearman_cosine": 0.8101542245323032, "eval_sts-dev-64_pearson_cosine": 0.7897403224233863, "eval_sts-dev-64_spearman_cosine": 0.8014649100236256, "eval_sts-dev-768_pearson_cosine": 0.8071703574474214, "eval_sts-dev-768_spearman_cosine": 0.811115353774689, "step": 4000 }, { "epoch": 0.5375655958018687, "grad_norm": 3851683.75, "learning_rate": 2.569335798606173e-05, "loss": 2.5081, "step": 4200 }, { "epoch": 0.5631639575067195, "grad_norm": 3974658.0, "learning_rate": 2.4271085194140237e-05, "loss": 2.3827, "step": 4400 }, { "epoch": 0.575963138359145, "eval_loss": 4.527626037597656, "eval_runtime": 247.0855, "eval_samples_per_second": 26.748, "eval_sequential_score": 0.8237398652434835, "eval_steps_per_second": 0.21, "eval_sts-dev-128_pearson_cosine": 0.8088977658009835, "eval_sts-dev-128_spearman_cosine": 0.8200038307453663, "eval_sts-dev-256_pearson_cosine": 0.8120809058779974, "eval_sts-dev-256_spearman_cosine": 0.8205438030370273, "eval_sts-dev-512_pearson_cosine": 0.8157264805096027, "eval_sts-dev-512_spearman_cosine": 0.8226500864435473, "eval_sts-dev-64_pearson_cosine": 0.7964069841493276, "eval_sts-dev-64_spearman_cosine": 0.8117000425044992, "eval_sts-dev-768_pearson_cosine": 0.8179809027318157, "eval_sts-dev-768_spearman_cosine": 0.8237398652434835, "step": 4500 }, { "epoch": 0.5887623192115704, "grad_norm": 2969929.75, "learning_rate": 2.284881240221875e-05, "loss": 2.2867, "step": 4600 }, { "epoch": 0.6143606809164214, "grad_norm": 3068778.0, "learning_rate": 2.1426539610297256e-05, "loss": 2.2608, "step": 4800 }, { "epoch": 0.6399590426212722, "grad_norm": 5789758.5, "learning_rate": 2.0004266818375767e-05, "loss": 2.6285, "step": 5000 }, { "epoch": 0.6399590426212722, "eval_loss": 2.69280743598938, "eval_runtime": 587.2079, "eval_samples_per_second": 11.255, "eval_sequential_score": 0.812358680832537, "eval_steps_per_second": 0.089, "eval_sts-dev-128_pearson_cosine": 0.805215933596938, "eval_sts-dev-128_spearman_cosine": 0.8087200580616569, "eval_sts-dev-256_pearson_cosine": 0.8085076472836253, "eval_sts-dev-256_spearman_cosine": 0.8099002132418758, "eval_sts-dev-512_pearson_cosine": 0.8104747450142471, "eval_sts-dev-512_spearman_cosine": 0.8112676803940946, "eval_sts-dev-64_pearson_cosine": 0.7956052250413164, "eval_sts-dev-64_spearman_cosine": 0.8022672223914163, "eval_sts-dev-768_pearson_cosine": 0.8122169140710348, "eval_sts-dev-768_spearman_cosine": 0.812358680832537, "step": 5000 }, { "epoch": 0.6655574043261231, "grad_norm": 7265908.5, "learning_rate": 1.8581994026454275e-05, "loss": 3.2569, "step": 5200 }, { "epoch": 0.6911557660309741, "grad_norm": 5744435.0, "learning_rate": 1.7159721234532783e-05, "loss": 2.7108, "step": 5400 }, { "epoch": 0.7039549468833994, "eval_loss": 3.4081127643585205, "eval_runtime": 113.2957, "eval_samples_per_second": 58.334, "eval_sequential_score": 0.8112072214643352, "eval_steps_per_second": 0.459, "eval_sts-dev-128_pearson_cosine": 0.8036120610302749, "eval_sts-dev-128_spearman_cosine": 0.8060065068978162, "eval_sts-dev-256_pearson_cosine": 0.8076653976665353, "eval_sts-dev-256_spearman_cosine": 0.8079945036667597, "eval_sts-dev-512_pearson_cosine": 0.8103438010262101, "eval_sts-dev-512_spearman_cosine": 0.8099837098639602, "eval_sts-dev-64_pearson_cosine": 0.7942652314065892, "eval_sts-dev-64_spearman_cosine": 0.7993950394097328, "eval_sts-dev-768_pearson_cosine": 0.8122845848971061, "eval_sts-dev-768_spearman_cosine": 0.8112072214643352, "step": 5500 }, { "epoch": 0.7167541277358249, "grad_norm": 5608753.5, "learning_rate": 1.5737448442611294e-05, "loss": 2.2756, "step": 5600 }, { "epoch": 0.7423524894406758, "grad_norm": 4639111.5, "learning_rate": 1.4315175650689802e-05, "loss": 1.9964, "step": 5800 }, { "epoch": 0.7679508511455266, "grad_norm": 3252142.5, "learning_rate": 1.2892902858768313e-05, "loss": 1.8278, "step": 6000 }, { "epoch": 0.7679508511455266, "eval_loss": 3.626107931137085, "eval_runtime": 402.388, "eval_samples_per_second": 16.424, "eval_sequential_score": 0.8115702497698598, "eval_steps_per_second": 0.129, "eval_sts-dev-128_pearson_cosine": 0.8045525818531101, "eval_sts-dev-128_spearman_cosine": 0.8070988970713203, "eval_sts-dev-256_pearson_cosine": 0.808009240681977, "eval_sts-dev-256_spearman_cosine": 0.8087801893710216, "eval_sts-dev-512_pearson_cosine": 0.8100529203161405, "eval_sts-dev-512_spearman_cosine": 0.8101431817309978, "eval_sts-dev-64_pearson_cosine": 0.7958099227865137, "eval_sts-dev-64_spearman_cosine": 0.8012609269210379, "eval_sts-dev-768_pearson_cosine": 0.8121800215134509, "eval_sts-dev-768_spearman_cosine": 0.8115702497698598, "step": 6000 }, { "epoch": 0.7935492128503776, "grad_norm": 7875703.0, "learning_rate": 1.147063006684682e-05, "loss": 1.7105, "step": 6200 }, { "epoch": 0.8191475745552285, "grad_norm": 4817139.5, "learning_rate": 1.0048357274925332e-05, "loss": 1.5719, "step": 6400 }, { "epoch": 0.831946755407654, "eval_loss": 3.7825615406036377, "eval_runtime": 260.9688, "eval_samples_per_second": 25.325, "eval_sequential_score": 0.809746911527668, "eval_steps_per_second": 0.199, "eval_sts-dev-128_pearson_cosine": 0.8019992344978342, "eval_sts-dev-128_spearman_cosine": 0.8040026735198376, "eval_sts-dev-256_pearson_cosine": 0.8069582374847454, "eval_sts-dev-256_spearman_cosine": 0.8072454625835676, "eval_sts-dev-512_pearson_cosine": 0.8088522922289604, "eval_sts-dev-512_spearman_cosine": 0.8085037600756491, "eval_sts-dev-64_pearson_cosine": 0.7917208895813888, "eval_sts-dev-64_spearman_cosine": 0.7966340332099681, "eval_sts-dev-768_pearson_cosine": 0.8110657541385573, "eval_sts-dev-768_spearman_cosine": 0.809746911527668, "step": 6500 }, { "epoch": 0.8447459362600793, "grad_norm": 3659550.25, "learning_rate": 8.626084483003841e-06, "loss": 1.4569, "step": 6600 }, { "epoch": 0.8703442979649303, "grad_norm": 5161893.0, "learning_rate": 7.20381169108235e-06, "loss": 1.3572, "step": 6800 }, { "epoch": 0.8959426596697811, "grad_norm": 3728513.5, "learning_rate": 5.781538899160859e-06, "loss": 1.2607, "step": 7000 }, { "epoch": 0.8959426596697811, "eval_loss": 3.732253074645996, "eval_runtime": 343.8, "eval_samples_per_second": 19.223, "eval_sequential_score": 0.8113987863085653, "eval_steps_per_second": 0.151, "eval_sts-dev-128_pearson_cosine": 0.8052203471354245, "eval_sts-dev-128_spearman_cosine": 0.8069675064136675, "eval_sts-dev-256_pearson_cosine": 0.8090504276321419, "eval_sts-dev-256_spearman_cosine": 0.8092690932878782, "eval_sts-dev-512_pearson_cosine": 0.8106848356090132, "eval_sts-dev-512_spearman_cosine": 0.8102076026050016, "eval_sts-dev-64_pearson_cosine": 0.7959836620564662, "eval_sts-dev-64_spearman_cosine": 0.8004825005334193, "eval_sts-dev-768_pearson_cosine": 0.8127183729174601, "eval_sts-dev-768_spearman_cosine": 0.8113987863085653, "step": 7000 }, { "epoch": 0.921541021374632, "grad_norm": 3384944.0, "learning_rate": 4.359266107239369e-06, "loss": 1.1676, "step": 7200 }, { "epoch": 0.9471393830794829, "grad_norm": 3170591.5, "learning_rate": 2.936993315317878e-06, "loss": 1.1663, "step": 7400 }, { "epoch": 0.9599385639319084, "eval_loss": 3.83072829246521, "eval_runtime": 240.3095, "eval_samples_per_second": 27.502, "eval_sequential_score": 0.8100586279907306, "eval_steps_per_second": 0.216, "eval_sts-dev-128_pearson_cosine": 0.8028710019029521, "eval_sts-dev-128_spearman_cosine": 0.8054855987917489, "eval_sts-dev-256_pearson_cosine": 0.8076510620939634, "eval_sts-dev-256_spearman_cosine": 0.8080588277305082, "eval_sts-dev-512_pearson_cosine": 0.8092891955563192, "eval_sts-dev-512_spearman_cosine": 0.8087644228771842, "eval_sts-dev-64_pearson_cosine": 0.7923252906438638, "eval_sts-dev-64_spearman_cosine": 0.7975941111911333, "eval_sts-dev-768_pearson_cosine": 0.8111988062913815, "eval_sts-dev-768_spearman_cosine": 0.8100586279907306, "step": 7500 }, { "epoch": 0.9727377447843338, "grad_norm": 3539899.5, "learning_rate": 1.5147205233963876e-06, "loss": 1.1079, "step": 7600 }, { "epoch": 0.9983361064891847, "grad_norm": 4290327.5, "learning_rate": 9.24477314748969e-08, "loss": 1.0827, "step": 7800 } ], "logging_steps": 200, "max_steps": 7813, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }