|
{ |
|
"best_metric": 0.8237398652434835, |
|
"best_model_checkpoint": "output/eurobert_simce_EuroBERT-EuroBERT-210m_32_bs_1_e/checkpoint-4500", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 7813, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02559836170485089, |
|
"grad_norm": 21741072.0, |
|
"learning_rate": 1.2787723785166241e-05, |
|
"loss": 8.8816, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05119672340970178, |
|
"grad_norm": 11201852.0, |
|
"learning_rate": 2.5575447570332482e-05, |
|
"loss": 5.1404, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06399590426212723, |
|
"eval_loss": 6.53036642074585, |
|
"eval_runtime": 185.969, |
|
"eval_samples_per_second": 35.538, |
|
"eval_sequential_score": 0.785466694337272, |
|
"eval_steps_per_second": 0.28, |
|
"eval_sts-dev-128_pearson_cosine": 0.763999536812149, |
|
"eval_sts-dev-128_spearman_cosine": 0.7711764958651725, |
|
"eval_sts-dev-256_pearson_cosine": 0.7720362664289375, |
|
"eval_sts-dev-256_spearman_cosine": 0.7765855658716928, |
|
"eval_sts-dev-512_pearson_cosine": 0.7788408953704133, |
|
"eval_sts-dev-512_spearman_cosine": 0.7817962751922598, |
|
"eval_sts-dev-64_pearson_cosine": 0.7530255624130506, |
|
"eval_sts-dev-64_spearman_cosine": 0.7635133266587258, |
|
"eval_sts-dev-768_pearson_cosine": 0.7833066834602731, |
|
"eval_sts-dev-768_spearman_cosine": 0.785466694337272, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07679508511455267, |
|
"grad_norm": 97525040.0, |
|
"learning_rate": 3.8363171355498725e-05, |
|
"loss": 4.7789, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.10239344681940356, |
|
"grad_norm": 16968314.0, |
|
"learning_rate": 4.987199544872707e-05, |
|
"loss": 4.6845, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12799180852425446, |
|
"grad_norm": 8517995.0, |
|
"learning_rate": 4.844972265680558e-05, |
|
"loss": 4.6628, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12799180852425446, |
|
"eval_loss": 6.329844951629639, |
|
"eval_runtime": 443.2691, |
|
"eval_samples_per_second": 14.91, |
|
"eval_sequential_score": 0.7900474282041248, |
|
"eval_steps_per_second": 0.117, |
|
"eval_sts-dev-128_pearson_cosine": 0.7796253891599777, |
|
"eval_sts-dev-128_spearman_cosine": 0.7828728829847589, |
|
"eval_sts-dev-256_pearson_cosine": 0.7844919892349245, |
|
"eval_sts-dev-256_spearman_cosine": 0.7854868395314357, |
|
"eval_sts-dev-512_pearson_cosine": 0.7895438861374281, |
|
"eval_sts-dev-512_spearman_cosine": 0.7887806711839089, |
|
"eval_sts-dev-64_pearson_cosine": 0.7711864442090859, |
|
"eval_sts-dev-64_spearman_cosine": 0.7756594477323392, |
|
"eval_sts-dev-768_pearson_cosine": 0.7912711696792909, |
|
"eval_sts-dev-768_spearman_cosine": 0.7900474282041248, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15359017022910534, |
|
"grad_norm": 9523596.0, |
|
"learning_rate": 4.702744986488409e-05, |
|
"loss": 4.2947, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.17918853193395623, |
|
"grad_norm": 12880760.0, |
|
"learning_rate": 4.56051770729626e-05, |
|
"loss": 4.0669, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.19198771278638166, |
|
"eval_loss": 6.11918830871582, |
|
"eval_runtime": 442.8341, |
|
"eval_samples_per_second": 14.924, |
|
"eval_sequential_score": 0.776235475618058, |
|
"eval_steps_per_second": 0.117, |
|
"eval_sts-dev-128_pearson_cosine": 0.7556070600058153, |
|
"eval_sts-dev-128_spearman_cosine": 0.7650883993782764, |
|
"eval_sts-dev-256_pearson_cosine": 0.7615982531224779, |
|
"eval_sts-dev-256_spearman_cosine": 0.7688372980643342, |
|
"eval_sts-dev-512_pearson_cosine": 0.7658162533685451, |
|
"eval_sts-dev-512_spearman_cosine": 0.7717076619559236, |
|
"eval_sts-dev-64_pearson_cosine": 0.7412214467196323, |
|
"eval_sts-dev-64_spearman_cosine": 0.7546465937668674, |
|
"eval_sts-dev-768_pearson_cosine": 0.7709585212331336, |
|
"eval_sts-dev-768_spearman_cosine": 0.776235475618058, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.20478689363880712, |
|
"grad_norm": 5121008.5, |
|
"learning_rate": 4.4182904281041105e-05, |
|
"loss": 3.7798, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.230385255343658, |
|
"grad_norm": 8532118.0, |
|
"learning_rate": 4.276063148911961e-05, |
|
"loss": 3.6295, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2559836170485089, |
|
"grad_norm": 5177615.0, |
|
"learning_rate": 4.133835869719813e-05, |
|
"loss": 3.4326, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2559836170485089, |
|
"eval_loss": 5.5250773429870605, |
|
"eval_runtime": 129.0233, |
|
"eval_samples_per_second": 51.223, |
|
"eval_sequential_score": 0.7967876172773333, |
|
"eval_steps_per_second": 0.403, |
|
"eval_sts-dev-128_pearson_cosine": 0.784502590249166, |
|
"eval_sts-dev-128_spearman_cosine": 0.7904922950886314, |
|
"eval_sts-dev-256_pearson_cosine": 0.7879728763645151, |
|
"eval_sts-dev-256_spearman_cosine": 0.7926398088881677, |
|
"eval_sts-dev-512_pearson_cosine": 0.79034320657993, |
|
"eval_sts-dev-512_spearman_cosine": 0.7941246347219933, |
|
"eval_sts-dev-64_pearson_cosine": 0.7732918356795019, |
|
"eval_sts-dev-64_spearman_cosine": 0.7822318891319547, |
|
"eval_sts-dev-768_pearson_cosine": 0.7932888301272122, |
|
"eval_sts-dev-768_spearman_cosine": 0.7967876172773333, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2815819787533598, |
|
"grad_norm": 3656165.0, |
|
"learning_rate": 3.9916085905276635e-05, |
|
"loss": 3.5024, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3071803404582107, |
|
"grad_norm": 4165362.5, |
|
"learning_rate": 3.849381311335514e-05, |
|
"loss": 3.2039, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3199795213106361, |
|
"eval_loss": 5.417263507843018, |
|
"eval_runtime": 190.9081, |
|
"eval_samples_per_second": 34.619, |
|
"eval_sequential_score": 0.7985143359688982, |
|
"eval_steps_per_second": 0.272, |
|
"eval_sts-dev-128_pearson_cosine": 0.7806598252859374, |
|
"eval_sts-dev-128_spearman_cosine": 0.7903599418037432, |
|
"eval_sts-dev-256_pearson_cosine": 0.7884086400168866, |
|
"eval_sts-dev-256_spearman_cosine": 0.7946240645451771, |
|
"eval_sts-dev-512_pearson_cosine": 0.7915267570052114, |
|
"eval_sts-dev-512_spearman_cosine": 0.7957194058029897, |
|
"eval_sts-dev-64_pearson_cosine": 0.7654385410326767, |
|
"eval_sts-dev-64_spearman_cosine": 0.7805870680928543, |
|
"eval_sts-dev-768_pearson_cosine": 0.7950747299618343, |
|
"eval_sts-dev-768_spearman_cosine": 0.7985143359688982, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.33277870216306155, |
|
"grad_norm": 3111441.75, |
|
"learning_rate": 3.707154032143365e-05, |
|
"loss": 3.1517, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.35837706386791246, |
|
"grad_norm": 4315246.0, |
|
"learning_rate": 3.5649267529512165e-05, |
|
"loss": 3.0409, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.3839754255727633, |
|
"grad_norm": 4406995.0, |
|
"learning_rate": 3.422699473759067e-05, |
|
"loss": 2.9611, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3839754255727633, |
|
"eval_loss": 5.039449691772461, |
|
"eval_runtime": 186.7714, |
|
"eval_samples_per_second": 35.385, |
|
"eval_sequential_score": 0.7922583016547031, |
|
"eval_steps_per_second": 0.278, |
|
"eval_sts-dev-128_pearson_cosine": 0.782435760922896, |
|
"eval_sts-dev-128_spearman_cosine": 0.7847986500402634, |
|
"eval_sts-dev-256_pearson_cosine": 0.785603571035357, |
|
"eval_sts-dev-256_spearman_cosine": 0.7871271821843686, |
|
"eval_sts-dev-512_pearson_cosine": 0.7882585155923556, |
|
"eval_sts-dev-512_spearman_cosine": 0.7893899811664858, |
|
"eval_sts-dev-64_pearson_cosine": 0.7731641525494573, |
|
"eval_sts-dev-64_spearman_cosine": 0.7789396674425884, |
|
"eval_sts-dev-768_pearson_cosine": 0.7914921308293312, |
|
"eval_sts-dev-768_spearman_cosine": 0.7922583016547031, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.40957378727761423, |
|
"grad_norm": 4295917.5, |
|
"learning_rate": 3.280472194566918e-05, |
|
"loss": 2.8913, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.43517214898246515, |
|
"grad_norm": 3956720.0, |
|
"learning_rate": 3.138244915374769e-05, |
|
"loss": 2.6737, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.4479713298348906, |
|
"eval_loss": 4.8449788093566895, |
|
"eval_runtime": 381.2673, |
|
"eval_samples_per_second": 17.334, |
|
"eval_sequential_score": 0.8124001874236463, |
|
"eval_steps_per_second": 0.136, |
|
"eval_sts-dev-128_pearson_cosine": 0.7982139278806232, |
|
"eval_sts-dev-128_spearman_cosine": 0.8076074750236868, |
|
"eval_sts-dev-256_pearson_cosine": 0.7999244325189871, |
|
"eval_sts-dev-256_spearman_cosine": 0.8075358060747592, |
|
"eval_sts-dev-512_pearson_cosine": 0.8054012343851129, |
|
"eval_sts-dev-512_spearman_cosine": 0.8110777459628828, |
|
"eval_sts-dev-64_pearson_cosine": 0.7826221833277923, |
|
"eval_sts-dev-64_spearman_cosine": 0.7968397814525646, |
|
"eval_sts-dev-768_pearson_cosine": 0.8074395668796857, |
|
"eval_sts-dev-768_spearman_cosine": 0.8124001874236463, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.460770510687316, |
|
"grad_norm": 3219552.5, |
|
"learning_rate": 2.99601763618262e-05, |
|
"loss": 2.6488, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.4863688723921669, |
|
"grad_norm": 3358243.75, |
|
"learning_rate": 2.853790356990471e-05, |
|
"loss": 2.6208, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5119672340970178, |
|
"grad_norm": 4434459.5, |
|
"learning_rate": 2.7115630777983218e-05, |
|
"loss": 2.4823, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5119672340970178, |
|
"eval_loss": 4.5710768699646, |
|
"eval_runtime": 216.5327, |
|
"eval_samples_per_second": 30.522, |
|
"eval_sequential_score": 0.811115353774689, |
|
"eval_steps_per_second": 0.24, |
|
"eval_sts-dev-128_pearson_cosine": 0.7996500780619267, |
|
"eval_sts-dev-128_spearman_cosine": 0.8075174124731305, |
|
"eval_sts-dev-256_pearson_cosine": 0.8022455109638521, |
|
"eval_sts-dev-256_spearman_cosine": 0.8081526580763048, |
|
"eval_sts-dev-512_pearson_cosine": 0.8053149776225357, |
|
"eval_sts-dev-512_spearman_cosine": 0.8101542245323032, |
|
"eval_sts-dev-64_pearson_cosine": 0.7897403224233863, |
|
"eval_sts-dev-64_spearman_cosine": 0.8014649100236256, |
|
"eval_sts-dev-768_pearson_cosine": 0.8071703574474214, |
|
"eval_sts-dev-768_spearman_cosine": 0.811115353774689, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5375655958018687, |
|
"grad_norm": 3851683.75, |
|
"learning_rate": 2.569335798606173e-05, |
|
"loss": 2.5081, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5631639575067195, |
|
"grad_norm": 3974658.0, |
|
"learning_rate": 2.4271085194140237e-05, |
|
"loss": 2.3827, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.575963138359145, |
|
"eval_loss": 4.527626037597656, |
|
"eval_runtime": 247.0855, |
|
"eval_samples_per_second": 26.748, |
|
"eval_sequential_score": 0.8237398652434835, |
|
"eval_steps_per_second": 0.21, |
|
"eval_sts-dev-128_pearson_cosine": 0.8088977658009835, |
|
"eval_sts-dev-128_spearman_cosine": 0.8200038307453663, |
|
"eval_sts-dev-256_pearson_cosine": 0.8120809058779974, |
|
"eval_sts-dev-256_spearman_cosine": 0.8205438030370273, |
|
"eval_sts-dev-512_pearson_cosine": 0.8157264805096027, |
|
"eval_sts-dev-512_spearman_cosine": 0.8226500864435473, |
|
"eval_sts-dev-64_pearson_cosine": 0.7964069841493276, |
|
"eval_sts-dev-64_spearman_cosine": 0.8117000425044992, |
|
"eval_sts-dev-768_pearson_cosine": 0.8179809027318157, |
|
"eval_sts-dev-768_spearman_cosine": 0.8237398652434835, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5887623192115704, |
|
"grad_norm": 2969929.75, |
|
"learning_rate": 2.284881240221875e-05, |
|
"loss": 2.2867, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.6143606809164214, |
|
"grad_norm": 3068778.0, |
|
"learning_rate": 2.1426539610297256e-05, |
|
"loss": 2.2608, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.6399590426212722, |
|
"grad_norm": 5789758.5, |
|
"learning_rate": 2.0004266818375767e-05, |
|
"loss": 2.6285, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6399590426212722, |
|
"eval_loss": 2.69280743598938, |
|
"eval_runtime": 587.2079, |
|
"eval_samples_per_second": 11.255, |
|
"eval_sequential_score": 0.812358680832537, |
|
"eval_steps_per_second": 0.089, |
|
"eval_sts-dev-128_pearson_cosine": 0.805215933596938, |
|
"eval_sts-dev-128_spearman_cosine": 0.8087200580616569, |
|
"eval_sts-dev-256_pearson_cosine": 0.8085076472836253, |
|
"eval_sts-dev-256_spearman_cosine": 0.8099002132418758, |
|
"eval_sts-dev-512_pearson_cosine": 0.8104747450142471, |
|
"eval_sts-dev-512_spearman_cosine": 0.8112676803940946, |
|
"eval_sts-dev-64_pearson_cosine": 0.7956052250413164, |
|
"eval_sts-dev-64_spearman_cosine": 0.8022672223914163, |
|
"eval_sts-dev-768_pearson_cosine": 0.8122169140710348, |
|
"eval_sts-dev-768_spearman_cosine": 0.812358680832537, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6655574043261231, |
|
"grad_norm": 7265908.5, |
|
"learning_rate": 1.8581994026454275e-05, |
|
"loss": 3.2569, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.6911557660309741, |
|
"grad_norm": 5744435.0, |
|
"learning_rate": 1.7159721234532783e-05, |
|
"loss": 2.7108, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.7039549468833994, |
|
"eval_loss": 3.4081127643585205, |
|
"eval_runtime": 113.2957, |
|
"eval_samples_per_second": 58.334, |
|
"eval_sequential_score": 0.8112072214643352, |
|
"eval_steps_per_second": 0.459, |
|
"eval_sts-dev-128_pearson_cosine": 0.8036120610302749, |
|
"eval_sts-dev-128_spearman_cosine": 0.8060065068978162, |
|
"eval_sts-dev-256_pearson_cosine": 0.8076653976665353, |
|
"eval_sts-dev-256_spearman_cosine": 0.8079945036667597, |
|
"eval_sts-dev-512_pearson_cosine": 0.8103438010262101, |
|
"eval_sts-dev-512_spearman_cosine": 0.8099837098639602, |
|
"eval_sts-dev-64_pearson_cosine": 0.7942652314065892, |
|
"eval_sts-dev-64_spearman_cosine": 0.7993950394097328, |
|
"eval_sts-dev-768_pearson_cosine": 0.8122845848971061, |
|
"eval_sts-dev-768_spearman_cosine": 0.8112072214643352, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7167541277358249, |
|
"grad_norm": 5608753.5, |
|
"learning_rate": 1.5737448442611294e-05, |
|
"loss": 2.2756, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.7423524894406758, |
|
"grad_norm": 4639111.5, |
|
"learning_rate": 1.4315175650689802e-05, |
|
"loss": 1.9964, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.7679508511455266, |
|
"grad_norm": 3252142.5, |
|
"learning_rate": 1.2892902858768313e-05, |
|
"loss": 1.8278, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7679508511455266, |
|
"eval_loss": 3.626107931137085, |
|
"eval_runtime": 402.388, |
|
"eval_samples_per_second": 16.424, |
|
"eval_sequential_score": 0.8115702497698598, |
|
"eval_steps_per_second": 0.129, |
|
"eval_sts-dev-128_pearson_cosine": 0.8045525818531101, |
|
"eval_sts-dev-128_spearman_cosine": 0.8070988970713203, |
|
"eval_sts-dev-256_pearson_cosine": 0.808009240681977, |
|
"eval_sts-dev-256_spearman_cosine": 0.8087801893710216, |
|
"eval_sts-dev-512_pearson_cosine": 0.8100529203161405, |
|
"eval_sts-dev-512_spearman_cosine": 0.8101431817309978, |
|
"eval_sts-dev-64_pearson_cosine": 0.7958099227865137, |
|
"eval_sts-dev-64_spearman_cosine": 0.8012609269210379, |
|
"eval_sts-dev-768_pearson_cosine": 0.8121800215134509, |
|
"eval_sts-dev-768_spearman_cosine": 0.8115702497698598, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7935492128503776, |
|
"grad_norm": 7875703.0, |
|
"learning_rate": 1.147063006684682e-05, |
|
"loss": 1.7105, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.8191475745552285, |
|
"grad_norm": 4817139.5, |
|
"learning_rate": 1.0048357274925332e-05, |
|
"loss": 1.5719, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.831946755407654, |
|
"eval_loss": 3.7825615406036377, |
|
"eval_runtime": 260.9688, |
|
"eval_samples_per_second": 25.325, |
|
"eval_sequential_score": 0.809746911527668, |
|
"eval_steps_per_second": 0.199, |
|
"eval_sts-dev-128_pearson_cosine": 0.8019992344978342, |
|
"eval_sts-dev-128_spearman_cosine": 0.8040026735198376, |
|
"eval_sts-dev-256_pearson_cosine": 0.8069582374847454, |
|
"eval_sts-dev-256_spearman_cosine": 0.8072454625835676, |
|
"eval_sts-dev-512_pearson_cosine": 0.8088522922289604, |
|
"eval_sts-dev-512_spearman_cosine": 0.8085037600756491, |
|
"eval_sts-dev-64_pearson_cosine": 0.7917208895813888, |
|
"eval_sts-dev-64_spearman_cosine": 0.7966340332099681, |
|
"eval_sts-dev-768_pearson_cosine": 0.8110657541385573, |
|
"eval_sts-dev-768_spearman_cosine": 0.809746911527668, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.8447459362600793, |
|
"grad_norm": 3659550.25, |
|
"learning_rate": 8.626084483003841e-06, |
|
"loss": 1.4569, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.8703442979649303, |
|
"grad_norm": 5161893.0, |
|
"learning_rate": 7.20381169108235e-06, |
|
"loss": 1.3572, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.8959426596697811, |
|
"grad_norm": 3728513.5, |
|
"learning_rate": 5.781538899160859e-06, |
|
"loss": 1.2607, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8959426596697811, |
|
"eval_loss": 3.732253074645996, |
|
"eval_runtime": 343.8, |
|
"eval_samples_per_second": 19.223, |
|
"eval_sequential_score": 0.8113987863085653, |
|
"eval_steps_per_second": 0.151, |
|
"eval_sts-dev-128_pearson_cosine": 0.8052203471354245, |
|
"eval_sts-dev-128_spearman_cosine": 0.8069675064136675, |
|
"eval_sts-dev-256_pearson_cosine": 0.8090504276321419, |
|
"eval_sts-dev-256_spearman_cosine": 0.8092690932878782, |
|
"eval_sts-dev-512_pearson_cosine": 0.8106848356090132, |
|
"eval_sts-dev-512_spearman_cosine": 0.8102076026050016, |
|
"eval_sts-dev-64_pearson_cosine": 0.7959836620564662, |
|
"eval_sts-dev-64_spearman_cosine": 0.8004825005334193, |
|
"eval_sts-dev-768_pearson_cosine": 0.8127183729174601, |
|
"eval_sts-dev-768_spearman_cosine": 0.8113987863085653, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.921541021374632, |
|
"grad_norm": 3384944.0, |
|
"learning_rate": 4.359266107239369e-06, |
|
"loss": 1.1676, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.9471393830794829, |
|
"grad_norm": 3170591.5, |
|
"learning_rate": 2.936993315317878e-06, |
|
"loss": 1.1663, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.9599385639319084, |
|
"eval_loss": 3.83072829246521, |
|
"eval_runtime": 240.3095, |
|
"eval_samples_per_second": 27.502, |
|
"eval_sequential_score": 0.8100586279907306, |
|
"eval_steps_per_second": 0.216, |
|
"eval_sts-dev-128_pearson_cosine": 0.8028710019029521, |
|
"eval_sts-dev-128_spearman_cosine": 0.8054855987917489, |
|
"eval_sts-dev-256_pearson_cosine": 0.8076510620939634, |
|
"eval_sts-dev-256_spearman_cosine": 0.8080588277305082, |
|
"eval_sts-dev-512_pearson_cosine": 0.8092891955563192, |
|
"eval_sts-dev-512_spearman_cosine": 0.8087644228771842, |
|
"eval_sts-dev-64_pearson_cosine": 0.7923252906438638, |
|
"eval_sts-dev-64_spearman_cosine": 0.7975941111911333, |
|
"eval_sts-dev-768_pearson_cosine": 0.8111988062913815, |
|
"eval_sts-dev-768_spearman_cosine": 0.8100586279907306, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9727377447843338, |
|
"grad_norm": 3539899.5, |
|
"learning_rate": 1.5147205233963876e-06, |
|
"loss": 1.1079, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.9983361064891847, |
|
"grad_norm": 4290327.5, |
|
"learning_rate": 9.24477314748969e-08, |
|
"loss": 1.0827, |
|
"step": 7800 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 7813, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|