[ { "loss": 0.7745, "learning_rate": 0.0002, "epoch": 0.05, "step": 1 }, { "loss": 0.6967, "learning_rate": 0.0002, "epoch": 0.11, "step": 2 }, { "loss": 0.7227, "learning_rate": 0.0002, "epoch": 0.16, "step": 3 }, { "loss": 0.71, "learning_rate": 0.0002, "epoch": 0.21, "step": 4 }, { "loss": 0.6809, "learning_rate": 0.0002, "epoch": 0.26, "step": 5 }, { "loss": 0.7043, "learning_rate": 0.0002, "epoch": 0.32, "step": 6 }, { "loss": 0.6713, "learning_rate": 0.0002, "epoch": 0.37, "step": 7 }, { "loss": 0.6788, "learning_rate": 0.0002, "epoch": 0.42, "step": 8 }, { "loss": 0.6522, "learning_rate": 0.0002, "epoch": 0.47, "step": 9 }, { "loss": 0.6264, "learning_rate": 0.0002, "epoch": 0.53, "step": 10 }, { "loss": 0.6381, "learning_rate": 0.0002, "epoch": 0.58, "step": 11 }, { "loss": 0.6187, "learning_rate": 0.0002, "epoch": 0.63, "step": 12 }, { "loss": 0.5279, "learning_rate": 0.0002, "epoch": 0.68, "step": 13 }, { "loss": 0.5766, "learning_rate": 0.0002, "epoch": 0.74, "step": 14 }, { "loss": 0.4784, "learning_rate": 0.0002, "epoch": 0.79, "step": 15 }, { "loss": 0.4198, "learning_rate": 0.0002, "epoch": 0.84, "step": 16 }, { "loss": 0.5876, "learning_rate": 0.0002, "epoch": 0.89, "step": 17 }, { "loss": 0.4181, "learning_rate": 0.0002, "epoch": 0.95, "step": 18 }, { "loss": 0.4716, "learning_rate": 0.0002, "epoch": 1.0, "step": 19 }, { "loss": 0.6533, "learning_rate": 0.0002, "epoch": 1.05, "step": 20 }, { "loss": 0.3915, "learning_rate": 0.0002, "epoch": 1.11, "step": 21 }, { "loss": 0.5059, "learning_rate": 0.0002, "epoch": 1.16, "step": 22 }, { "loss": 0.4458, "learning_rate": 0.0002, "epoch": 1.21, "step": 23 }, { "loss": 0.5159, "learning_rate": 0.0002, "epoch": 1.26, "step": 24 }, { "loss": 0.4388, "learning_rate": 0.0002, "epoch": 1.32, "step": 25 }, { "eval_quote_attribution_loss": 0.3903699815273285, "eval_quote_attribution_score": -0.13283972442150116, "eval_quote_attribution_brier_score": 0.13283972442150116, "eval_quote_attribution_average_probability": 0.7337198853492737, "eval_quote_attribution_accuracy": 0.76, "eval_quote_attribution_probabilities": [ 0.3397865295410156, 0.27400609850883484, 0.6274727582931519, 0.5726096034049988, 0.7350274324417114, 0.89092618227005, 0.8754991292953491, 0.9745182394981384, 0.9272416830062866, 0.8318775296211243, 0.9994543194770813, 0.9697004556655884, 0.6574472784996033, 0.9558533430099487, 0.7987542152404785, 0.7501387596130371, 0.45201608538627625, 0.9025270342826843, 0.6715742945671082, 0.968725860118866, 0.91517174243927, 0.6382958292961121, 0.949180543422699, 0.9957343935966492, 0.31578928232192993, 0.6597267985343933, 0.8947572708129883, 0.2931935787200928, 0.8521986603736877, 0.9986943602561951, 0.9458821415901184, 0.9390006065368652, 0.8302227258682251, 0.19578516483306885, 0.9109433889389038, 0.12956511974334717, 0.6802142262458801, 0.937251091003418, 0.8683292865753174, 0.9591161012649536, 0.8652911186218262, 0.9901982545852661, 0.8897508382797241, 0.2960745394229889, 0.40311864018440247, 0.17587710916996002, 0.9315513968467712, 0.48898300528526306, 0.52398282289505, 0.46714815497398376, 0.9947591423988342, 0.7912486791610718, 0.9232833385467529, 0.9261313676834106, 0.7881039381027222, 0.9963042736053467, 0.4191492199897766, 0.8374874591827393, 0.5932137966156006, 0.9614864587783813, 0.37411344051361084, 0.5918532013893127, 0.39012864232063293, 0.28125879168510437, 0.8941668272018433, 0.8424620628356934, 0.8092784285545349, 0.7347500920295715, 0.9776512980461121, 0.9989927411079407, 0.5517320036888123, 0.5454489588737488, 0.3367040455341339, 0.7517660856246948, 0.9410293698310852, 0.8296215534210205, 0.820630669593811, 0.42773497104644775, 0.9955050349235535, 0.9657703638076782, 0.7948038578033447, 0.2974149286746979, 0.998604953289032, 0.997099757194519, 0.7744022607803345, 0.9542419910430908, 0.964718759059906, 0.29336974024772644, 0.9298573136329651, 0.9975958466529846, 0.8341689109802246, 0.4438086152076721, 0.8190150260925293, 0.9896742701530457, 0.4560953378677368, 0.9868803024291992, 0.7759295105934143, 0.49203911423683167, 0.8246551156044006, 0.3396430015563965 ], "eval_quote_attribution_runtime": 14.5782, "eval_quote_attribution_samples_per_second": 6.86, "eval_quote_attribution_steps_per_second": 0.137, "epoch": 1.32, "step": 25 }, { "loss": 0.3012, "learning_rate": 0.0002, "epoch": 1.37, "step": 26 }, { "loss": 0.2573, "learning_rate": 0.0002, "epoch": 1.42, "step": 27 }, { "loss": 0.2884, "learning_rate": 0.0002, "epoch": 1.47, "step": 28 }, { "loss": 0.3465, "learning_rate": 0.0002, "epoch": 1.53, "step": 29 }, { "loss": 0.4097, "learning_rate": 0.0002, "epoch": 1.58, "step": 30 }, { "loss": 0.3226, "learning_rate": 0.0002, "epoch": 1.63, "step": 31 }, { "loss": 0.3939, "learning_rate": 0.0002, "epoch": 1.68, "step": 32 }, { "loss": 0.4634, "learning_rate": 0.0002, "epoch": 1.74, "step": 33 }, { "loss": 0.245, "learning_rate": 0.0002, "epoch": 1.79, "step": 34 }, { "loss": 0.5877, "learning_rate": 0.0002, "epoch": 1.84, "step": 35 }, { "loss": 0.34, "learning_rate": 0.0002, "epoch": 1.89, "step": 36 }, { "loss": 0.3888, "learning_rate": 0.0002, "epoch": 1.95, "step": 37 }, { "loss": 0.4495, "learning_rate": 0.0002, "epoch": 2.0, "step": 38 }, { "loss": 0.3455, "learning_rate": 0.0002, "epoch": 2.05, "step": 39 }, { "loss": 0.3999, "learning_rate": 0.0002, "epoch": 2.11, "step": 40 }, { "loss": 0.3567, "learning_rate": 0.0002, "epoch": 2.16, "step": 41 }, { "loss": 0.1923, "learning_rate": 0.0002, "epoch": 2.21, "step": 42 }, { "loss": 0.3432, "learning_rate": 0.0002, "epoch": 2.26, "step": 43 }, { "loss": 0.4009, "learning_rate": 0.0002, "epoch": 2.32, "step": 44 }, { "loss": 0.3515, "learning_rate": 0.0002, "epoch": 2.37, "step": 45 }, { "loss": 0.2747, "learning_rate": 0.0002, "epoch": 2.42, "step": 46 }, { "loss": 0.258, "learning_rate": 0.0002, "epoch": 2.47, "step": 47 }, { "loss": 0.1799, "learning_rate": 0.0002, "epoch": 2.53, "step": 48 }, { "loss": 0.1551, "learning_rate": 0.0002, "epoch": 2.58, "step": 49 }, { "loss": 0.1258, "learning_rate": 0.0002, "epoch": 2.63, "step": 50 }, { "eval_quote_attribution_loss": 0.3712925314903259, "eval_quote_attribution_score": -0.12255415320396423, "eval_quote_attribution_brier_score": 0.12255415320396423, "eval_quote_attribution_average_probability": 0.7985621094703674, "eval_quote_attribution_accuracy": 0.84, "eval_quote_attribution_probabilities": [ 0.7860729098320007, 0.01861773245036602, 0.5588395595550537, 0.7460629343986511, 0.9215473532676697, 0.9298348426818848, 0.9293332695960999, 0.9996851682662964, 0.9933885335922241, 0.9907566905021667, 0.9999681711196899, 0.9971022009849548, 0.7689657211303711, 0.9484009146690369, 0.13260115683078766, 0.9933617115020752, 0.9818628430366516, 0.8795493841171265, 0.9840488433837891, 0.9823899269104004, 0.994962215423584, 0.731959342956543, 0.9895108342170715, 0.9971423745155334, 0.5458362698554993, 0.3520396947860718, 0.9780328273773193, 0.5804782509803772, 0.9987543821334839, 0.9999915361404419, 0.8940104246139526, 0.9943435192108154, 0.19301150739192963, 0.0275607667863369, 0.8172199726104736, 0.3415309190750122, 0.6565711498260498, 0.9999420642852783, 0.9682236313819885, 0.9904249310493469, 0.5104338526725769, 0.9565191864967346, 0.6578478813171387, 0.9731898903846741, 0.12479862570762634, 0.2389501929283142, 0.9803010821342468, 0.843787431716919, 0.9813414812088013, 0.7190759778022766, 0.9995619654655457, 0.9799215793609619, 0.9990746974945068, 0.9562553763389587, 0.9879046678543091, 0.9993870258331299, 0.5144264101982117, 0.7331254482269287, 0.965781569480896, 0.9958714842796326, 0.8164321780204773, 0.4206125736236572, 0.9912711381912231, 0.10806604474782944, 0.9852036237716675, 0.9687488079071045, 0.9890704154968262, 0.37852346897125244, 0.999263346195221, 0.9985498785972595, 0.9326132535934448, 0.7800835371017456, 0.2572803497314453, 0.7245361804962158, 0.9989525079727173, 0.8833596110343933, 0.9957029223442078, 0.150175079703331, 0.9998432397842407, 0.9937664270401001, 0.6764500141143799, 0.8286989331245422, 0.9984079003334045, 0.9999899864196777, 0.9884868860244751, 0.9792146682739258, 0.9994243383407593, 0.2555789649486542, 0.8536803722381592, 0.9992557168006897, 0.9788174033164978, 0.9965229034423828, 0.9877134561538696, 0.9979069232940674, 0.8417850732803345, 0.9946957230567932, 0.9472989439964294, 0.022070081904530525, 0.9694492220878601, 0.45721665024757385 ], "eval_quote_attribution_runtime": 14.5602, "eval_quote_attribution_samples_per_second": 6.868, "eval_quote_attribution_steps_per_second": 0.137, "epoch": 2.63, "step": 50 }, { "loss": 0.2307, "learning_rate": 0.0002, "epoch": 2.68, "step": 51 }, { "loss": 0.2822, "learning_rate": 0.0002, "epoch": 2.74, "step": 52 }, { "loss": 0.4234, "learning_rate": 0.0002, "epoch": 2.79, "step": 53 }, { "loss": 0.1328, "learning_rate": 0.0002, "epoch": 2.84, "step": 54 }, { "loss": 0.2677, "learning_rate": 0.0002, "epoch": 2.89, "step": 55 }, { "loss": 0.5528, "learning_rate": 0.0002, "epoch": 2.95, "step": 56 }, { "loss": 0.2496, "learning_rate": 0.0002, "epoch": 3.0, "step": 57 }, { "loss": 0.3588, "learning_rate": 0.0002, "epoch": 3.05, "step": 58 }, { "loss": 0.2189, "learning_rate": 0.0002, "epoch": 3.11, "step": 59 }, { "loss": 0.1681, "learning_rate": 0.0002, "epoch": 3.16, "step": 60 }, { "loss": 0.1513, "learning_rate": 0.0002, "epoch": 3.21, "step": 61 }, { "loss": 0.3061, "learning_rate": 0.0002, "epoch": 3.26, "step": 62 }, { "loss": 0.1747, "learning_rate": 0.0002, "epoch": 3.32, "step": 63 }, { "loss": 0.7051, "learning_rate": 0.0002, "epoch": 3.37, "step": 64 }, { "loss": 0.3258, "learning_rate": 0.0002, "epoch": 3.42, "step": 65 }, { "loss": 0.2501, "learning_rate": 0.0002, "epoch": 3.47, "step": 66 }, { "loss": 0.3557, "learning_rate": 0.0002, "epoch": 3.53, "step": 67 }, { "loss": 0.2547, "learning_rate": 0.0002, "epoch": 3.58, "step": 68 }, { "loss": 0.3042, "learning_rate": 0.0002, "epoch": 3.63, "step": 69 }, { "loss": 0.2302, "learning_rate": 0.0002, "epoch": 3.68, "step": 70 }, { "loss": 0.3719, "learning_rate": 0.0002, "epoch": 3.74, "step": 71 }, { "loss": 0.1577, "learning_rate": 0.0002, "epoch": 3.79, "step": 72 }, { "loss": 0.2379, "learning_rate": 0.0002, "epoch": 3.84, "step": 73 }, { "loss": 0.1758, "learning_rate": 0.0002, "epoch": 3.89, "step": 74 }, { "loss": 0.2782, "learning_rate": 0.0002, "epoch": 3.95, "step": 75 }, { "eval_quote_attribution_loss": 0.26001179218292236, "eval_quote_attribution_score": -0.08528769761323929, "eval_quote_attribution_brier_score": 0.08528769761323929, "eval_quote_attribution_average_probability": 0.8168715834617615, "eval_quote_attribution_accuracy": 0.89, "eval_quote_attribution_probabilities": [ 0.9026296138763428, 0.1851932406425476, 0.9768511652946472, 0.4401264786720276, 0.8363838195800781, 0.8475701808929443, 0.9538756012916565, 0.9998526573181152, 0.9873288869857788, 0.9786556959152222, 0.999768078327179, 0.9965505599975586, 0.7447078824043274, 0.9885498881340027, 0.2625715136528015, 0.9957558512687683, 0.9653661251068115, 0.9227031469345093, 0.9769460558891296, 0.8824784159660339, 0.9924226403236389, 0.7130450010299683, 0.9792454242706299, 0.9994041919708252, 0.6425893306732178, 0.7295198440551758, 0.9447097778320312, 0.8904377818107605, 0.9716805815696716, 0.9999940395355225, 0.9534645676612854, 0.9940650463104248, 0.6751196384429932, 0.23568245768547058, 0.7629656791687012, 0.6381295919418335, 0.893238365650177, 0.9958881735801697, 0.9216209053993225, 0.9992305040359497, 0.7711086273193359, 0.85187166929245, 0.7716799378395081, 0.8794398903846741, 0.46626409888267517, 0.14490152895450592, 0.9684292674064636, 0.733173668384552, 0.8834093809127808, 0.5463492274284363, 0.9875890612602234, 0.984916627407074, 0.9804478883743286, 0.8795258402824402, 0.9703794121742249, 0.9997115731239319, 0.5106987953186035, 0.5688933730125427, 0.9016174077987671, 0.9948227405548096, 0.5480153560638428, 0.13955558836460114, 0.997481644153595, 0.4037991166114807, 0.9725149869918823, 0.8801470994949341, 0.9700154066085815, 0.5698285698890686, 0.9974080920219421, 0.9997511506080627, 0.7093798518180847, 0.7934067845344543, 0.41924339532852173, 0.7720069289207458, 0.9965391159057617, 0.6835947036743164, 0.9877316355705261, 0.5307928919792175, 0.9996629953384399, 0.9944919347763062, 0.8758867383003235, 0.9088449478149414, 0.9994428753852844, 0.999991774559021, 0.7853628396987915, 0.9889209270477295, 0.9977598190307617, 0.4099595844745636, 0.7633507251739502, 0.9992504715919495, 0.9958760142326355, 0.8863640427589417, 0.9793367385864258, 0.9934154748916626, 0.7374902367591858, 0.9700621962547302, 0.882457435131073, 0.22204913198947906, 0.8173033595085144, 0.5371173620223999 ], "eval_quote_attribution_runtime": 14.5681, "eval_quote_attribution_samples_per_second": 6.864, "eval_quote_attribution_steps_per_second": 0.137, "epoch": 3.95, "step": 75 }, { "loss": 0.2524, "learning_rate": 0.0002, "epoch": 4.0, "step": 76 }, { "loss": 0.1857, "learning_rate": 0.0002, "epoch": 4.05, "step": 77 }, { "loss": 0.0754, "learning_rate": 0.0002, "epoch": 4.11, "step": 78 }, { "loss": 0.1613, "learning_rate": 0.0002, "epoch": 4.16, "step": 79 }, { "loss": 0.138, "learning_rate": 0.0002, "epoch": 4.21, "step": 80 }, { "loss": 0.3025, "learning_rate": 0.0002, "epoch": 4.26, "step": 81 }, { "loss": 0.105, "learning_rate": 0.0002, "epoch": 4.32, "step": 82 }, { "loss": 0.2233, "learning_rate": 0.0002, "epoch": 4.37, "step": 83 }, { "loss": 0.4177, "learning_rate": 0.0002, "epoch": 4.42, "step": 84 }, { "loss": 0.1342, "learning_rate": 0.0002, "epoch": 4.47, "step": 85 }, { "loss": 0.1978, "learning_rate": 0.0002, "epoch": 4.53, "step": 86 }, { "loss": 0.0881, "learning_rate": 0.0002, "epoch": 4.58, "step": 87 }, { "loss": 0.278, "learning_rate": 0.0002, "epoch": 4.63, "step": 88 }, { "loss": 0.1715, "learning_rate": 0.0002, "epoch": 4.68, "step": 89 }, { "loss": 0.151, "learning_rate": 0.0002, "epoch": 4.74, "step": 90 }, { "loss": 0.1379, "learning_rate": 0.0002, "epoch": 4.79, "step": 91 }, { "loss": 0.1585, "learning_rate": 0.0002, "epoch": 4.84, "step": 92 }, { "loss": 0.0984, "learning_rate": 0.0002, "epoch": 4.89, "step": 93 }, { "loss": 0.0954, "learning_rate": 0.0002, "epoch": 4.95, "step": 94 }, { "loss": 0.1645, "learning_rate": 0.0002, "epoch": 5.0, "step": 95 }, { "loss": 0.0976, "learning_rate": 0.0002, "epoch": 5.05, "step": 96 }, { "loss": 0.0487, "learning_rate": 0.0002, "epoch": 5.11, "step": 97 }, { "loss": 0.0413, "learning_rate": 0.0002, "epoch": 5.16, "step": 98 }, { "loss": 0.1954, "learning_rate": 0.0002, "epoch": 5.21, "step": 99 }, { "loss": 0.0346, "learning_rate": 0.0002, "epoch": 5.26, "step": 100 }, { "eval_quote_attribution_loss": 0.3599839508533478, "eval_quote_attribution_score": -0.10510585457086563, "eval_quote_attribution_brier_score": 0.10510585457086563, "eval_quote_attribution_average_probability": 0.8306464552879333, "eval_quote_attribution_accuracy": 0.85, "eval_quote_attribution_probabilities": [ 0.9992765784263611, 0.28390198945999146, 0.990900993347168, 0.45075294375419617, 0.3492777943611145, 0.9984502792358398, 0.9445596933364868, 0.9942185878753662, 0.9969722032546997, 0.992466151714325, 0.998611569404602, 0.9973504543304443, 0.9592073559761047, 0.99977046251297, 0.7703073024749756, 0.9986199140548706, 0.9545236825942993, 0.9561163187026978, 0.8402053117752075, 0.6840088367462158, 0.9999091625213623, 0.9983439445495605, 0.9908946752548218, 0.9997195601463318, 0.42451944947242737, 0.573303759098053, 0.9977748990058899, 0.6849521994590759, 0.9999692440032959, 0.9999998807907104, 0.9983345866203308, 0.9958279728889465, 0.9304934740066528, 0.005678042769432068, 0.9995090961456299, 0.7211510539054871, 0.9996688365936279, 0.999981164932251, 0.931464672088623, 0.980169951915741, 0.9902687072753906, 0.6067255735397339, 0.992051362991333, 0.9783447980880737, 0.0010329392971470952, 0.6434565782546997, 0.9944517016410828, 0.3135684132575989, 0.7568038702011108, 0.8766343593597412, 0.9999866485595703, 0.577447772026062, 0.9989136457443237, 0.6557362079620361, 0.9996137022972107, 0.9999902248382568, 0.9171779751777649, 0.9960963129997253, 0.9912585616111755, 0.9999946355819702, 0.28503158688545227, 0.9179512858390808, 0.9990841150283813, 0.25173652172088623, 0.968471884727478, 0.9983065128326416, 0.989474356174469, 0.774908721446991, 0.9996628761291504, 0.999954342842102, 0.9592410326004028, 0.9524649977684021, 0.13191652297973633, 0.9956117272377014, 0.9875655770301819, 0.8039676547050476, 0.980216920375824, 0.28019583225250244, 0.9999899864196777, 0.9999717473983765, 0.6315051913261414, 0.9928225874900818, 0.9998170733451843, 0.9880472421646118, 0.8217127919197083, 0.9996758699417114, 0.9998898506164551, 0.15411104261875153, 0.9987577199935913, 0.9999146461486816, 0.9998713731765747, 0.35211181640625, 0.9824928045272827, 0.2578616440296173, 0.9802077412605286, 0.9736130237579346, 0.9994499087333679, 0.017336532473564148, 0.9996139407157898, 0.9593958258628845 ], "eval_quote_attribution_runtime": 14.5587, "eval_quote_attribution_samples_per_second": 6.869, "eval_quote_attribution_steps_per_second": 0.137, "epoch": 5.26, "step": 100 }, { "train_runtime": 927.5167, "train_samples_per_second": 3.45, "train_steps_per_second": 0.108, "total_flos": 0.0, "train_loss": 0.3390199049934745, "epoch": 5.26, "step": 100 } ]]