maesneako committed on
Commit d821143 · verified · 1 Parent(s): aac5c48

Training in progress, step 8000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7692468478c3db0b8659c8f2ee3b9e272f41ecd69fca30ed9e07602d587fbfe4
+oid sha256:55da07e581fd8096ebcb45a8531fcba23a375781ddfb9f0a5eb59b864c325e73
 size 497774208
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e9ac0a28498c58f3d7a3c9319a7a0ee3b6720211b24fdcb67874945feb1a5b0
+oid sha256:980631eae589d47b66278f56c4908568b059b0af50129288193a6f342b8a142d
 size 995642298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35e702cb88a319fefdd66f80ca145cdc895eff77e02cbdc2c31c14f1339cb5be
+oid sha256:b4c51c2ab03684c0bbce834b4bc54e9891cdd083cfb324a59875bc7013bce092
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c1f24890119491fd7458ee672e4941d2dfe0b108325772bab0bd714e1f25f49
+oid sha256:934076cda4dcb4b4847ec5a9b0c790f7f9c404aea04451cf16edc88c60c7b83e
 size 1064
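
For context: each of the four files above is stored through Git LFS, so the diffs only touch the pointer files, and the "oid" line is the sha256 of the actual checkpoint blob. A minimal sketch (not part of this commit) of how one might verify a pulled file against the new pointer's oid; the local path and the hardcoded digest are taken from the model.safetensors diff above and are purely illustrative:

# Verify a Git LFS-tracked checkpoint file against the sha256 oid in its pointer.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex sha256 digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected oid copied from the new model.safetensors pointer above.
expected = "55da07e581fd8096ebcb45a8531fcba23a375781ddfb9f0a5eb59b864c325e73"
local = Path("last-checkpoint/model.safetensors")  # assumes the LFS blob has been pulled locally
if local.exists():
    print("oid matches:", sha256_of(local) == expected)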
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.768068552017212,
-  "best_model_checkpoint": "./FR_bkt_dbddv01-gpt2-french-small/checkpoint-6000",
-  "epoch": 4.366812227074236,
+  "best_metric": 3.7332470417022705,
+  "best_model_checkpoint": "./FR_bkt_dbddv01-gpt2-french-small/checkpoint-8000",
+  "epoch": 5.822416302765648,
   "eval_steps": 2000,
-  "global_step": 6000,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -103,13 +103,45 @@
       "eval_samples_per_second": 563.205,
       "eval_steps_per_second": 35.221,
       "step": 6000
+    },
+    {
+      "epoch": 4.73,
+      "learning_rate": 3.310681673391378e-07,
+      "loss": 3.8073,
+      "step": 6500
+    },
+    {
+      "epoch": 5.09,
+      "learning_rate": 2.779783393501805e-07,
+      "loss": 3.7946,
+      "step": 7000
+    },
+    {
+      "epoch": 5.46,
+      "learning_rate": 2.248885113612232e-07,
+      "loss": 3.7856,
+      "step": 7500
+    },
+    {
+      "epoch": 5.82,
+      "learning_rate": 1.7179868337226585e-07,
+      "loss": 3.7653,
+      "step": 8000
+    },
+    {
+      "epoch": 5.82,
+      "eval_loss": 3.7332470417022705,
+      "eval_runtime": 8.9385,
+      "eval_samples_per_second": 579.625,
+      "eval_steps_per_second": 36.248,
+      "step": 8000
     }
   ],
   "logging_steps": 500,
   "max_steps": 9618,
   "num_train_epochs": 7,
   "save_steps": 2000,
-  "total_flos": 3133936631808000.0,
+  "total_flos": 4178712821760000.0,
   "trial_name": null,
   "trial_params": null
 }
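
The updated state is consistent with the run's schedule: with max_steps 9618 over 7 epochs, one epoch is roughly 9618 / 7 ≈ 1374 steps, and 8000 / 1374 ≈ 5.822, matching the reported epoch. Below is a minimal resume sketch using the Hugging Face Trainer's resume_from_checkpoint mechanism, which is what these checkpoint files (optimizer.pt, scheduler.pt, rng_state.pth, trainer_state.json) exist to support. The base model id and the toy dataset are assumptions for illustration only; the output_dir, step/epoch settings, and checkpoint path are taken from trainer_state.json above.

# Sketch: resume a causal-LM fine-tune from the step-8000 checkpoint.
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

base_model = "dbddv01/gpt2-french-small"  # assumption: inferred from the output_dir name
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default
model = AutoModelForCausalLM.from_pretrained(base_model)

# Placeholder corpus so the example runs end to end; the real training data
# is not part of this commit.
texts = ["Bonjour tout le monde.", "Ceci est un exemple de phrase."]
dataset = Dataset.from_dict({"text": texts}).map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=64),
    batched=True,
    remove_columns=["text"],
)

args = TrainingArguments(
    output_dir="./FR_bkt_dbddv01-gpt2-french-small",  # matches best_model_checkpoint
    num_train_epochs=7,   # from trainer_state.json
    save_steps=2000,      # from trainer_state.json
    logging_steps=500,    # from trainer_state.json
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Resuming restores the model weights plus optimizer.pt, scheduler.pt,
# rng_state.pth and trainer_state.json, so training continues from global_step 8000.
trainer.train(resume_from_checkpoint="./FR_bkt_dbddv01-gpt2-french-small/checkpoint-8000")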