maesneako committed on
Commit d821143 · verified · 1 Parent(s): aac5c48

Training in progress, step 8000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7692468478c3db0b8659c8f2ee3b9e272f41ecd69fca30ed9e07602d587fbfe4
+oid sha256:55da07e581fd8096ebcb45a8531fcba23a375781ddfb9f0a5eb59b864c325e73
 size 497774208
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e9ac0a28498c58f3d7a3c9319a7a0ee3b6720211b24fdcb67874945feb1a5b0
+oid sha256:980631eae589d47b66278f56c4908568b059b0af50129288193a6f342b8a142d
 size 995642298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35e702cb88a319fefdd66f80ca145cdc895eff77e02cbdc2c31c14f1339cb5be
+oid sha256:b4c51c2ab03684c0bbce834b4bc54e9891cdd083cfb324a59875bc7013bce092
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c1f24890119491fd7458ee672e4941d2dfe0b108325772bab0bd714e1f25f49
+oid sha256:934076cda4dcb4b4847ec5a9b0c790f7f9c404aea04451cf16edc88c60c7b83e
 size 1064
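
For context: each of the four files above is stored through Git LFS, so the diffs only touch the pointer files, and the "oid" line is the sha256 of the actual checkpoint blob. A minimal sketch (not part of this commit) of how one might verify a pulled file against the new pointer's oid; the local path and the hardcoded digest are taken from the model.safetensors diff above and are purely illustrative:

# Verify a Git LFS-tracked checkpoint file against the sha256 oid in its pointer.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex sha256 digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected oid copied from the new model.safetensors pointer above.
expected = "55da07e581fd8096ebcb45a8531fcba23a375781ddfb9f0a5eb59b864c325e73"
local = Path("last-checkpoint/model.safetensors")  # assumes the LFS blob has been pulled locally
if local.exists():
    print("oid matches:", sha256_of(local) == expected)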
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.768068552017212,
-  "best_model_checkpoint": "./FR_bkt_dbddv01-gpt2-french-small/checkpoint-6000",
-  "epoch": 4.366812227074236,
+  "best_metric": 3.7332470417022705,
+  "best_model_checkpoint": "./FR_bkt_dbddv01-gpt2-french-small/checkpoint-8000",
+  "epoch": 5.822416302765648,
   "eval_steps": 2000,
-  "global_step": 6000,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -103,13 +103,45 @@
       "eval_samples_per_second": 563.205,
       "eval_steps_per_second": 35.221,
       "step": 6000
+    },
+    {
+      "epoch": 4.73,
+      "learning_rate": 3.310681673391378e-07,
+      "loss": 3.8073,
+      "step": 6500
+    },
+    {
+      "epoch": 5.09,
+      "learning_rate": 2.779783393501805e-07,
+      "loss": 3.7946,
+      "step": 7000
+    },
+    {
+      "epoch": 5.46,
+      "learning_rate": 2.248885113612232e-07,
+      "loss": 3.7856,
+      "step": 7500
+    },
+    {
+      "epoch": 5.82,
+      "learning_rate": 1.7179868337226585e-07,
+      "loss": 3.7653,
+      "step": 8000
+    },
+    {
+      "epoch": 5.82,
+      "eval_loss": 3.7332470417022705,
+      "eval_runtime": 8.9385,
+      "eval_samples_per_second": 579.625,
+      "eval_steps_per_second": 36.248,
+      "step": 8000
     }
   ],
   "logging_steps": 500,
   "max_steps": 9618,
   "num_train_epochs": 7,
   "save_steps": 2000,
-  "total_flos": 3133936631808000.0,
+  "total_flos": 4178712821760000.0,
   "trial_name": null,
   "trial_params": null
 }
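
The updated state is consistent with the run's schedule: with max_steps 9618 over 7 epochs, one epoch is roughly 9618 / 7 ≈ 1374 steps, and 8000 / 1374 ≈ 5.822, matching the reported epoch. Below is a minimal resume sketch using the Hugging Face Trainer's resume_from_checkpoint mechanism, which is what these checkpoint files (optimizer.pt, scheduler.pt, rng_state.pth, trainer_state.json) exist to support. The base model id and the toy dataset are assumptions for illustration only; the output_dir, step/epoch settings, and checkpoint path are taken from trainer_state.json above.

# Sketch: resume a causal-LM fine-tune from the step-8000 checkpoint.
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

base_model = "dbddv01/gpt2-french-small"  # assumption: inferred from the output_dir name
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default
model = AutoModelForCausalLM.from_pretrained(base_model)

# Placeholder corpus so the example runs end to end; the real training data
# is not part of this commit.
texts = ["Bonjour tout le monde.", "Ceci est un exemple de phrase."]
dataset = Dataset.from_dict({"text": texts}).map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=64),
    batched=True,
    remove_columns=["text"],
)

args = TrainingArguments(
    output_dir="./FR_bkt_dbddv01-gpt2-french-small",  # matches best_model_checkpoint
    num_train_epochs=7,   # from trainer_state.json
    save_steps=2000,      # from trainer_state.json
    logging_steps=500,    # from trainer_state.json
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Resuming restores the model weights plus optimizer.pt, scheduler.pt,
# rng_state.pth and trainer_state.json, so training continues from global_step 8000.
trainer.train(resume_from_checkpoint="./FR_bkt_dbddv01-gpt2-french-small/checkpoint-8000")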