Tianjiao-Yu committed on
Commit
79d8c28
·
verified ·
1 Parent(s): 3bf783b

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: cc-by-nc-4.0
3
- base_model: MCG-NJU/videomae-huge-finetuned-kinetics
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -15,9 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # videomae-huge
17
 
18
- This model is a fine-tuned version of [MCG-NJU/videomae-huge-finetuned-kinetics](https://huggingface.co/MCG-NJU/videomae-huge-finetuned-kinetics) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.5347
21
  - Accuracy: 0.4286
22
 
23
  ## Model description
@@ -38,58 +38,24 @@ More information needed
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 5e-05
41
- - train_batch_size: 16
42
- - eval_batch_size: 16
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_ratio: 0.1
47
- - training_steps: 275
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
- | 2.6465 | 0.03 | 7 | 2.6473 | 0.0714 |
54
- | 2.6262 | 1.03 | 14 | 2.6245 | 0.1429 |
55
- | 2.5179 | 2.03 | 21 | 2.5627 | 0.1786 |
56
- | 2.4135 | 3.03 | 28 | 2.4946 | 0.1429 |
57
- | 2.1202 | 4.03 | 35 | 2.4093 | 0.3214 |
58
- | 1.848 | 5.03 | 42 | 2.3353 | 0.2857 |
59
- | 1.726 | 6.03 | 49 | 2.2536 | 0.3214 |
60
- | 1.3251 | 7.03 | 56 | 2.1674 | 0.3571 |
61
- | 1.174 | 8.03 | 63 | 2.1128 | 0.3571 |
62
- | 0.9108 | 9.03 | 70 | 2.0246 | 0.4286 |
63
- | 0.7467 | 10.03 | 77 | 1.9720 | 0.3571 |
64
- | 0.6224 | 11.03 | 84 | 1.9130 | 0.3929 |
65
- | 0.4737 | 12.03 | 91 | 1.9136 | 0.3929 |
66
- | 0.3682 | 13.03 | 98 | 1.8184 | 0.4643 |
67
- | 0.2657 | 14.03 | 105 | 1.8734 | 0.3571 |
68
- | 0.2085 | 15.03 | 112 | 1.8544 | 0.4286 |
69
- | 0.175 | 16.03 | 119 | 1.8411 | 0.3929 |
70
- | 0.1327 | 17.03 | 126 | 1.7585 | 0.3929 |
71
- | 0.1001 | 18.03 | 133 | 1.8193 | 0.3929 |
72
- | 0.0832 | 19.03 | 140 | 1.7595 | 0.3929 |
73
- | 0.085 | 20.03 | 147 | 1.7836 | 0.4286 |
74
- | 0.0614 | 21.03 | 154 | 1.7182 | 0.4286 |
75
- | 0.0496 | 22.03 | 161 | 1.7845 | 0.4286 |
76
- | 0.0474 | 23.03 | 168 | 1.7712 | 0.4643 |
77
- | 0.0403 | 24.03 | 175 | 1.7409 | 0.4643 |
78
- | 0.0395 | 25.03 | 182 | 1.7425 | 0.4643 |
79
- | 0.0369 | 26.03 | 189 | 1.7532 | 0.4643 |
80
- | 0.0346 | 27.03 | 196 | 1.7388 | 0.5 |
81
- | 0.0321 | 28.03 | 203 | 1.7391 | 0.5 |
82
- | 0.0314 | 29.03 | 210 | 1.7470 | 0.5 |
83
- | 0.0313 | 30.03 | 217 | 1.7349 | 0.4643 |
84
- | 0.0307 | 31.03 | 224 | 1.7574 | 0.4643 |
85
- | 0.0283 | 32.03 | 231 | 1.7857 | 0.4286 |
86
- | 0.0276 | 33.03 | 238 | 1.7865 | 0.4643 |
87
- | 0.0257 | 34.03 | 245 | 1.7707 | 0.4643 |
88
- | 0.0264 | 35.03 | 252 | 1.7683 | 0.4643 |
89
- | 0.0254 | 36.03 | 259 | 1.7712 | 0.4643 |
90
- | 0.0257 | 37.03 | 266 | 1.7736 | 0.4643 |
91
- | 0.0246 | 38.03 | 273 | 1.7748 | 0.4643 |
92
- | 0.0242 | 39.01 | 275 | 1.7750 | 0.4643 |
93
 
94
 
95
  ### Framework versions
 
1
  ---
2
  license: cc-by-nc-4.0
3
+ base_model: MCG-NJU/videomae-base-finetuned-kinetics
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
15
 
16
  # videomae-huge
17
 
18
+ This model is a fine-tuned version of [MCG-NJU/videomae-base-finetuned-kinetics](https://huggingface.co/MCG-NJU/videomae-base-finetuned-kinetics) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 2.0714
21
  - Accuracy: 0.4286
22
 
23
  ## Model description
 
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 5e-05
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_ratio: 0.1
47
+ - training_steps: 78
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
+ | 2.6392 | 0.18 | 14 | 2.5780 | 0.25 |
54
+ | 1.994 | 1.18 | 28 | 2.3995 | 0.3929 |
55
+ | 1.6374 | 2.18 | 42 | 2.3010 | 0.3929 |
56
+ | 1.124 | 3.18 | 56 | 2.2242 | 0.3929 |
57
+ | 0.9569 | 4.18 | 70 | 2.1825 | 0.4286 |
58
+ | 0.8862 | 5.1 | 78 | 2.1734 | 0.4286 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.01,
3
  "eval_accuracy": 0.42857142857142855,
4
- "eval_loss": 1.5347131490707397,
5
- "eval_runtime": 4.3115,
6
- "eval_samples_per_second": 3.247,
7
- "eval_steps_per_second": 0.232
8
  }
 
1
  {
2
+ "epoch": 5.1,
3
  "eval_accuracy": 0.42857142857142855,
4
+ "eval_loss": 2.0714259147644043,
5
+ "eval_runtime": 3.8707,
6
+ "eval_samples_per_second": 3.617,
7
+ "eval_steps_per_second": 0.517
8
  }
config.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "_name_or_path": "MCG-NJU/videomae-huge-finetuned-kinetics",
3
  "architectures": [
4
  "VideoMAEForVideoClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.0,
7
- "decoder_hidden_size": 640,
8
- "decoder_intermediate_size": 2560,
9
- "decoder_num_attention_heads": 8,
10
- "decoder_num_hidden_layers": 12,
11
  "hidden_act": "gelu",
12
  "hidden_dropout_prob": 0.0,
13
- "hidden_size": 1280,
14
  "id2label": {
15
  "0": "climb",
16
  "1": "crawl",
@@ -29,7 +29,7 @@
29
  },
30
  "image_size": 224,
31
  "initializer_range": 0.02,
32
- "intermediate_size": 5120,
33
  "label2id": {
34
  "climb": 0,
35
  "crawl": 1,
@@ -48,11 +48,11 @@
48
  },
49
  "layer_norm_eps": 1e-12,
50
  "model_type": "videomae",
51
- "norm_pix_loss": true,
52
- "num_attention_heads": 16,
53
  "num_channels": 3,
54
  "num_frames": 16,
55
- "num_hidden_layers": 32,
56
  "patch_size": 16,
57
  "problem_type": "single_label_classification",
58
  "qkv_bias": true,
 
1
  {
2
+ "_name_or_path": "MCG-NJU/videomae-base-finetuned-kinetics",
3
  "architectures": [
4
  "VideoMAEForVideoClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.0,
7
+ "decoder_hidden_size": 384,
8
+ "decoder_intermediate_size": 1536,
9
+ "decoder_num_attention_heads": 6,
10
+ "decoder_num_hidden_layers": 4,
11
  "hidden_act": "gelu",
12
  "hidden_dropout_prob": 0.0,
13
+ "hidden_size": 768,
14
  "id2label": {
15
  "0": "climb",
16
  "1": "crawl",
 
29
  },
30
  "image_size": 224,
31
  "initializer_range": 0.02,
32
+ "intermediate_size": 3072,
33
  "label2id": {
34
  "climb": 0,
35
  "crawl": 1,
 
48
  },
49
  "layer_norm_eps": 1e-12,
50
  "model_type": "videomae",
51
+ "norm_pix_loss": false,
52
+ "num_attention_heads": 12,
53
  "num_channels": 3,
54
  "num_frames": 16,
55
+ "num_hidden_layers": 12,
56
  "patch_size": 16,
57
  "problem_type": "single_label_classification",
58
  "qkv_bias": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae3205b26f2006d84fdf5f7a0f4d4a1aa6d9b0de42bb0c4f46cb1f6e782af747
3
- size 2526560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2123bd697e01c062399aa24d6818dc3064782f219a69cd69864c69d5aa3df57f
3
+ size 344974248
runs/Mar25_09-23-36_plan.cs.vt.edu/events.out.tfevents.1711383837.plan.cs.vt.edu.376416.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ef33a4dbd743b6b84fdcd8cdb9e0a35f7b911a4b769f96e2df816bbf6d90563
3
+ size 5074
runs/Mar25_09-32-34_plan.cs.vt.edu/events.out.tfevents.1711384364.plan.cs.vt.edu.376416.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48239d8c28d82c026743bf94e14c63195d7cb740c2ad82d331d6143c62a2da95
3
+ size 5074
runs/Mar25_09-32-34_plan.cs.vt.edu/events.out.tfevents.1711384513.plan.cs.vt.edu.376416.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df2ce2566dea9be0b2e6860e90bdc1a33374386cb436919b963fcbf95a87d43
3
+ size 5074
runs/Mar25_09-32-34_plan.cs.vt.edu/events.out.tfevents.1711384569.plan.cs.vt.edu.376416.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73c885e9631091493a0606fff30004e065409c2e1353d9ed405f741617bb4f26
3
+ size 5123
runs/Mar25_09-32-34_plan.cs.vt.edu/events.out.tfevents.1711384600.plan.cs.vt.edu.376416.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64e24efa2ca59f331be4c14c47350f77999cfbe4f00c608a823142bbdf3ce2f2
3
+ size 5123
runs/Mar25_09-32-34_plan.cs.vt.edu/events.out.tfevents.1711384646.plan.cs.vt.edu.376416.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3e720408dfcd55af363cf818d1acb9244f9a0bdb152ec847b1d4add5b542f54
3
+ size 5123
runs/Mar25_09-40-56_plan.cs.vt.edu/events.out.tfevents.1711384864.plan.cs.vt.edu.394257.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:992c87eb2cb0ee976f99118e93fa327020001d7921fc68c3c8a0cb00b341f932
3
+ size 9634
runs/Mar25_09-40-56_plan.cs.vt.edu/events.out.tfevents.1711385275.plan.cs.vt.edu.394257.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26face0e8f8dc66b68723685ae10279d5420f09069f04872a3b8bd1973ce6687
3
+ size 722
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.01,
3
  "eval_accuracy": 0.42857142857142855,
4
- "eval_loss": 1.5347131490707397,
5
- "eval_runtime": 4.3115,
6
- "eval_samples_per_second": 3.247,
7
- "eval_steps_per_second": 0.232
8
  }
 
1
  {
2
+ "epoch": 5.1,
3
  "eval_accuracy": 0.42857142857142855,
4
+ "eval_loss": 2.0714259147644043,
5
+ "eval_runtime": 3.8707,
6
+ "eval_samples_per_second": 3.617,
7
+ "eval_steps_per_second": 0.517
8
  }
trainer_state.json CHANGED
@@ -1,738 +1,192 @@
1
  {
2
- "best_metric": 0.5,
3
- "best_model_checkpoint": "Tianjiao-Yu/videomae-huge/checkpoint-196",
4
- "epoch": 39.00727272727273,
5
  "eval_steps": 500,
6
- "global_step": 275,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.02,
13
- "learning_rate": 8.92857142857143e-06,
14
- "loss": 2.6465,
15
  "step": 5
16
  },
17
  {
18
- "epoch": 0.03,
19
- "eval_accuracy": 0.07142857142857142,
20
- "eval_loss": 2.647310972213745,
21
- "eval_runtime": 8.9709,
22
- "eval_samples_per_second": 3.121,
23
- "eval_steps_per_second": 0.223,
24
- "step": 7
25
- },
26
- {
27
- "epoch": 1.01,
28
- "learning_rate": 1.785714285714286e-05,
29
- "loss": 2.6262,
30
  "step": 10
31
  },
32
  {
33
- "epoch": 1.03,
34
- "eval_accuracy": 0.14285714285714285,
35
- "eval_loss": 2.624509811401367,
36
- "eval_runtime": 7.3449,
37
- "eval_samples_per_second": 3.812,
38
- "eval_steps_per_second": 0.272,
39
  "step": 14
40
  },
41
  {
42
- "epoch": 2.0,
43
- "learning_rate": 2.6785714285714288e-05,
44
- "loss": 2.5945,
45
  "step": 15
46
  },
47
  {
48
- "epoch": 2.02,
49
- "learning_rate": 3.571428571428572e-05,
50
- "loss": 2.5179,
51
  "step": 20
52
  },
53
  {
54
- "epoch": 2.03,
55
- "eval_accuracy": 0.17857142857142858,
56
- "eval_loss": 2.562652111053467,
57
- "eval_runtime": 8.1508,
58
- "eval_samples_per_second": 3.435,
59
- "eval_steps_per_second": 0.245,
60
- "step": 21
61
- },
62
- {
63
- "epoch": 3.01,
64
- "learning_rate": 4.464285714285715e-05,
65
- "loss": 2.4135,
66
  "step": 25
67
  },
68
  {
69
- "epoch": 3.03,
70
- "eval_accuracy": 0.14285714285714285,
71
- "eval_loss": 2.494605302810669,
72
- "eval_runtime": 7.2598,
73
- "eval_samples_per_second": 3.857,
74
- "eval_steps_per_second": 0.275,
75
  "step": 28
76
  },
77
  {
78
- "epoch": 4.01,
79
- "learning_rate": 4.9595141700404864e-05,
80
- "loss": 2.182,
81
  "step": 30
82
  },
83
  {
84
- "epoch": 4.03,
85
- "learning_rate": 4.8582995951417004e-05,
86
- "loss": 2.1202,
87
  "step": 35
88
  },
89
  {
90
- "epoch": 4.03,
91
- "eval_accuracy": 0.32142857142857145,
92
- "eval_loss": 2.4093017578125,
93
- "eval_runtime": 7.6554,
94
- "eval_samples_per_second": 3.658,
95
- "eval_steps_per_second": 0.261,
96
- "step": 35
97
- },
98
- {
99
- "epoch": 5.02,
100
- "learning_rate": 4.757085020242915e-05,
101
- "loss": 1.848,
102
  "step": 40
103
  },
104
  {
105
- "epoch": 5.03,
106
- "eval_accuracy": 0.2857142857142857,
107
- "eval_loss": 2.335253953933716,
108
- "eval_runtime": 7.517,
109
- "eval_samples_per_second": 3.725,
110
- "eval_steps_per_second": 0.266,
111
  "step": 42
112
  },
113
  {
114
- "epoch": 6.01,
115
- "learning_rate": 4.65587044534413e-05,
116
- "loss": 1.726,
117
  "step": 45
118
  },
119
  {
120
- "epoch": 6.03,
121
- "eval_accuracy": 0.32142857142857145,
122
- "eval_loss": 2.2536072731018066,
123
- "eval_runtime": 7.0278,
124
- "eval_samples_per_second": 3.984,
125
- "eval_steps_per_second": 0.285,
126
- "step": 49
127
- },
128
- {
129
- "epoch": 7.0,
130
- "learning_rate": 4.5546558704453443e-05,
131
- "loss": 1.5657,
132
  "step": 50
133
  },
134
  {
135
- "epoch": 7.02,
136
- "learning_rate": 4.453441295546559e-05,
137
- "loss": 1.3251,
138
  "step": 55
139
  },
140
  {
141
- "epoch": 7.03,
142
- "eval_accuracy": 0.35714285714285715,
143
- "eval_loss": 2.1673505306243896,
144
- "eval_runtime": 8.0208,
145
- "eval_samples_per_second": 3.491,
146
- "eval_steps_per_second": 0.249,
147
  "step": 56
148
  },
149
  {
150
- "epoch": 8.01,
151
- "learning_rate": 4.3522267206477737e-05,
152
- "loss": 1.174,
153
  "step": 60
154
  },
155
  {
156
- "epoch": 8.03,
157
- "eval_accuracy": 0.35714285714285715,
158
- "eval_loss": 2.1128125190734863,
159
- "eval_runtime": 8.0792,
160
- "eval_samples_per_second": 3.466,
161
- "eval_steps_per_second": 0.248,
162
- "step": 63
163
- },
164
- {
165
- "epoch": 9.01,
166
- "learning_rate": 4.251012145748988e-05,
167
- "loss": 0.9546,
168
  "step": 65
169
  },
170
  {
171
- "epoch": 9.03,
172
- "learning_rate": 4.149797570850202e-05,
173
- "loss": 0.9108,
174
  "step": 70
175
  },
176
  {
177
- "epoch": 9.03,
178
  "eval_accuracy": 0.42857142857142855,
179
- "eval_loss": 2.0245614051818848,
180
- "eval_runtime": 7.8233,
181
- "eval_samples_per_second": 3.579,
182
- "eval_steps_per_second": 0.256,
183
  "step": 70
184
  },
185
  {
186
- "epoch": 10.02,
187
- "learning_rate": 4.048582995951417e-05,
188
- "loss": 0.7467,
189
  "step": 75
190
  },
191
  {
192
- "epoch": 10.03,
193
- "eval_accuracy": 0.35714285714285715,
194
- "eval_loss": 1.9719778299331665,
195
- "eval_runtime": 7.6011,
196
- "eval_samples_per_second": 3.684,
197
- "eval_steps_per_second": 0.263,
198
- "step": 77
199
- },
200
- {
201
- "epoch": 11.01,
202
- "learning_rate": 3.9473684210526316e-05,
203
- "loss": 0.6224,
204
- "step": 80
205
- },
206
- {
207
- "epoch": 11.03,
208
- "eval_accuracy": 0.39285714285714285,
209
- "eval_loss": 1.9129974842071533,
210
- "eval_runtime": 7.7956,
211
- "eval_samples_per_second": 3.592,
212
- "eval_steps_per_second": 0.257,
213
- "step": 84
214
- },
215
- {
216
- "epoch": 12.0,
217
- "learning_rate": 3.846153846153846e-05,
218
- "loss": 0.5523,
219
- "step": 85
220
- },
221
- {
222
- "epoch": 12.02,
223
- "learning_rate": 3.744939271255061e-05,
224
- "loss": 0.4737,
225
- "step": 90
226
- },
227
- {
228
- "epoch": 12.03,
229
- "eval_accuracy": 0.39285714285714285,
230
- "eval_loss": 1.9135581254959106,
231
- "eval_runtime": 8.3212,
232
- "eval_samples_per_second": 3.365,
233
- "eval_steps_per_second": 0.24,
234
- "step": 91
235
- },
236
- {
237
- "epoch": 13.01,
238
- "learning_rate": 3.6437246963562756e-05,
239
- "loss": 0.3682,
240
- "step": 95
241
- },
242
- {
243
- "epoch": 13.03,
244
- "eval_accuracy": 0.4642857142857143,
245
- "eval_loss": 1.8183634281158447,
246
- "eval_runtime": 8.3757,
247
- "eval_samples_per_second": 3.343,
248
- "eval_steps_per_second": 0.239,
249
- "step": 98
250
- },
251
- {
252
- "epoch": 14.01,
253
- "learning_rate": 3.54251012145749e-05,
254
- "loss": 0.3151,
255
- "step": 100
256
- },
257
- {
258
- "epoch": 14.03,
259
- "learning_rate": 3.441295546558704e-05,
260
- "loss": 0.2657,
261
- "step": 105
262
- },
263
- {
264
- "epoch": 14.03,
265
- "eval_accuracy": 0.35714285714285715,
266
- "eval_loss": 1.8734323978424072,
267
- "eval_runtime": 8.0062,
268
- "eval_samples_per_second": 3.497,
269
- "eval_steps_per_second": 0.25,
270
- "step": 105
271
- },
272
- {
273
- "epoch": 15.02,
274
- "learning_rate": 3.340080971659919e-05,
275
- "loss": 0.2085,
276
- "step": 110
277
- },
278
- {
279
- "epoch": 15.03,
280
- "eval_accuracy": 0.42857142857142855,
281
- "eval_loss": 1.8544093370437622,
282
- "eval_runtime": 7.1501,
283
- "eval_samples_per_second": 3.916,
284
- "eval_steps_per_second": 0.28,
285
- "step": 112
286
- },
287
- {
288
- "epoch": 16.01,
289
- "learning_rate": 3.2388663967611336e-05,
290
- "loss": 0.175,
291
- "step": 115
292
- },
293
- {
294
- "epoch": 16.03,
295
- "eval_accuracy": 0.39285714285714285,
296
- "eval_loss": 1.8410834074020386,
297
- "eval_runtime": 7.4699,
298
- "eval_samples_per_second": 3.748,
299
- "eval_steps_per_second": 0.268,
300
- "step": 119
301
- },
302
- {
303
- "epoch": 17.0,
304
- "learning_rate": 3.137651821862348e-05,
305
- "loss": 0.1416,
306
- "step": 120
307
- },
308
- {
309
- "epoch": 17.02,
310
- "learning_rate": 3.0364372469635626e-05,
311
- "loss": 0.1327,
312
- "step": 125
313
- },
314
- {
315
- "epoch": 17.03,
316
- "eval_accuracy": 0.39285714285714285,
317
- "eval_loss": 1.7585405111312866,
318
- "eval_runtime": 7.2596,
319
- "eval_samples_per_second": 3.857,
320
- "eval_steps_per_second": 0.275,
321
- "step": 126
322
- },
323
- {
324
- "epoch": 18.01,
325
- "learning_rate": 2.9352226720647776e-05,
326
- "loss": 0.1001,
327
- "step": 130
328
- },
329
- {
330
- "epoch": 18.03,
331
- "eval_accuracy": 0.39285714285714285,
332
- "eval_loss": 1.8193204402923584,
333
- "eval_runtime": 7.6478,
334
- "eval_samples_per_second": 3.661,
335
- "eval_steps_per_second": 0.262,
336
- "step": 133
337
- },
338
- {
339
- "epoch": 19.01,
340
- "learning_rate": 2.8340080971659922e-05,
341
- "loss": 0.094,
342
- "step": 135
343
- },
344
- {
345
- "epoch": 19.03,
346
- "learning_rate": 2.732793522267207e-05,
347
- "loss": 0.0832,
348
- "step": 140
349
- },
350
- {
351
- "epoch": 19.03,
352
- "eval_accuracy": 0.39285714285714285,
353
- "eval_loss": 1.759466528892517,
354
- "eval_runtime": 7.1794,
355
- "eval_samples_per_second": 3.9,
356
- "eval_steps_per_second": 0.279,
357
- "step": 140
358
- },
359
- {
360
- "epoch": 20.02,
361
- "learning_rate": 2.6315789473684212e-05,
362
- "loss": 0.085,
363
- "step": 145
364
- },
365
- {
366
- "epoch": 20.03,
367
- "eval_accuracy": 0.42857142857142855,
368
- "eval_loss": 1.7836247682571411,
369
- "eval_runtime": 7.7103,
370
- "eval_samples_per_second": 3.631,
371
- "eval_steps_per_second": 0.259,
372
- "step": 147
373
- },
374
- {
375
- "epoch": 21.01,
376
- "learning_rate": 2.530364372469636e-05,
377
- "loss": 0.0614,
378
- "step": 150
379
- },
380
- {
381
- "epoch": 21.03,
382
- "eval_accuracy": 0.42857142857142855,
383
- "eval_loss": 1.7181528806686401,
384
- "eval_runtime": 7.4571,
385
- "eval_samples_per_second": 3.755,
386
- "eval_steps_per_second": 0.268,
387
- "step": 154
388
- },
389
- {
390
- "epoch": 22.0,
391
- "learning_rate": 2.4291497975708502e-05,
392
- "loss": 0.0604,
393
- "step": 155
394
- },
395
- {
396
- "epoch": 22.02,
397
- "learning_rate": 2.327935222672065e-05,
398
- "loss": 0.0496,
399
- "step": 160
400
- },
401
- {
402
- "epoch": 22.03,
403
- "eval_accuracy": 0.42857142857142855,
404
- "eval_loss": 1.7844511270523071,
405
- "eval_runtime": 7.281,
406
- "eval_samples_per_second": 3.846,
407
- "eval_steps_per_second": 0.275,
408
- "step": 161
409
- },
410
- {
411
- "epoch": 23.01,
412
- "learning_rate": 2.2267206477732795e-05,
413
- "loss": 0.0474,
414
- "step": 165
415
- },
416
- {
417
- "epoch": 23.03,
418
- "eval_accuracy": 0.4642857142857143,
419
- "eval_loss": 1.771189570426941,
420
- "eval_runtime": 7.0825,
421
- "eval_samples_per_second": 3.953,
422
- "eval_steps_per_second": 0.282,
423
- "step": 168
424
- },
425
- {
426
- "epoch": 24.01,
427
- "learning_rate": 2.125506072874494e-05,
428
- "loss": 0.0423,
429
- "step": 170
430
- },
431
- {
432
- "epoch": 24.03,
433
- "learning_rate": 2.0242914979757085e-05,
434
- "loss": 0.0403,
435
- "step": 175
436
- },
437
- {
438
- "epoch": 24.03,
439
- "eval_accuracy": 0.4642857142857143,
440
- "eval_loss": 1.740900993347168,
441
- "eval_runtime": 6.8813,
442
- "eval_samples_per_second": 4.069,
443
- "eval_steps_per_second": 0.291,
444
- "step": 175
445
- },
446
- {
447
- "epoch": 25.02,
448
- "learning_rate": 1.923076923076923e-05,
449
- "loss": 0.0395,
450
- "step": 180
451
- },
452
- {
453
- "epoch": 25.03,
454
- "eval_accuracy": 0.4642857142857143,
455
- "eval_loss": 1.7425283193588257,
456
- "eval_runtime": 7.2597,
457
- "eval_samples_per_second": 3.857,
458
- "eval_steps_per_second": 0.275,
459
- "step": 182
460
- },
461
- {
462
- "epoch": 26.01,
463
- "learning_rate": 1.8218623481781378e-05,
464
- "loss": 0.0369,
465
- "step": 185
466
- },
467
- {
468
- "epoch": 26.03,
469
- "eval_accuracy": 0.4642857142857143,
470
- "eval_loss": 1.7531570196151733,
471
- "eval_runtime": 6.8458,
472
- "eval_samples_per_second": 4.09,
473
- "eval_steps_per_second": 0.292,
474
- "step": 189
475
- },
476
- {
477
- "epoch": 27.0,
478
- "learning_rate": 1.720647773279352e-05,
479
- "loss": 0.0358,
480
- "step": 190
481
- },
482
- {
483
- "epoch": 27.02,
484
- "learning_rate": 1.6194331983805668e-05,
485
- "loss": 0.0346,
486
- "step": 195
487
- },
488
- {
489
- "epoch": 27.03,
490
- "eval_accuracy": 0.5,
491
- "eval_loss": 1.7388352155685425,
492
- "eval_runtime": 7.2714,
493
- "eval_samples_per_second": 3.851,
494
- "eval_steps_per_second": 0.275,
495
- "step": 196
496
- },
497
- {
498
- "epoch": 28.01,
499
- "learning_rate": 1.5182186234817813e-05,
500
- "loss": 0.0321,
501
- "step": 200
502
- },
503
- {
504
- "epoch": 28.03,
505
- "eval_accuracy": 0.5,
506
- "eval_loss": 1.7390631437301636,
507
- "eval_runtime": 7.0464,
508
- "eval_samples_per_second": 3.974,
509
- "eval_steps_per_second": 0.284,
510
- "step": 203
511
- },
512
- {
513
- "epoch": 29.01,
514
- "learning_rate": 1.4170040485829961e-05,
515
- "loss": 0.0351,
516
- "step": 205
517
- },
518
- {
519
- "epoch": 29.03,
520
- "learning_rate": 1.3157894736842106e-05,
521
- "loss": 0.0314,
522
- "step": 210
523
- },
524
- {
525
- "epoch": 29.03,
526
- "eval_accuracy": 0.5,
527
- "eval_loss": 1.7469881772994995,
528
- "eval_runtime": 6.6833,
529
- "eval_samples_per_second": 4.19,
530
- "eval_steps_per_second": 0.299,
531
- "step": 210
532
- },
533
- {
534
- "epoch": 30.02,
535
- "learning_rate": 1.2145748987854251e-05,
536
- "loss": 0.0313,
537
- "step": 215
538
- },
539
- {
540
- "epoch": 30.03,
541
- "eval_accuracy": 0.4642857142857143,
542
- "eval_loss": 1.7349082231521606,
543
- "eval_runtime": 6.7759,
544
- "eval_samples_per_second": 4.132,
545
- "eval_steps_per_second": 0.295,
546
- "step": 217
547
- },
548
- {
549
- "epoch": 31.01,
550
- "learning_rate": 1.1133603238866398e-05,
551
- "loss": 0.0307,
552
- "step": 220
553
- },
554
- {
555
- "epoch": 31.03,
556
- "eval_accuracy": 0.4642857142857143,
557
- "eval_loss": 1.7574304342269897,
558
- "eval_runtime": 6.501,
559
- "eval_samples_per_second": 4.307,
560
- "eval_steps_per_second": 0.308,
561
- "step": 224
562
- },
563
- {
564
- "epoch": 32.0,
565
- "learning_rate": 1.0121457489878542e-05,
566
- "loss": 0.0296,
567
- "step": 225
568
- },
569
- {
570
- "epoch": 32.02,
571
- "learning_rate": 9.109311740890689e-06,
572
- "loss": 0.0283,
573
- "step": 230
574
- },
575
- {
576
- "epoch": 32.03,
577
  "eval_accuracy": 0.42857142857142855,
578
- "eval_loss": 1.7856690883636475,
579
- "eval_runtime": 6.5016,
580
- "eval_samples_per_second": 4.307,
581
- "eval_steps_per_second": 0.308,
582
- "step": 231
583
- },
584
- {
585
- "epoch": 33.01,
586
- "learning_rate": 8.097165991902834e-06,
587
- "loss": 0.0276,
588
- "step": 235
589
- },
590
- {
591
- "epoch": 33.03,
592
- "eval_accuracy": 0.4642857142857143,
593
- "eval_loss": 1.786450982093811,
594
- "eval_runtime": 7.4349,
595
- "eval_samples_per_second": 3.766,
596
- "eval_steps_per_second": 0.269,
597
- "step": 238
598
- },
599
- {
600
- "epoch": 34.01,
601
- "learning_rate": 7.0850202429149805e-06,
602
- "loss": 0.0282,
603
- "step": 240
604
- },
605
- {
606
- "epoch": 34.03,
607
- "learning_rate": 6.0728744939271254e-06,
608
- "loss": 0.0257,
609
- "step": 245
610
- },
611
- {
612
- "epoch": 34.03,
613
- "eval_accuracy": 0.4642857142857143,
614
- "eval_loss": 1.7707021236419678,
615
- "eval_runtime": 7.9939,
616
- "eval_samples_per_second": 3.503,
617
- "eval_steps_per_second": 0.25,
618
- "step": 245
619
- },
620
- {
621
- "epoch": 35.02,
622
- "learning_rate": 5.060728744939271e-06,
623
- "loss": 0.0264,
624
- "step": 250
625
  },
626
  {
627
- "epoch": 35.03,
628
- "eval_accuracy": 0.4642857142857143,
629
- "eval_loss": 1.7682857513427734,
630
- "eval_runtime": 7.2901,
631
- "eval_samples_per_second": 3.841,
632
- "eval_steps_per_second": 0.274,
633
- "step": 252
634
  },
635
  {
636
- "epoch": 36.01,
637
- "learning_rate": 4.048582995951417e-06,
638
- "loss": 0.0254,
639
- "step": 255
640
- },
641
- {
642
- "epoch": 36.03,
643
- "eval_accuracy": 0.4642857142857143,
644
- "eval_loss": 1.7711747884750366,
645
- "eval_runtime": 7.529,
646
- "eval_samples_per_second": 3.719,
647
- "eval_steps_per_second": 0.266,
648
- "step": 259
649
- },
650
- {
651
- "epoch": 37.0,
652
- "learning_rate": 3.0364372469635627e-06,
653
- "loss": 0.0247,
654
- "step": 260
655
- },
656
- {
657
- "epoch": 37.02,
658
- "learning_rate": 2.0242914979757085e-06,
659
- "loss": 0.0257,
660
- "step": 265
661
- },
662
- {
663
- "epoch": 37.03,
664
- "eval_accuracy": 0.4642857142857143,
665
- "eval_loss": 1.7735536098480225,
666
- "eval_runtime": 7.5429,
667
- "eval_samples_per_second": 3.712,
668
- "eval_steps_per_second": 0.265,
669
- "step": 266
670
- },
671
- {
672
- "epoch": 38.01,
673
- "learning_rate": 1.0121457489878542e-06,
674
- "loss": 0.0246,
675
- "step": 270
676
- },
677
- {
678
- "epoch": 38.03,
679
- "eval_accuracy": 0.4642857142857143,
680
- "eval_loss": 1.7748419046401978,
681
- "eval_runtime": 7.4873,
682
- "eval_samples_per_second": 3.74,
683
- "eval_steps_per_second": 0.267,
684
- "step": 273
685
- },
686
- {
687
- "epoch": 39.01,
688
- "learning_rate": 0.0,
689
- "loss": 0.0242,
690
- "step": 275
691
- },
692
- {
693
- "epoch": 39.01,
694
- "eval_accuracy": 0.4642857142857143,
695
- "eval_loss": 1.774975061416626,
696
- "eval_runtime": 7.7693,
697
- "eval_samples_per_second": 3.604,
698
- "eval_steps_per_second": 0.257,
699
- "step": 275
700
- },
701
- {
702
- "epoch": 39.01,
703
- "step": 275,
704
- "total_flos": 3.980465870018229e+19,
705
- "train_loss": 0.5794233712283048,
706
- "train_runtime": 2037.2017,
707
- "train_samples_per_second": 2.16,
708
- "train_steps_per_second": 0.135
709
- },
710
- {
711
- "epoch": 39.01,
712
  "eval_accuracy": 0.42857142857142855,
713
- "eval_loss": 1.5347131490707397,
714
- "eval_runtime": 5.4942,
715
- "eval_samples_per_second": 2.548,
716
- "eval_steps_per_second": 0.182,
717
- "step": 275
718
  },
719
  {
720
- "epoch": 39.01,
721
  "eval_accuracy": 0.42857142857142855,
722
- "eval_loss": 1.5347131490707397,
723
- "eval_runtime": 4.3115,
724
- "eval_samples_per_second": 3.247,
725
- "eval_steps_per_second": 0.232,
726
- "step": 275
727
  }
728
  ],
729
  "logging_steps": 5,
730
- "max_steps": 275,
731
  "num_input_tokens_seen": 0,
732
  "num_train_epochs": 9223372036854775807,
733
  "save_steps": 500,
734
- "total_flos": 3.980465870018229e+19,
735
- "train_batch_size": 16,
736
  "trial_name": null,
737
  "trial_params": null
738
  }
 
1
  {
2
+ "best_metric": 0.42857142857142855,
3
+ "best_model_checkpoint": "Tianjiao-Yu/videomae-huge/checkpoint-70",
4
+ "epoch": 5.102564102564102,
5
  "eval_steps": 500,
6
+ "global_step": 78,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.06,
13
+ "learning_rate": 3.125e-05,
14
+ "loss": 2.66,
15
  "step": 5
16
  },
17
  {
18
+ "epoch": 0.13,
19
+ "learning_rate": 4.8571428571428576e-05,
20
+ "loss": 2.6392,
 
 
 
 
 
 
 
 
 
21
  "step": 10
22
  },
23
  {
24
+ "epoch": 0.18,
25
+ "eval_accuracy": 0.25,
26
+ "eval_loss": 2.5779731273651123,
27
+ "eval_runtime": 7.9027,
28
+ "eval_samples_per_second": 3.543,
29
+ "eval_steps_per_second": 0.506,
30
  "step": 14
31
  },
32
  {
33
+ "epoch": 1.01,
34
+ "learning_rate": 4.5e-05,
35
+ "loss": 2.5921,
36
  "step": 15
37
  },
38
  {
39
+ "epoch": 1.08,
40
+ "learning_rate": 4.1428571428571437e-05,
41
+ "loss": 2.2172,
42
  "step": 20
43
  },
44
  {
45
+ "epoch": 1.14,
46
+ "learning_rate": 3.785714285714286e-05,
47
+ "loss": 1.994,
 
 
 
 
 
 
 
 
 
48
  "step": 25
49
  },
50
  {
51
+ "epoch": 1.18,
52
+ "eval_accuracy": 0.39285714285714285,
53
+ "eval_loss": 2.399493932723999,
54
+ "eval_runtime": 7.938,
55
+ "eval_samples_per_second": 3.527,
56
+ "eval_steps_per_second": 0.504,
57
  "step": 28
58
  },
59
  {
60
+ "epoch": 2.03,
61
+ "learning_rate": 3.428571428571429e-05,
62
+ "loss": 1.947,
63
  "step": 30
64
  },
65
  {
66
+ "epoch": 2.09,
67
+ "learning_rate": 3.071428571428572e-05,
68
+ "loss": 1.4849,
69
  "step": 35
70
  },
71
  {
72
+ "epoch": 2.15,
73
+ "learning_rate": 2.714285714285714e-05,
74
+ "loss": 1.6374,
 
 
 
 
 
 
 
 
 
75
  "step": 40
76
  },
77
  {
78
+ "epoch": 2.18,
79
+ "eval_accuracy": 0.39285714285714285,
80
+ "eval_loss": 2.3010246753692627,
81
+ "eval_runtime": 7.9782,
82
+ "eval_samples_per_second": 3.51,
83
+ "eval_steps_per_second": 0.501,
84
  "step": 42
85
  },
86
  {
87
+ "epoch": 3.04,
88
+ "learning_rate": 2.357142857142857e-05,
89
+ "loss": 1.3647,
90
  "step": 45
91
  },
92
  {
93
+ "epoch": 3.1,
94
+ "learning_rate": 2e-05,
95
+ "loss": 1.216,
 
 
 
 
 
 
 
 
 
96
  "step": 50
97
  },
98
  {
99
+ "epoch": 3.17,
100
+ "learning_rate": 1.642857142857143e-05,
101
+ "loss": 1.124,
102
  "step": 55
103
  },
104
  {
105
+ "epoch": 3.18,
106
+ "eval_accuracy": 0.39285714285714285,
107
+ "eval_loss": 2.2242484092712402,
108
+ "eval_runtime": 7.7889,
109
+ "eval_samples_per_second": 3.595,
110
+ "eval_steps_per_second": 0.514,
111
  "step": 56
112
  },
113
  {
114
+ "epoch": 4.05,
115
+ "learning_rate": 1.2857142857142857e-05,
116
+ "loss": 1.0848,
117
  "step": 60
118
  },
119
  {
120
+ "epoch": 4.12,
121
+ "learning_rate": 9.285714285714286e-06,
122
+ "loss": 0.9794,
 
 
 
 
 
 
 
 
 
123
  "step": 65
124
  },
125
  {
126
+ "epoch": 4.18,
127
+ "learning_rate": 5.7142857142857145e-06,
128
+ "loss": 0.9569,
129
  "step": 70
130
  },
131
  {
132
+ "epoch": 4.18,
133
  "eval_accuracy": 0.42857142857142855,
134
+ "eval_loss": 2.182518482208252,
135
+ "eval_runtime": 7.9247,
136
+ "eval_samples_per_second": 3.533,
137
+ "eval_steps_per_second": 0.505,
138
  "step": 70
139
  },
140
  {
141
+ "epoch": 5.06,
142
+ "learning_rate": 2.142857142857143e-06,
143
+ "loss": 0.8862,
144
  "step": 75
145
  },
146
  {
147
+ "epoch": 5.1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  "eval_accuracy": 0.42857142857142855,
149
+ "eval_loss": 2.1734211444854736,
150
+ "eval_runtime": 7.8943,
151
+ "eval_samples_per_second": 3.547,
152
+ "eval_steps_per_second": 0.507,
153
+ "step": 78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  },
155
  {
156
+ "epoch": 5.1,
157
+ "step": 78,
158
+ "total_flos": 7.713965251203564e+17,
159
+ "train_loss": 1.6192821661631267,
160
+ "train_runtime": 267.6762,
161
+ "train_samples_per_second": 2.331,
162
+ "train_steps_per_second": 0.291
163
  },
164
  {
165
+ "epoch": 5.1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  "eval_accuracy": 0.42857142857142855,
167
+ "eval_loss": 2.0714259147644043,
168
+ "eval_runtime": 4.1878,
169
+ "eval_samples_per_second": 3.343,
170
+ "eval_steps_per_second": 0.478,
171
+ "step": 78
172
  },
173
  {
174
+ "epoch": 5.1,
175
  "eval_accuracy": 0.42857142857142855,
176
+ "eval_loss": 2.0714259147644043,
177
+ "eval_runtime": 3.8707,
178
+ "eval_samples_per_second": 3.617,
179
+ "eval_steps_per_second": 0.517,
180
+ "step": 78
181
  }
182
  ],
183
  "logging_steps": 5,
184
+ "max_steps": 78,
185
  "num_input_tokens_seen": 0,
186
  "num_train_epochs": 9223372036854775807,
187
  "save_steps": 500,
188
+ "total_flos": 7.713965251203564e+17,
189
+ "train_batch_size": 8,
190
  "trial_name": null,
191
  "trial_params": null
192
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5da55cdf3ae599f60b6c1ca815936709b3bb7791b63b743fed24cd1f23ce505
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a9df7204865a72646bb05717e94fcd96b091033787df0920383d9f482c0253e
3
  size 4728