codebyzeb committed on
Commit
d842af3
·
verified ·
1 Parent(s): b048d25

Final model for experiment Dutch

Browse files
Files changed (6) hide show
  1. README.md +47 -47
  2. config.json +1 -1
  3. model.safetensors +2 -2
  4. tokenizer.json +47 -112
  5. training_args.bin +1 -1
  6. vocab.json +1 -1
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
  tags:
4
- - Mandarin
5
  - generated_from_trainer
6
  model-index:
7
  - name: childes-segmentation-100k-gpt2_lm-model
@@ -16,53 +16,53 @@ should probably proofread and complete it, then remove this comment. -->
16
  This model is a fine-tuned version of an unspecified base model on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - epoch: 4000.0
19
- - eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.4895
20
- - eval_absolute_seg_boundary_fscore_Entropy: 0.5708
21
- - eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.4915
22
- - eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.6177
23
- - eval_absolute_seg_boundary_fscore_Increase in Loss: 0.6662
24
- - eval_absolute_seg_boundary_fscore_Increase in Rank: 0.7024
25
- - eval_absolute_seg_boundary_fscore_Loss: 0.5977
26
- - eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.7211
27
- - eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.7641
28
- - eval_absolute_seg_boundary_fscore_Rank: 0.6237
29
- - eval_absolute_seg_type_fscore_Boundary Prediction: 0.4682
30
- - eval_absolute_seg_type_fscore_Entropy: 0.3143
31
- - eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.5365
32
- - eval_absolute_seg_type_fscore_Increase in Entropy: 0.3262
33
- - eval_absolute_seg_type_fscore_Increase in Loss: 0.3540
34
- - eval_absolute_seg_type_fscore_Increase in Rank: 0.4184
35
- - eval_absolute_seg_type_fscore_Loss: 0.2843
36
- - eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.4790
37
- - eval_absolute_seg_type_fscore_Majority Vote Spike: 0.4732
38
- - eval_absolute_seg_type_fscore_Rank: 0.3459
39
- - eval_bpc: 4.5280
40
- - eval_loss: 3.1386
41
  - eval_model_preparation_time: 0.0008
42
- - eval_perplexity: 23.0705
43
- - eval_runtime: 45.72
44
- - eval_samples_per_second: 3.084
45
- - eval_spike_seg_boundary_fscore_Boundary Prediction: 0.8113
46
- - eval_spike_seg_boundary_fscore_Entropy: 0.5823
47
- - eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.8144
48
- - eval_spike_seg_boundary_fscore_Increase in Entropy: 0.6192
49
- - eval_spike_seg_boundary_fscore_Increase in Loss: 0.6641
50
- - eval_spike_seg_boundary_fscore_Increase in Rank: 0.7155
51
- - eval_spike_seg_boundary_fscore_Loss: 0.6074
52
- - eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.7518
53
- - eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.7574
54
- - eval_spike_seg_boundary_fscore_Rank: 0.6497
55
- - eval_spike_seg_type_fscore_Boundary Prediction: 0.5335
56
- - eval_spike_seg_type_fscore_Entropy: 0.3071
57
- - eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.5314
58
- - eval_spike_seg_type_fscore_Increase in Entropy: 0.3199
59
- - eval_spike_seg_type_fscore_Increase in Loss: 0.3475
60
- - eval_spike_seg_type_fscore_Increase in Rank: 0.4171
61
- - eval_spike_seg_type_fscore_Loss: 0.2965
62
- - eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.4907
63
- - eval_spike_seg_type_fscore_Majority Vote Spike: 0.4385
64
- - eval_spike_seg_type_fscore_Rank: 0.3780
65
- - eval_steps_per_second: 0.109
66
  - step: 100000
67
 
68
  ## Model description
 
1
  ---
2
  library_name: transformers
3
  tags:
4
+ - Dutch
5
  - generated_from_trainer
6
  model-index:
7
  - name: childes-segmentation-100k-gpt2_lm-model
 
16
  This model is a fine-tuned version of an unspecified base model on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - epoch: 4000.0
19
+ - eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.6381
20
+ - eval_absolute_seg_boundary_fscore_Entropy: 0.4936
21
+ - eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.6397
22
+ - eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.6171
23
+ - eval_absolute_seg_boundary_fscore_Increase in Loss: 0.6068
24
+ - eval_absolute_seg_boundary_fscore_Increase in Rank: 0.6806
25
+ - eval_absolute_seg_boundary_fscore_Loss: 0.5355
26
+ - eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.7011
27
+ - eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.7273
28
+ - eval_absolute_seg_boundary_fscore_Rank: 0.5571
29
+ - eval_absolute_seg_type_fscore_Boundary Prediction: 0.1447
30
+ - eval_absolute_seg_type_fscore_Entropy: 0.2626
31
+ - eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.3233
32
+ - eval_absolute_seg_type_fscore_Increase in Entropy: 0.3100
33
+ - eval_absolute_seg_type_fscore_Increase in Loss: 0.2509
34
+ - eval_absolute_seg_type_fscore_Increase in Rank: 0.4174
35
+ - eval_absolute_seg_type_fscore_Loss: 0.2412
36
+ - eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.4319
37
+ - eval_absolute_seg_type_fscore_Majority Vote Spike: 0.4164
38
+ - eval_absolute_seg_type_fscore_Rank: 0.2972
39
+ - eval_bpc: 4.4805
40
+ - eval_loss: 3.1056
41
  - eval_model_preparation_time: 0.0008
42
+ - eval_perplexity: 22.3231
43
+ - eval_runtime: 47.9148
44
+ - eval_samples_per_second: 2.964
45
+ - eval_spike_seg_boundary_fscore_Boundary Prediction: 0.7308
46
+ - eval_spike_seg_boundary_fscore_Entropy: 0.5944
47
+ - eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.7195
48
+ - eval_spike_seg_boundary_fscore_Increase in Entropy: 0.6282
49
+ - eval_spike_seg_boundary_fscore_Increase in Loss: 0.6258
50
+ - eval_spike_seg_boundary_fscore_Increase in Rank: 0.6735
51
+ - eval_spike_seg_boundary_fscore_Loss: 0.5578
52
+ - eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.7329
53
+ - eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.7085
54
+ - eval_spike_seg_boundary_fscore_Rank: 0.6106
55
+ - eval_spike_seg_type_fscore_Boundary Prediction: 0.3885
56
+ - eval_spike_seg_type_fscore_Entropy: 0.2817
57
+ - eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.3610
58
+ - eval_spike_seg_type_fscore_Increase in Entropy: 0.2866
59
+ - eval_spike_seg_type_fscore_Increase in Loss: 0.3167
60
+ - eval_spike_seg_type_fscore_Increase in Rank: 0.3724
61
+ - eval_spike_seg_type_fscore_Loss: 0.2626
62
+ - eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.4120
63
+ - eval_spike_seg_type_fscore_Majority Vote Spike: 0.3425
64
+ - eval_spike_seg_type_fscore_Rank: 0.3228
65
+ - eval_steps_per_second: 0.104
66
  - step: 100000
67
 
68
  ## Model description
config.json CHANGED
@@ -27,5 +27,5 @@
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.44.2",
29
  "use_cache": true,
30
- "vocab_size": 115
31
  }
 
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.44.2",
29
  "use_cache": true,
30
+ "vocab_size": 50
31
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f7efa21a386251f84b729d3205aa549d7c8926a03a23ff83f171997f706fda2
3
- size 2574104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2419c1d9f086c904f711146ca854e725f4afa8b229953ec297133263c26d1b8d
3
+ size 2540824
tokenizer.json CHANGED
@@ -51,7 +51,7 @@
51
  ]
52
  },
53
  "pre_tokenizer": {
54
- "type": "WhitespaceSplit"
55
  },
56
  "post_processor": {
57
  "type": "TemplateProcessing",
@@ -115,117 +115,52 @@
115
  "PAD": 1,
116
  "WORD_BOUNDARY": 2,
117
  "UTT_BOUNDARY": 3,
118
- "a˧˥": 4,
119
- "u˧˥": 5,
120
- "": 6,
121
- "au": 7,
122
- "n": 8,
123
- "a˥˩": 9,
124
- "ʃ̺": 10,
125
- "ɻ̩˥˩": 11,
126
- "ə˧˥": 12,
127
- "m": 13,
128
- "ɤ": 14,
129
- "p": 15,
130
- "j": 16,
131
- "e˧˥": 17,
132
- "": 18,
133
- "k": 19,
134
- "ɤ˥˩": 20,
135
- "w": 21,
136
- "": 22,
137
- "t̠ʃ̺ʰ": 23,
138
- "ə˥": 24,
139
- "ŋ": 25,
140
- "t": 26,
141
- "ʊ˥": 27,
142
- "ɕ": 28,
143
- "i": 29,
144
- "a": 30,
145
- "l": 31,
146
- "au˧˩˧": 32,
147
- "x": 33,
148
- "u˧˩˧": 34,
149
- "": 35,
150
- "ei˧˩˧": 36,
151
- "": 37,
152
- "i˧˥": 38,
153
- "ai˧˥": 39,
154
- "ou˧˩˧": 40,
155
- "ɤ˧˥": 41,
156
- "o˧˩˧": 42,
157
- "": 43,
158
- "au˥˩": 44,
159
- "ts": 45,
160
- "ə˧˩˧": 46,
161
- "ɤ˥": 47,
162
- "ei˧˥": 48,
163
- "ʊ˧˥": 49,
164
- "i˧˩˧": 50,
165
- "t̠ʃ̺": 51,
166
- "ɻ̩˧˩˧": 52,
167
- "ei˥˩": 53,
168
- "s": 54,
169
- "u˥˩": 55,
170
- "ɹ̪̩": 56,
171
- "ai˥": 57,
172
- "u˥": 58,
173
- "tɕʰ": 59,
174
- "a˧˩˧": 60,
175
- "ai˥˩": 61,
176
- "ɛ˥˩": 62,
177
- "f": 63,
178
- "i˥˩": 64,
179
- "y˥˩": 65,
180
- "au˧˥": 66,
181
- "ɻ": 67,
182
- "ou˥˩": 68,
183
- "e˥": 69,
184
- "tʰ": 70,
185
- "ɹ̪̩˥˩": 71,
186
- "ɛ˧˥": 72,
187
- "au˥": 73,
188
- "ou˧˥": 74,
189
- "e˧˩˧": 75,
190
- "ɛ˥": 76,
191
- "ɻ̩˥": 77,
192
- "ɥ": 78,
193
- "ɹ̪̩˧˩˧": 79,
194
- "ai˧˩˧": 80,
195
- "ou˥": 81,
196
- "o˥˩": 82,
197
- "ɛ˧˩˧": 83,
198
- "ʊ˧˩˧": 84,
199
- "ɔ˥": 85,
200
- "tsʰ": 86,
201
- "ei": 87,
202
- "ə˥˩": 88,
203
- "o": 89,
204
- "ʊ˥˩": 90,
205
- "ou": 91,
206
- "ɤ˧˩˧": 92,
207
- "o˧˥": 93,
208
- "ei˥": 94,
209
- "e˥˩": 95,
210
- "ɚ˧˩˧": 96,
211
- "y˥": 97,
212
- "ɚ˥˩": 98,
213
- "y˧˥": 99,
214
- "ɻ̩": 100,
215
- "y˧˩˧": 101,
216
- "ɹ̪̩˥": 102,
217
- "ɻ̩˧˥": 103,
218
- "u": 104,
219
- "ə": 105,
220
- "ai": 106,
221
- "ʊ": 107,
222
- "e": 108,
223
- "ɚ˧˥": 109,
224
- "ɔ˥˩": 110,
225
- "ɹ̪̩˧˥": 111,
226
- "ɛ": 112,
227
- "y": 113,
228
- "m˧˥": 114
229
  },
230
  "unk_token": "UNK"
231
  }
 
51
  ]
52
  },
53
  "pre_tokenizer": {
54
+ "type": "Whitespace"
55
  },
56
  "post_processor": {
57
  "type": "TemplateProcessing",
 
115
  "PAD": 1,
116
  "WORD_BOUNDARY": 2,
117
  "UTT_BOUNDARY": 3,
118
+ "z": 4,
119
+ "": 5,
120
+ "j": 6,
121
+ "ãː": 7,
122
+ "ɦ": 8,
123
+ "ɾ": 9,
124
+ "d": 10,
125
+ "i": 11,
126
+ "ɛ": 12,
127
+ "p": 13,
128
+ "ɪ": 14,
129
+ "k": 15,
130
+ "ɑ": 16,
131
+ "l": 17,
132
+ "ɛː": 18,
133
+ "n": 19,
134
+ "s": 20,
135
+ "v": 21,
136
+ "ə": 22,
137
+ "ɛi": 23,
138
+ "ʋ": 24,
139
+ "t": 25,
140
+ "m": 26,
141
+ "ɣ": 27,
142
+ "ʏ": 28,
143
+ "ɔ": 29,
144
+ "x": 30,
145
+ "u": 31,
146
+ "f": 32,
147
+ "ŋ": 33,
148
+ "øː": 34,
149
+ "b": 35,
150
+ "ɔː": 36,
151
+ "ʌu": 37,
152
+ "y": 38,
153
+ "œy": 39,
154
+ "": 40,
155
+ "w": 41,
156
+ "ʃ": 42,
157
+ "t̠ʃ": 43,
158
+ "ɲ": 44,
159
+ "ʒ": 45,
160
+ "": 46,
161
+ "ɡ": 47,
162
+ "d̠ʒ": 48,
163
+ "": 49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  },
165
  "unk_token": "UNK"
166
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab5aeece3087b363ef297505f6884ea73978b1fc89a8380b1aea665daf958b97
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:051d8089b9b2a56e1beb0ffb7cb9d9effdd39e932954aebd1b6feb1c099a56c7
3
  size 5368
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"a˧˥":4,"u˧˥":5,"":6,"au":7,"n":8,"a˥˩":9,"ʃ̺":10,"ɻ̩˥˩":11,"ə˧˥":12,"m":13,"ɤ":14,"p":15,"j":16,"e˧˥":17,"":18,"k":19,"ɤ˥˩":20,"w":21,"":22,"t̠ʃ̺ʰ":23,"ə˥":24,"ŋ":25,"t":26,"ʊ˥":27,"ɕ":28,"i":29,"a":30,"l":31,"au˧˩˧":32,"x":33,"u˧˩˧":34,"":35,"ei˧˩˧":36,"":37,"i˧˥":38,"ai˧˥":39,"ou˧˩˧":40,"ɤ˧˥":41,"o˧˩˧":42,"":43,"au˥˩":44,"ts":45,"ə˧˩˧":46,"ɤ˥":47,"ei˧˥":48,"ʊ˧˥":49,"i˧˩˧":50,"t̠ʃ̺":51,"ɻ̩˧˩˧":52,"ei˥˩":53,"s":54,"u˥˩":55,"ɹ̪̩":56,"ai˥":57,"u˥":58,"tɕʰ":59,"a˧˩˧":60,"ai˥˩":61,"ɛ˥˩":62,"f":63,"i˥˩":64,"y˥˩":65,"au˧˥":66,"ɻ":67,"ou˥˩":68,"e˥":69,"tʰ":70,"ɹ̪̩˥˩":71,"ɛ˧˥":72,"au˥":73,"ou˧˥":74,"e˧˩˧":75,"ɛ˥":76,"ɻ̩˥":77,"ɥ":78,"ɹ̪̩˧˩˧":79,"ai˧˩˧":80,"ou˥":81,"o˥˩":82,"ɛ˧˩˧":83,"ʊ˧˩˧":84,"ɔ˥":85,"tsʰ":86,"ei":87,"ə˥˩":88,"o":89,"ʊ˥˩":90,"ou":91,"ɤ˧˩˧":92,"o˧˥":93,"ei˥":94,"e˥˩":95,"ɚ˧˩˧":96,"y˥":97,"ɚ˥˩":98,"y˧˥":99,"ɻ̩":100,"y˧˩˧":101,"ɹ̪̩˥":102,"ɻ̩˧˥":103,"u":104,"ə":105,"ai":106,"ʊ":107,"e":108,"ɚ˧˥":109,"ɔ˥˩":110,"ɹ̪̩˧˥":111,"ɛ":112,"y":113,"m˧˥":114}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"z":4,"":5,"j":6,"ãː":7,"ɦ":8,"ɾ":9,"d":10,"i":11,"ɛ":12,"p":13,"ɪ":14,"k":15,"ɑ":16,"l":17,"ɛː":18,"n":19,"s":20,"v":21,"ə":22,"ɛi":23,"ʋ":24,"t":25,"m":26,"ɣ":27,"ʏ":28,"ɔ":29,"x":30,"u":31,"f":32,"ŋ":33,"øː":34,"b":35,"ɔː":36,"ʌu":37,"y":38,"œy":39,"":40,"w":41,"ʃ":42,"t̠ʃ":43,"ɲ":44,"ʒ":45,"":46,"ɡ":47,"d̠ʒ":48,"":49}