codebyzeb committed on
Commit
b048d25
·
verified ·
1 Parent(s): eaa3040

Final model for experiment Mandarin

Browse files
Files changed (6) hide show
  1. README.md +47 -47
  2. config.json +1 -1
  3. model.safetensors +2 -2
  4. tokenizer.json +112 -65
  5. training_args.bin +1 -1
  6. vocab.json +1 -1
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
  tags:
4
- - Estonian
5
  - generated_from_trainer
6
  model-index:
7
  - name: childes-segmentation-100k-gpt2_lm-model
@@ -16,53 +16,53 @@ should probably proofread and complete it, then remove this comment. -->
16
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - epoch: 4000.0
19
- - eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.5173
20
- - eval_absolute_seg_boundary_fscore_Entropy: 0.4122
21
- - eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.5774
22
- - eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.4825
23
- - eval_absolute_seg_boundary_fscore_Increase in Loss: 0.5014
24
- - eval_absolute_seg_boundary_fscore_Increase in Rank: 0.5198
25
- - eval_absolute_seg_boundary_fscore_Loss: 0.4373
26
- - eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.5816
27
- - eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.5640
28
- - eval_absolute_seg_boundary_fscore_Rank: 0.4601
29
- - eval_absolute_seg_type_fscore_Boundary Prediction: 0.3117
30
- - eval_absolute_seg_type_fscore_Entropy: 0.2019
31
- - eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.3048
32
- - eval_absolute_seg_type_fscore_Increase in Entropy: 0.2221
33
- - eval_absolute_seg_type_fscore_Increase in Loss: 0.2382
34
- - eval_absolute_seg_type_fscore_Increase in Rank: 0.2725
35
- - eval_absolute_seg_type_fscore_Loss: 0.1844
36
- - eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.3083
37
- - eval_absolute_seg_type_fscore_Majority Vote Spike: 0.2870
38
- - eval_absolute_seg_type_fscore_Rank: 0.2212
39
- - eval_bpc: 4.5092
40
- - eval_loss: 3.1255
41
  - eval_model_preparation_time: 0.0008
42
- - eval_perplexity: 22.7722
43
- - eval_runtime: 11.7267
44
- - eval_samples_per_second: 16.884
45
- - eval_spike_seg_boundary_fscore_Boundary Prediction: 0.6098
46
- - eval_spike_seg_boundary_fscore_Entropy: 0.4166
47
- - eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.6078
48
- - eval_spike_seg_boundary_fscore_Increase in Entropy: 0.4578
49
- - eval_spike_seg_boundary_fscore_Increase in Loss: 0.4777
50
- - eval_spike_seg_boundary_fscore_Increase in Rank: 0.5160
51
- - eval_spike_seg_boundary_fscore_Loss: 0.4197
52
- - eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.5653
53
- - eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.5421
54
- - eval_spike_seg_boundary_fscore_Rank: 0.4477
55
- - eval_spike_seg_type_fscore_Boundary Prediction: 0.3007
56
- - eval_spike_seg_type_fscore_Entropy: 0.1823
57
- - eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.2908
58
- - eval_spike_seg_type_fscore_Increase in Entropy: 0.1909
59
- - eval_spike_seg_type_fscore_Increase in Loss: 0.1929
60
- - eval_spike_seg_type_fscore_Increase in Rank: 0.2341
61
- - eval_spike_seg_type_fscore_Loss: 0.1525
62
- - eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.2721
63
- - eval_spike_seg_type_fscore_Majority Vote Spike: 0.2019
64
- - eval_spike_seg_type_fscore_Rank: 0.1958
65
- - eval_steps_per_second: 0.597
66
  - step: 100000
67
 
68
  ## Model description
 
1
  ---
2
  library_name: transformers
3
  tags:
4
+ - Mandarin
5
  - generated_from_trainer
6
  model-index:
7
  - name: childes-segmentation-100k-gpt2_lm-model
 
16
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - epoch: 4000.0
19
+ - eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.4895
20
+ - eval_absolute_seg_boundary_fscore_Entropy: 0.5708
21
+ - eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.4915
22
+ - eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.6177
23
+ - eval_absolute_seg_boundary_fscore_Increase in Loss: 0.6662
24
+ - eval_absolute_seg_boundary_fscore_Increase in Rank: 0.7024
25
+ - eval_absolute_seg_boundary_fscore_Loss: 0.5977
26
+ - eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.7211
27
+ - eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.7641
28
+ - eval_absolute_seg_boundary_fscore_Rank: 0.6237
29
+ - eval_absolute_seg_type_fscore_Boundary Prediction: 0.4682
30
+ - eval_absolute_seg_type_fscore_Entropy: 0.3143
31
+ - eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.5365
32
+ - eval_absolute_seg_type_fscore_Increase in Entropy: 0.3262
33
+ - eval_absolute_seg_type_fscore_Increase in Loss: 0.3540
34
+ - eval_absolute_seg_type_fscore_Increase in Rank: 0.4184
35
+ - eval_absolute_seg_type_fscore_Loss: 0.2843
36
+ - eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.4790
37
+ - eval_absolute_seg_type_fscore_Majority Vote Spike: 0.4732
38
+ - eval_absolute_seg_type_fscore_Rank: 0.3459
39
+ - eval_bpc: 4.5280
40
+ - eval_loss: 3.1386
41
  - eval_model_preparation_time: 0.0008
42
+ - eval_perplexity: 23.0705
43
+ - eval_runtime: 45.72
44
+ - eval_samples_per_second: 3.084
45
+ - eval_spike_seg_boundary_fscore_Boundary Prediction: 0.8113
46
+ - eval_spike_seg_boundary_fscore_Entropy: 0.5823
47
+ - eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.8144
48
+ - eval_spike_seg_boundary_fscore_Increase in Entropy: 0.6192
49
+ - eval_spike_seg_boundary_fscore_Increase in Loss: 0.6641
50
+ - eval_spike_seg_boundary_fscore_Increase in Rank: 0.7155
51
+ - eval_spike_seg_boundary_fscore_Loss: 0.6074
52
+ - eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.7518
53
+ - eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.7574
54
+ - eval_spike_seg_boundary_fscore_Rank: 0.6497
55
+ - eval_spike_seg_type_fscore_Boundary Prediction: 0.5335
56
+ - eval_spike_seg_type_fscore_Entropy: 0.3071
57
+ - eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.5314
58
+ - eval_spike_seg_type_fscore_Increase in Entropy: 0.3199
59
+ - eval_spike_seg_type_fscore_Increase in Loss: 0.3475
60
+ - eval_spike_seg_type_fscore_Increase in Rank: 0.4171
61
+ - eval_spike_seg_type_fscore_Loss: 0.2965
62
+ - eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.4907
63
+ - eval_spike_seg_type_fscore_Majority Vote Spike: 0.4385
64
+ - eval_spike_seg_type_fscore_Rank: 0.3780
65
+ - eval_steps_per_second: 0.109
66
  - step: 100000
67
 
68
  ## Model description
config.json CHANGED
@@ -27,5 +27,5 @@
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.44.2",
29
  "use_cache": true,
30
- "vocab_size": 68
31
  }
 
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.44.2",
29
  "use_cache": true,
30
+ "vocab_size": 115
31
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d028c1e5ff80b0b3b65eb47e07892265643b1df9517383cdcc42fcbf07c0bcb
3
- size 2550040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f7efa21a386251f84b729d3205aa549d7c8926a03a23ff83f171997f706fda2
3
+ size 2574104
tokenizer.json CHANGED
@@ -51,7 +51,7 @@
51
  ]
52
  },
53
  "pre_tokenizer": {
54
- "type": "Whitespace"
55
  },
56
  "post_processor": {
57
  "type": "TemplateProcessing",
@@ -115,70 +115,117 @@
115
  "PAD": 1,
116
  "WORD_BOUNDARY": 2,
117
  "UTT_BOUNDARY": 3,
118
- "n": 4,
119
- "o": 5,
120
- "t": 6,
121
- "ʃ": 7,
122
- "a": 8,
123
- "": 9,
124
- "m": 10,
125
- "u": 11,
126
- "": 12,
127
- "i": 13,
128
- "s": 14,
129
- "": 15,
130
- "d": 16,
131
- "": 17,
132
- "k": 18,
133
- "ɡ": 19,
134
- "ɑ": 20,
135
- "ɤ": 21,
136
- "ʊ": 22,
137
- "": 23,
138
- "j": 24,
139
- "": 25,
140
- "h": 26,
141
- "v": 27,
142
- "æi": 28,
143
- "": 29,
144
- "e": 30,
145
- "ɪ": 31,
146
- "": 32,
147
- "r": 33,
148
- "ɛ": 34,
149
- "": 35,
150
- "p": 36,
151
- "": 37,
152
- "æ": 38,
153
- "l": 39,
154
- "": 40,
155
- "": 41,
156
- "æː": 42,
157
- "b": 43,
158
- "ɔ": 44,
159
- "ɤː": 45,
160
- "": 46,
161
- "ø": 47,
162
- "øː": 48,
163
- "ŋ": 49,
164
- "y": 50,
165
- "": 51,
166
- "": 52,
167
- "ɲ": 53,
168
- "": 54,
169
- "w": 55,
170
- "tʲː": 56,
171
- "øɪ̯": 57,
172
- "f": 58,
173
- "": 59,
174
- "sʲː": 60,
175
- "t̠ʃ": 61,
176
- "ʃː": 62,
177
- "ʒ": 63,
178
- "z": 64,
179
- "": 65,
180
- "": 66,
181
- "yi": 67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  },
183
  "unk_token": "UNK"
184
  }
 
51
  ]
52
  },
53
  "pre_tokenizer": {
54
+ "type": "WhitespaceSplit"
55
  },
56
  "post_processor": {
57
  "type": "TemplateProcessing",
 
115
  "PAD": 1,
116
  "WORD_BOUNDARY": 2,
117
  "UTT_BOUNDARY": 3,
118
+ "a˧˥": 4,
119
+ "u˧˥": 5,
120
+ "": 6,
121
+ "au": 7,
122
+ "n": 8,
123
+ "a˥˩": 9,
124
+ "ʃ̺": 10,
125
+ "ɻ̩˥˩": 11,
126
+ "ə˧˥": 12,
127
+ "m": 13,
128
+ "ɤ": 14,
129
+ "p": 15,
130
+ "j": 16,
131
+ "e˧˥": 17,
132
+ "": 18,
133
+ "k": 19,
134
+ "ɤ˥˩": 20,
135
+ "w": 21,
136
+ "": 22,
137
+ "t̠ʃ̺ʰ": 23,
138
+ "ə˥": 24,
139
+ "ŋ": 25,
140
+ "t": 26,
141
+ "ʊ˥": 27,
142
+ "ɕ": 28,
143
+ "i": 29,
144
+ "a": 30,
145
+ "l": 31,
146
+ "au˧˩˧": 32,
147
+ "x": 33,
148
+ "u˧˩˧": 34,
149
+ "": 35,
150
+ "ei˧˩˧": 36,
151
+ "": 37,
152
+ "i˧˥": 38,
153
+ "ai˧˥": 39,
154
+ "ou˧˩˧": 40,
155
+ "ɤ˧˥": 41,
156
+ "o˧˩˧": 42,
157
+ "": 43,
158
+ "au˥˩": 44,
159
+ "ts": 45,
160
+ "ə˧˩˧": 46,
161
+ "ɤ˥": 47,
162
+ "ei˧˥": 48,
163
+ "ʊ˧˥": 49,
164
+ "i˧˩˧": 50,
165
+ "t̠ʃ̺": 51,
166
+ "ɻ̩˧˩˧": 52,
167
+ "ei˥˩": 53,
168
+ "s": 54,
169
+ "u˥˩": 55,
170
+ "ɹ̪̩": 56,
171
+ "ai˥": 57,
172
+ "": 58,
173
+ "tɕʰ": 59,
174
+ "a˧˩˧": 60,
175
+ "ai˥˩": 61,
176
+ "ɛ˥˩": 62,
177
+ "f": 63,
178
+ "i˥˩": 64,
179
+ "y˥˩": 65,
180
+ "au˧˥": 66,
181
+ "ɻ": 67,
182
+ "ou˥˩": 68,
183
+ "e˥": 69,
184
+ "tʰ": 70,
185
+ "ɹ̪̩˥˩": 71,
186
+ "ɛ˧˥": 72,
187
+ "au˥": 73,
188
+ "ou˧˥": 74,
189
+ "e˧˩˧": 75,
190
+ "ɛ˥": 76,
191
+ "ɻ̩˥": 77,
192
+ "ɥ": 78,
193
+ "ɹ̪̩˧˩˧": 79,
194
+ "ai˧˩˧": 80,
195
+ "ou˥": 81,
196
+ "o˥˩": 82,
197
+ "ɛ˧˩˧": 83,
198
+ "ʊ˧˩˧": 84,
199
+ "ɔ˥": 85,
200
+ "tsʰ": 86,
201
+ "ei": 87,
202
+ "ə˥˩": 88,
203
+ "o": 89,
204
+ "ʊ˥˩": 90,
205
+ "ou": 91,
206
+ "ɤ˧˩˧": 92,
207
+ "o˧˥": 93,
208
+ "ei˥": 94,
209
+ "e˥˩": 95,
210
+ "ɚ˧˩˧": 96,
211
+ "y˥": 97,
212
+ "ɚ˥˩": 98,
213
+ "y˧˥": 99,
214
+ "ɻ̩": 100,
215
+ "y˧˩˧": 101,
216
+ "ɹ̪̩˥": 102,
217
+ "ɻ̩˧˥": 103,
218
+ "u": 104,
219
+ "ə": 105,
220
+ "ai": 106,
221
+ "ʊ": 107,
222
+ "e": 108,
223
+ "ɚ˧˥": 109,
224
+ "ɔ˥˩": 110,
225
+ "ɹ̪̩˧˥": 111,
226
+ "ɛ": 112,
227
+ "y": 113,
228
+ "m˧˥": 114
229
  },
230
  "unk_token": "UNK"
231
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de9f2ff350f6b9b053d00cc795964f932021ce06c9925b1fc687b3465fc34f31
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab5aeece3087b363ef297505f6884ea73978b1fc89a8380b1aea665daf958b97
3
  size 5368
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"n":4,"o":5,"t":6,"ʃ":7,"a":8,"":9,"m":10,"u":11,"":12,"i":13,"s":14,"":15,"d":16,"":17,"k":18,"ɡ":19,"ɑ":20,"ɤ":21,"ʊ":22,"":23,"j":24,"":25,"h":26,"v":27,"æi":28,"":29,"e":30,"ɪ":31,"":32,"r":33,"ɛ":34,"":35,"p":36,"":37,"æ":38,"l":39,"":40,"":41,"æː":42,"b":43,"ɔ":44,"ɤː":45,"":46,"ø":47,"øː":48,"ŋ":49,"y":50,"":51,"":52,"ɲ":53,"":54,"w":55,"tʲː":56,"øɪ̯":57,"f":58,"":59,"sʲː":60,"t̠ʃ":61,"ʃː":62,"ʒ":63,"z":64,"":65,"":66,"yi":67}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"a˧˥":4,"u˧˥":5,"":6,"au":7,"n":8,"a˥˩":9,"ʃ̺":10,"ɻ̩˥˩":11,"ə˧˥":12,"m":13,"ɤ":14,"p":15,"j":16,"e˧˥":17,"":18,"k":19,"ɤ˥˩":20,"w":21,"":22,"t̠ʃ̺ʰ":23,"ə˥":24,"ŋ":25,"t":26,"ʊ˥":27,"ɕ":28,"i":29,"a":30,"l":31,"au˧˩˧":32,"x":33,"u˧˩˧":34,"":35,"ei˧˩˧":36,"":37,"i˧˥":38,"ai˧˥":39,"ou˧˩˧":40,"ɤ˧˥":41,"o˧˩˧":42,"":43,"au˥˩":44,"ts":45,"ə˧˩˧":46,"ɤ˥":47,"ei˧˥":48,"ʊ˧˥":49,"i˧˩˧":50,"t̠ʃ̺":51,"ɻ̩˧˩˧":52,"ei˥˩":53,"s":54,"u˥˩":55,"ɹ̪̩":56,"ai˥":57,"":58,"tɕʰ":59,"a˧˩˧":60,"ai˥˩":61,"ɛ˥˩":62,"f":63,"i˥˩":64,"y˥˩":65,"au˧˥":66,"ɻ":67,"ou˥˩":68,"e˥":69,"tʰ":70,"ɹ̪̩˥˩":71,"ɛ˧˥":72,"au˥":73,"ou˧˥":74,"e˧˩˧":75,"ɛ˥":76,"ɻ̩˥":77,"ɥ":78,"ɹ̪̩˧˩˧":79,"ai˧˩˧":80,"ou˥":81,"o˥˩":82,"ɛ˧˩˧":83,"ʊ˧˩˧":84,"ɔ˥":85,"tsʰ":86,"ei":87,"ə˥˩":88,"o":89,"ʊ˥˩":90,"ou":91,"ɤ˧˩˧":92,"o˧˥":93,"ei˥":94,"e˥˩":95,"ɚ˧˩˧":96,"y˥":97,"ɚ˥˩":98,"y˧˥":99,"ɻ̩":100,"y˧˩˧":101,"ɹ̪̩˥":102,"ɻ̩˧˥":103,"u":104,"ə":105,"ai":106,"ʊ":107,"e":108,"ɚ˧˥":109,"ɔ˥˩":110,"ɹ̪̩˧˥":111,"ɛ":112,"y":113,"m˧˥":114}