codebyzeb committed on
Commit
eaa3040
·
verified ·
1 Parent(s): cd1df9a

Final model for experiment Estonian

Browse files
Files changed (6)
  1. README.md +47 -47
  2. config.json +1 -1
  3. model.safetensors +2 -2
  4. tokenizer.json +63 -26
  5. training_args.bin +1 -1
  6. vocab.json +1 -1
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
  tags:
4
- - Indonesian
5
  - generated_from_trainer
6
  model-index:
7
  - name: childes-segmentation-100k-gpt2_lm-model
@@ -16,53 +16,53 @@ should probably proofread and complete it, then remove this comment. -->
16
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - epoch: 4000.0
19
- - eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.5187
20
- - eval_absolute_seg_boundary_fscore_Entropy: 0.4017
21
- - eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.5413
22
- - eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.4616
23
- - eval_absolute_seg_boundary_fscore_Increase in Loss: 0.5806
24
- - eval_absolute_seg_boundary_fscore_Increase in Rank: 0.6071
25
- - eval_absolute_seg_boundary_fscore_Loss: 0.4839
26
- - eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.6298
27
- - eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.6048
28
- - eval_absolute_seg_boundary_fscore_Rank: 0.5122
29
- - eval_absolute_seg_type_fscore_Boundary Prediction: 0.3513
30
- - eval_absolute_seg_type_fscore_Entropy: 0.2802
31
- - eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.3813
32
- - eval_absolute_seg_type_fscore_Increase in Entropy: 0.3222
33
- - eval_absolute_seg_type_fscore_Increase in Loss: 0.3824
34
- - eval_absolute_seg_type_fscore_Increase in Rank: 0.4208
35
- - eval_absolute_seg_type_fscore_Loss: 0.3151
36
- - eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.4692
37
- - eval_absolute_seg_type_fscore_Majority Vote Spike: 0.4276
38
- - eval_absolute_seg_type_fscore_Rank: 0.3621
39
- - eval_bpc: 4.2803
40
- - eval_loss: 2.9669
41
  - eval_model_preparation_time: 0.0008
42
- - eval_perplexity: 19.4309
43
- - eval_runtime: 23.9581
44
- - eval_samples_per_second: 5.426
45
- - eval_spike_seg_boundary_fscore_Boundary Prediction: 0.5220
46
- - eval_spike_seg_boundary_fscore_Entropy: 0.4477
47
- - eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.5144
48
- - eval_spike_seg_boundary_fscore_Increase in Entropy: 0.4307
49
- - eval_spike_seg_boundary_fscore_Increase in Loss: 0.5371
50
- - eval_spike_seg_boundary_fscore_Increase in Rank: 0.5614
51
- - eval_spike_seg_boundary_fscore_Loss: 0.5245
52
- - eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.5996
53
- - eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.5457
54
- - eval_spike_seg_boundary_fscore_Rank: 0.5726
55
- - eval_spike_seg_type_fscore_Boundary Prediction: 0.2919
56
- - eval_spike_seg_type_fscore_Entropy: 0.2635
57
- - eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.2716
58
- - eval_spike_seg_type_fscore_Increase in Entropy: 0.2197
59
- - eval_spike_seg_type_fscore_Increase in Loss: 0.2948
60
- - eval_spike_seg_type_fscore_Increase in Rank: 0.3411
61
- - eval_spike_seg_type_fscore_Loss: 0.2883
62
- - eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.4045
63
- - eval_spike_seg_type_fscore_Majority Vote Spike: 0.2782
64
- - eval_spike_seg_type_fscore_Rank: 0.3612
65
- - eval_steps_per_second: 0.209
66
  - step: 100000
67
 
68
  ## Model description
 
1
  ---
2
  library_name: transformers
3
  tags:
4
+ - Estonian
5
  - generated_from_trainer
6
  model-index:
7
  - name: childes-segmentation-100k-gpt2_lm-model
 
16
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - epoch: 4000.0
19
+ - eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.5173
20
+ - eval_absolute_seg_boundary_fscore_Entropy: 0.4122
21
+ - eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.5774
22
+ - eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.4825
23
+ - eval_absolute_seg_boundary_fscore_Increase in Loss: 0.5014
24
+ - eval_absolute_seg_boundary_fscore_Increase in Rank: 0.5198
25
+ - eval_absolute_seg_boundary_fscore_Loss: 0.4373
26
+ - eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.5816
27
+ - eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.5640
28
+ - eval_absolute_seg_boundary_fscore_Rank: 0.4601
29
+ - eval_absolute_seg_type_fscore_Boundary Prediction: 0.3117
30
+ - eval_absolute_seg_type_fscore_Entropy: 0.2019
31
+ - eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.3048
32
+ - eval_absolute_seg_type_fscore_Increase in Entropy: 0.2221
33
+ - eval_absolute_seg_type_fscore_Increase in Loss: 0.2382
34
+ - eval_absolute_seg_type_fscore_Increase in Rank: 0.2725
35
+ - eval_absolute_seg_type_fscore_Loss: 0.1844
36
+ - eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.3083
37
+ - eval_absolute_seg_type_fscore_Majority Vote Spike: 0.2870
38
+ - eval_absolute_seg_type_fscore_Rank: 0.2212
39
+ - eval_bpc: 4.5092
40
+ - eval_loss: 3.1255
41
  - eval_model_preparation_time: 0.0008
42
+ - eval_perplexity: 22.7722
43
+ - eval_runtime: 11.7267
44
+ - eval_samples_per_second: 16.884
45
+ - eval_spike_seg_boundary_fscore_Boundary Prediction: 0.6098
46
+ - eval_spike_seg_boundary_fscore_Entropy: 0.4166
47
+ - eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.6078
48
+ - eval_spike_seg_boundary_fscore_Increase in Entropy: 0.4578
49
+ - eval_spike_seg_boundary_fscore_Increase in Loss: 0.4777
50
+ - eval_spike_seg_boundary_fscore_Increase in Rank: 0.5160
51
+ - eval_spike_seg_boundary_fscore_Loss: 0.4197
52
+ - eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.5653
53
+ - eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.5421
54
+ - eval_spike_seg_boundary_fscore_Rank: 0.4477
55
+ - eval_spike_seg_type_fscore_Boundary Prediction: 0.3007
56
+ - eval_spike_seg_type_fscore_Entropy: 0.1823
57
+ - eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.2908
58
+ - eval_spike_seg_type_fscore_Increase in Entropy: 0.1909
59
+ - eval_spike_seg_type_fscore_Increase in Loss: 0.1929
60
+ - eval_spike_seg_type_fscore_Increase in Rank: 0.2341
61
+ - eval_spike_seg_type_fscore_Loss: 0.1525
62
+ - eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.2721
63
+ - eval_spike_seg_type_fscore_Majority Vote Spike: 0.2019
64
+ - eval_spike_seg_type_fscore_Rank: 0.1958
65
+ - eval_steps_per_second: 0.597
66
  - step: 100000
67
 
68
  ## Model description
config.json CHANGED
@@ -27,5 +27,5 @@
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.44.2",
29
  "use_cache": true,
30
- "vocab_size": 31
31
  }
 
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.44.2",
29
  "use_cache": true,
30
+ "vocab_size": 68
31
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84fe774c468a033f724e41b8a5406299e48e82cccf7326ca4da90e906360fbcc
3
- size 2531096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d028c1e5ff80b0b3b65eb47e07892265643b1df9517383cdcc42fcbf07c0bcb
3
+ size 2550040
tokenizer.json CHANGED
@@ -115,33 +115,70 @@
115
  "PAD": 1,
116
  "WORD_BOUNDARY": 2,
117
  "UTT_BOUNDARY": 3,
118
- "s": 4,
119
- "i": 5,
120
- "n": 6,
121
- "m": 7,
122
  "a": 8,
123
- "j": 9,
124
- "u": 10,
125
- "k": 11,
126
- "o": 12,
127
- "h": 13,
128
- "l": 14,
129
- "t": 15,
130
- "w": 16,
131
- "d̠ʒ": 17,
132
- "ŋ": 18,
133
- "ə": 19,
134
- "d": 20,
135
- "p": 21,
136
- "ɡ": 22,
137
- "b": 23,
138
- "r": 24,
139
- "ɲ": 25,
140
- "t̠ʃ": 26,
141
- "f": 27,
142
- "z": 28,
143
- "ʃ": 29,
144
- "x": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  },
146
  "unk_token": "UNK"
147
  }
 
115
  "PAD": 1,
116
  "WORD_BOUNDARY": 2,
117
  "UTT_BOUNDARY": 3,
118
+ "n": 4,
119
+ "o": 5,
120
+ "t": 6,
121
+ "ʃ": 7,
122
  "a": 8,
123
+ "": 9,
124
+ "m": 10,
125
+ "u": 11,
126
+ "": 12,
127
+ "i": 13,
128
+ "s": 14,
129
+ "": 15,
130
+ "d": 16,
131
+ "": 17,
132
+ "k": 18,
133
+ "ɡ": 19,
134
+ "ɑ": 20,
135
+ "ɤ": 21,
136
+ "ʊ": 22,
137
+ "": 23,
138
+ "j": 24,
139
+ "": 25,
140
+ "h": 26,
141
+ "v": 27,
142
+ "æi": 28,
143
+ "": 29,
144
+ "e": 30,
145
+ "ɪ": 31,
146
+ "tː": 32,
147
+ "r": 33,
148
+ "ɛ": 34,
149
+ "mː": 35,
150
+ "p": 36,
151
+ "sː": 37,
152
+ "æ": 38,
153
+ "l": 39,
154
+ "pː": 40,
155
+ "yː": 41,
156
+ "æː": 42,
157
+ "b": 43,
158
+ "ɔ": 44,
159
+ "ɤː": 45,
160
+ "lː": 46,
161
+ "ø": 47,
162
+ "øː": 48,
163
+ "ŋ": 49,
164
+ "y": 50,
165
+ "oː": 51,
166
+ "rː": 52,
167
+ "ɲ": 53,
168
+ "nː": 54,
169
+ "w": 55,
170
+ "tʲː": 56,
171
+ "øɪ̯": 57,
172
+ "f": 58,
173
+ "dʲ": 59,
174
+ "sʲː": 60,
175
+ "t̠ʃ": 61,
176
+ "ʃː": 62,
177
+ "ʒ": 63,
178
+ "z": 64,
179
+ "fː": 65,
180
+ "dː": 66,
181
+ "yi": 67
182
  },
183
  "unk_token": "UNK"
184
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0948cb11c8ef3cbbac150d6b065e41c48eafcb71430ca203d424013a0dc9d431
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de9f2ff350f6b9b053d00cc795964f932021ce06c9925b1fc687b3465fc34f31
3
  size 5368
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"s":4,"i":5,"n":6,"m":7,"a":8,"j":9,"u":10,"k":11,"o":12,"h":13,"l":14,"t":15,"w":16,"d̠ʒ":17,"ŋ":18,"ə":19,"d":20,"p":21,"ɡ":22,"b":23,"r":24,"ɲ":25,"t̠ʃ":26,"f":27,"z":28,"ʃ":29,"x":30}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"n":4,"o":5,"t":6,"ʃ":7,"a":8,"":9,"m":10,"u":11,"":12,"i":13,"s":14,"":15,"d":16,"":17,"k":18,"ɡ":19,"ɑ":20,"ɤ":21,"ʊ":22,"":23,"j":24,"":25,"h":26,"v":27,"æi":28,"":29,"e":30,"ɪ":31,"tː":32,"r":33,"ɛ":34,"mː":35,"p":36,"sː":37,"æ":38,"l":39,"pː":40,"yː":41,"æː":42,"b":43,"ɔ":44,"ɤː":45,"lː":46,"ø":47,"øː":48,"ŋ":49,"y":50,"oː":51,"rː":52,"ɲ":53,"nː":54,"w":55,"tʲː":56,"øɪ̯":57,"f":58,"dʲ":59,"sʲː":60,"t̠ʃ":61,"ʃː":62,"ʒ":63,"z":64,"fː":65,"dː":66,"yi":67}