Final model for experiment Dutch

Browse files

Files changed (6) hide show

README.md +47 -47
config.json +1 -1
model.safetensors +2 -2
tokenizer.json +47 -112
training_args.bin +1 -1
vocab.json +1 -1

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 library_name: transformers
 tags:
-- Mandarin
 - generated_from_trainer
 model-index:
 - name: childes-segmentation-100k-gpt2_lm-model
@@ -16,53 +16,53 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - epoch: 4000.0
-- eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.4895
-- eval_absolute_seg_boundary_fscore_Entropy: 0.5708
-- eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.4915
-- eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.6177
-- eval_absolute_seg_boundary_fscore_Increase in Loss: 0.6662
-- eval_absolute_seg_boundary_fscore_Increase in Rank: 0.7024
-- eval_absolute_seg_boundary_fscore_Loss: 0.5977
-- eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.7211
-- eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.7641
-- eval_absolute_seg_boundary_fscore_Rank: 0.6237
-- eval_absolute_seg_type_fscore_Boundary Prediction: 0.4682
-- eval_absolute_seg_type_fscore_Entropy: 0.3143
-- eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.5365
-- eval_absolute_seg_type_fscore_Increase in Entropy: 0.3262
-- eval_absolute_seg_type_fscore_Increase in Loss: 0.3540
-- eval_absolute_seg_type_fscore_Increase in Rank: 0.4184
-- eval_absolute_seg_type_fscore_Loss: 0.2843
-- eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.4790
-- eval_absolute_seg_type_fscore_Majority Vote Spike: 0.4732
-- eval_absolute_seg_type_fscore_Rank: 0.3459
-- eval_bpc: 4.5280
-- eval_loss: 3.1386
 - eval_model_preparation_time: 0.0008
-- eval_perplexity: 23.0705
-- eval_runtime: 45.72
-- eval_samples_per_second: 3.084
-- eval_spike_seg_boundary_fscore_Boundary Prediction: 0.8113
-- eval_spike_seg_boundary_fscore_Entropy: 0.5823
-- eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.8144
-- eval_spike_seg_boundary_fscore_Increase in Entropy: 0.6192
-- eval_spike_seg_boundary_fscore_Increase in Loss: 0.6641
-- eval_spike_seg_boundary_fscore_Increase in Rank: 0.7155
-- eval_spike_seg_boundary_fscore_Loss: 0.6074
-- eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.7518
-- eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.7574
-- eval_spike_seg_boundary_fscore_Rank: 0.6497
-- eval_spike_seg_type_fscore_Boundary Prediction: 0.5335
-- eval_spike_seg_type_fscore_Entropy: 0.3071
-- eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.5314
-- eval_spike_seg_type_fscore_Increase in Entropy: 0.3199
-- eval_spike_seg_type_fscore_Increase in Loss: 0.3475
-- eval_spike_seg_type_fscore_Increase in Rank: 0.4171
-- eval_spike_seg_type_fscore_Loss: 0.2965
-- eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.4907
-- eval_spike_seg_type_fscore_Majority Vote Spike: 0.4385
-- eval_spike_seg_type_fscore_Rank: 0.3780
-- eval_steps_per_second: 0.109
 - step: 100000
 ## Model description

 ---
 library_name: transformers
 tags:
+- Dutch
 - generated_from_trainer
 model-index:
 - name: childes-segmentation-100k-gpt2_lm-model
 This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - epoch: 4000.0
+- eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.6381
+- eval_absolute_seg_boundary_fscore_Entropy: 0.4936
+- eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.6397
+- eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.6171
+- eval_absolute_seg_boundary_fscore_Increase in Loss: 0.6068
+- eval_absolute_seg_boundary_fscore_Increase in Rank: 0.6806
+- eval_absolute_seg_boundary_fscore_Loss: 0.5355
+- eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.7011
+- eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.7273
+- eval_absolute_seg_boundary_fscore_Rank: 0.5571
+- eval_absolute_seg_type_fscore_Boundary Prediction: 0.1447
+- eval_absolute_seg_type_fscore_Entropy: 0.2626
+- eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.3233
+- eval_absolute_seg_type_fscore_Increase in Entropy: 0.3100
+- eval_absolute_seg_type_fscore_Increase in Loss: 0.2509
+- eval_absolute_seg_type_fscore_Increase in Rank: 0.4174
+- eval_absolute_seg_type_fscore_Loss: 0.2412
+- eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.4319
+- eval_absolute_seg_type_fscore_Majority Vote Spike: 0.4164
+- eval_absolute_seg_type_fscore_Rank: 0.2972
+- eval_bpc: 4.4805
+- eval_loss: 3.1056
 - eval_model_preparation_time: 0.0008
+- eval_perplexity: 22.3231
+- eval_runtime: 47.9148
+- eval_samples_per_second: 2.964
+- eval_spike_seg_boundary_fscore_Boundary Prediction: 0.7308
+- eval_spike_seg_boundary_fscore_Entropy: 0.5944
+- eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.7195
+- eval_spike_seg_boundary_fscore_Increase in Entropy: 0.6282
+- eval_spike_seg_boundary_fscore_Increase in Loss: 0.6258
+- eval_spike_seg_boundary_fscore_Increase in Rank: 0.6735
+- eval_spike_seg_boundary_fscore_Loss: 0.5578
+- eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.7329
+- eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.7085
+- eval_spike_seg_boundary_fscore_Rank: 0.6106
+- eval_spike_seg_type_fscore_Boundary Prediction: 0.3885
+- eval_spike_seg_type_fscore_Entropy: 0.2817
+- eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.3610
+- eval_spike_seg_type_fscore_Increase in Entropy: 0.2866
+- eval_spike_seg_type_fscore_Increase in Loss: 0.3167
+- eval_spike_seg_type_fscore_Increase in Rank: 0.3724
+- eval_spike_seg_type_fscore_Loss: 0.2626
+- eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.4120
+- eval_spike_seg_type_fscore_Majority Vote Spike: 0.3425
+- eval_spike_seg_type_fscore_Rank: 0.3228
+- eval_steps_per_second: 0.104
 - step: 100000
 ## Model description

config.json CHANGED Viewed

@@ -27,5 +27,5 @@
   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
   "use_cache": true,
-  "vocab_size": 115
 }

   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
   "use_cache": true,
+  "vocab_size": 50
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f7efa21a386251f84b729d3205aa549d7c8926a03a23ff83f171997f706fda2
-size 2574104

 version https://git-lfs.github.com/spec/v1
+oid sha256:2419c1d9f086c904f711146ca854e725f4afa8b229953ec297133263c26d1b8d
+size 2540824

tokenizer.json CHANGED Viewed

@@ -51,7 +51,7 @@
     ]
   },
   "pre_tokenizer": {
-    "type": "WhitespaceSplit"
   },
   "post_processor": {
     "type": "TemplateProcessing",
@@ -115,117 +115,52 @@
       "PAD": 1,
       "WORD_BOUNDARY": 2,
       "UTT_BOUNDARY": 3,
-      "a˧˥": 4,
-      "u˧˥": 5,
-      "a˥": 6,
-      "au": 7,
-      "n": 8,
-      "a˥˩": 9,
-      "ʃ̺": 10,
-      "ɻ̩˥˩": 11,
-      "ə˧˥": 12,
-      "m": 13,
-      "ɤ": 14,
-      "p": 15,
-      "j": 16,
-      "e˧˥": 17,
-      "kʰ": 18,
-      "k": 19,
-      "ɤ˥˩": 20,
-      "w": 21,
-      "o˥": 22,
-      "t̠ʃ̺ʰ": 23,
-      "ə˥": 24,
-      "ŋ": 25,
-      "t": 26,
-      "ʊ˥": 27,
-      "ɕ": 28,
-      "i": 29,
-      "a": 30,
-      "l": 31,
-      "au˧˩˧": 32,
-      "x": 33,
-      "u˧˩˧": 34,
-      "i˥": 35,
-      "ei˧˩˧": 36,
-      "pʰ": 37,
-      "i˧˥": 38,
-      "ai˧˥": 39,
-      "ou˧˩˧": 40,
-      "ɤ˧˥": 41,
-      "o˧˩˧": 42,
-      "tɕ": 43,
-      "au˥˩": 44,
-      "ts": 45,
-      "ə˧˩˧": 46,
-      "ɤ˥": 47,
-      "ei˧˥": 48,
-      "ʊ˧˥": 49,
-      "i˧˩˧": 50,
-      "t̠ʃ̺": 51,
-      "ɻ̩˧˩˧": 52,
-      "ei˥˩": 53,
-      "s": 54,
-      "u˥˩": 55,
-      "ɹ̪̩": 56,
-      "ai˥": 57,
-      "u˥": 58,
-      "tɕʰ": 59,
-      "a˧˩˧": 60,
-      "ai˥˩": 61,
-      "ɛ˥˩": 62,
-      "f": 63,
-      "i˥˩": 64,
-      "y˥˩": 65,
-      "au˧˥": 66,
-      "ɻ": 67,
-      "ou˥˩": 68,
-      "e˥": 69,
-      "tʰ": 70,
-      "ɹ̪̩˥˩": 71,
-      "ɛ˧˥": 72,
-      "au˥": 73,
-      "ou˧˥": 74,
-      "e˧˩˧": 75,
-      "ɛ˥": 76,
-      "ɻ̩˥": 77,
-      "ɥ": 78,
-      "ɹ̪̩˧˩˧": 79,
-      "ai˧˩˧": 80,
-      "ou˥": 81,
-      "o˥˩": 82,
-      "ɛ˧˩˧": 83,
-      "ʊ˧˩˧": 84,
-      "ɔ˥": 85,
-      "tsʰ": 86,
-      "ei": 87,
-      "ə˥˩": 88,
-      "o": 89,
-      "ʊ˥˩": 90,
-      "ou": 91,
-      "ɤ˧˩˧": 92,
-      "o˧˥": 93,
-      "ei˥": 94,
-      "e˥˩": 95,
-      "ɚ˧˩˧": 96,
-      "y˥": 97,
-      "ɚ˥˩": 98,
-      "y˧˥": 99,
-      "ɻ̩": 100,
-      "y˧˩˧": 101,
-      "ɹ̪̩˥": 102,
-      "ɻ̩˧˥": 103,
-      "u": 104,
-      "ə": 105,
-      "ai": 106,
-      "ʊ": 107,
-      "e": 108,
-      "ɚ˧˥": 109,
-      "ɔ˥˩": 110,
-      "ɹ̪̩˧˥": 111,
-      "ɛ": 112,
-      "y": 113,
-      "m˧˥": 114
     },
     "unk_token": "UNK"
   }

     ]
   },
   "pre_tokenizer": {
+    "type": "Whitespace"
   },
   "post_processor": {
     "type": "TemplateProcessing",
       "PAD": 1,
       "WORD_BOUNDARY": 2,
       "UTT_BOUNDARY": 3,
+      "z": 4,
+      "oː": 5,
+      "j": 6,
+      "ãː": 7,
+      "ɦ": 8,
+      "ɾ": 9,
+      "d": 10,
+      "i": 11,
+      "ɛ": 12,
+      "p": 13,
+      "ɪ": 14,
+      "k": 15,
+      "ɑ": 16,
+      "l": 17,
+      "ɛː": 18,
+      "n": 19,
+      "s": 20,
+      "v": 21,
+      "ə": 22,
+      "ɛi": 23,
+      "ʋ": 24,
+      "t": 25,
+      "m": 26,
+      "ɣ": 27,
+      "ʏ": 28,
+      "ɔ": 29,
+      "x": 30,
+      "u": 31,
+      "f": 32,
+      "ŋ": 33,
+      "øː": 34,
+      "b": 35,
+      "ɔː": 36,
+      "ʌu": 37,
+      "y": 38,
+      "œy": 39,
+      "tʲ": 40,
+      "w": 41,
+      "ʃ": 42,
+      "t̠ʃ": 43,
+      "ɲ": 44,
+      "ʒ": 45,
+      "iː": 46,
+      "ɡ": 47,
+      "d̠ʒ": 48,
+      "ã": 49
     },
     "unk_token": "UNK"
   }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab5aeece3087b363ef297505f6884ea73978b1fc89a8380b1aea665daf958b97
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:051d8089b9b2a56e1beb0ffb7cb9d9effdd39e932954aebd1b6feb1c099a56c7
 size 5368

vocab.json CHANGED Viewed

@@ -1 +1 @@

- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"a˧˥":4,"u˧˥":5,"a˥":6,"au":7,"n":8,"a˥˩":9,"ʃ̺":10,"ɻ̩˥˩":11,"ə˧˥":12,"m":13,"ɤ":14,"p":15,"j":16,"e˧˥":17,"kʰ":18,"k":19,"ɤ˥˩":20,"w":21,"o˥":22,"t̠ʃ̺ʰ":23,"ə˥":24,"ŋ":25,"t":26,"ʊ˥":27,"ɕ":28,"i":29,"a":30,"l":31,"au˧˩˧":32,"x":33,"u˧˩˧":34,"i˥":35,"ei˧˩˧":36,"pʰ":37,"i˧˥":38,"ai˧˥":39,"ou˧˩˧":40,"ɤ˧˥":41,"o˧˩˧":42,"tɕ":43,"au˥˩":44,"ts":45,"ə˧˩˧":46,"ɤ˥":47,"ei˧˥":48,"ʊ˧˥":49,"i˧˩˧":50,"t̠ʃ̺":51,"ɻ̩˧˩˧":52,"ei˥˩":53,"s":54,"u˥˩":55,"ɹ̪̩":56,"ai˥":57,"u˥":58,"tɕʰ":59,"a˧˩˧":60,"ai˥˩":61,"ɛ˥˩":62,"f":63,"i˥˩":64,"y˥˩":65,"au˧˥":66,"ɻ":67,"ou˥˩":68,"e˥":69,"tʰ":70,"ɹ̪̩˥˩":71,"ɛ˧˥":72,"au˥":73,"ou˧˥":74,"e˧˩˧":75,"ɛ˥":76,"ɻ̩˥":77,"ɥ":78,"ɹ̪̩˧˩˧":79,"ai˧˩˧":80,"ou˥":81,"o˥˩":82,"ɛ˧˩˧":83,"ʊ˧˩˧":84,"ɔ˥":85,"tsʰ":86,"ei":87,"ə˥˩":88,"o":89,"ʊ˥˩":90,"ou":91,"ɤ˧˩˧":92,"o˧˥":93,"ei˥":94,"e˥˩":95,"ɚ˧˩˧":96,"y˥":97,"ɚ˥˩":98,"y˧˥":99,"ɻ̩":100,"y˧˩˧":101,"ɹ̪̩˥":102,"ɻ̩˧˥":103,"u":104,"ə":105,"ai":106,"ʊ":107,"e":108,"ɚ˧˥":109,"ɔ˥˩":110,"ɹ̪̩˧˥":111,"ɛ":112,"y":113,"m˧˥":114}

+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"z":4,"oː":5,"j":6,"ãː":7,"ɦ":8,"ɾ":9,"d":10,"i":11,"ɛ":12,"p":13,"ɪ":14,"k":15,"ɑ":16,"l":17,"ɛː":18,"n":19,"s":20,"v":21,"ə":22,"ɛi":23,"ʋ":24,"t":25,"m":26,"ɣ":27,"ʏ":28,"ɔ":29,"x":30,"u":31,"f":32,"ŋ":33,"øː":34,"b":35,"ɔː":36,"ʌu":37,"y":38,"œy":39,"tʲ":40,"w":41,"ʃ":42,"t̠ʃ":43,"ɲ":44,"ʒ":45,"iː":46,"ɡ":47,"d̠ʒ":48,"ã":49}