Final model for experiment Estonian

Browse files

Files changed (6) hide show

README.md +47 -47
config.json +1 -1
model.safetensors +2 -2
tokenizer.json +63 -26
training_args.bin +1 -1
vocab.json +1 -1

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 library_name: transformers
 tags:
-- Indonesian
 - generated_from_trainer
 model-index:
 - name: childes-segmentation-100k-gpt2_lm-model
@@ -16,53 +16,53 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - epoch: 4000.0
-- eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.5187
-- eval_absolute_seg_boundary_fscore_Entropy: 0.4017
-- eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.5413
-- eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.4616
-- eval_absolute_seg_boundary_fscore_Increase in Loss: 0.5806
-- eval_absolute_seg_boundary_fscore_Increase in Rank: 0.6071
-- eval_absolute_seg_boundary_fscore_Loss: 0.4839
-- eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.6298
-- eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.6048
-- eval_absolute_seg_boundary_fscore_Rank: 0.5122
-- eval_absolute_seg_type_fscore_Boundary Prediction: 0.3513
-- eval_absolute_seg_type_fscore_Entropy: 0.2802
-- eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.3813
-- eval_absolute_seg_type_fscore_Increase in Entropy: 0.3222
-- eval_absolute_seg_type_fscore_Increase in Loss: 0.3824
-- eval_absolute_seg_type_fscore_Increase in Rank: 0.4208
-- eval_absolute_seg_type_fscore_Loss: 0.3151
-- eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.4692
-- eval_absolute_seg_type_fscore_Majority Vote Spike: 0.4276
-- eval_absolute_seg_type_fscore_Rank: 0.3621
-- eval_bpc: 4.2803
-- eval_loss: 2.9669
 - eval_model_preparation_time: 0.0008
-- eval_perplexity: 19.4309
-- eval_runtime: 23.9581
-- eval_samples_per_second: 5.426
-- eval_spike_seg_boundary_fscore_Boundary Prediction: 0.5220
-- eval_spike_seg_boundary_fscore_Entropy: 0.4477
-- eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.5144
-- eval_spike_seg_boundary_fscore_Increase in Entropy: 0.4307
-- eval_spike_seg_boundary_fscore_Increase in Loss: 0.5371
-- eval_spike_seg_boundary_fscore_Increase in Rank: 0.5614
-- eval_spike_seg_boundary_fscore_Loss: 0.5245
-- eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.5996
-- eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.5457
-- eval_spike_seg_boundary_fscore_Rank: 0.5726
-- eval_spike_seg_type_fscore_Boundary Prediction: 0.2919
-- eval_spike_seg_type_fscore_Entropy: 0.2635
-- eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.2716
-- eval_spike_seg_type_fscore_Increase in Entropy: 0.2197
-- eval_spike_seg_type_fscore_Increase in Loss: 0.2948
-- eval_spike_seg_type_fscore_Increase in Rank: 0.3411
-- eval_spike_seg_type_fscore_Loss: 0.2883
-- eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.4045
-- eval_spike_seg_type_fscore_Majority Vote Spike: 0.2782
-- eval_spike_seg_type_fscore_Rank: 0.3612
-- eval_steps_per_second: 0.209
 - step: 100000
 ## Model description

 ---
 library_name: transformers
 tags:
+- Estonian
 - generated_from_trainer
 model-index:
 - name: childes-segmentation-100k-gpt2_lm-model
 This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - epoch: 4000.0
+- eval_absolute_seg_boundary_fscore_Boundary Prediction: 0.5173
+- eval_absolute_seg_boundary_fscore_Entropy: 0.4122
+- eval_absolute_seg_boundary_fscore_Increase in Boundary Prediction: 0.5774
+- eval_absolute_seg_boundary_fscore_Increase in Entropy: 0.4825
+- eval_absolute_seg_boundary_fscore_Increase in Loss: 0.5014
+- eval_absolute_seg_boundary_fscore_Increase in Rank: 0.5198
+- eval_absolute_seg_boundary_fscore_Loss: 0.4373
+- eval_absolute_seg_boundary_fscore_Majority Vote Cutoff: 0.5816
+- eval_absolute_seg_boundary_fscore_Majority Vote Spike: 0.5640
+- eval_absolute_seg_boundary_fscore_Rank: 0.4601
+- eval_absolute_seg_type_fscore_Boundary Prediction: 0.3117
+- eval_absolute_seg_type_fscore_Entropy: 0.2019
+- eval_absolute_seg_type_fscore_Increase in Boundary Prediction: 0.3048
+- eval_absolute_seg_type_fscore_Increase in Entropy: 0.2221
+- eval_absolute_seg_type_fscore_Increase in Loss: 0.2382
+- eval_absolute_seg_type_fscore_Increase in Rank: 0.2725
+- eval_absolute_seg_type_fscore_Loss: 0.1844
+- eval_absolute_seg_type_fscore_Majority Vote Cutoff: 0.3083
+- eval_absolute_seg_type_fscore_Majority Vote Spike: 0.2870
+- eval_absolute_seg_type_fscore_Rank: 0.2212
+- eval_bpc: 4.5092
+- eval_loss: 3.1255
 - eval_model_preparation_time: 0.0008
+- eval_perplexity: 22.7722
+- eval_runtime: 11.7267
+- eval_samples_per_second: 16.884
+- eval_spike_seg_boundary_fscore_Boundary Prediction: 0.6098
+- eval_spike_seg_boundary_fscore_Entropy: 0.4166
+- eval_spike_seg_boundary_fscore_Increase in Boundary Prediction: 0.6078
+- eval_spike_seg_boundary_fscore_Increase in Entropy: 0.4578
+- eval_spike_seg_boundary_fscore_Increase in Loss: 0.4777
+- eval_spike_seg_boundary_fscore_Increase in Rank: 0.5160
+- eval_spike_seg_boundary_fscore_Loss: 0.4197
+- eval_spike_seg_boundary_fscore_Majority Vote Cutoff: 0.5653
+- eval_spike_seg_boundary_fscore_Majority Vote Spike: 0.5421
+- eval_spike_seg_boundary_fscore_Rank: 0.4477
+- eval_spike_seg_type_fscore_Boundary Prediction: 0.3007
+- eval_spike_seg_type_fscore_Entropy: 0.1823
+- eval_spike_seg_type_fscore_Increase in Boundary Prediction: 0.2908
+- eval_spike_seg_type_fscore_Increase in Entropy: 0.1909
+- eval_spike_seg_type_fscore_Increase in Loss: 0.1929
+- eval_spike_seg_type_fscore_Increase in Rank: 0.2341
+- eval_spike_seg_type_fscore_Loss: 0.1525
+- eval_spike_seg_type_fscore_Majority Vote Cutoff: 0.2721
+- eval_spike_seg_type_fscore_Majority Vote Spike: 0.2019
+- eval_spike_seg_type_fscore_Rank: 0.1958
+- eval_steps_per_second: 0.597
 - step: 100000
 ## Model description

config.json CHANGED Viewed

@@ -27,5 +27,5 @@
   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
   "use_cache": true,
-  "vocab_size": 31
 }

   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
   "use_cache": true,
+  "vocab_size": 68
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84fe774c468a033f724e41b8a5406299e48e82cccf7326ca4da90e906360fbcc
-size 2531096

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d028c1e5ff80b0b3b65eb47e07892265643b1df9517383cdcc42fcbf07c0bcb
+size 2550040

tokenizer.json CHANGED Viewed

@@ -115,33 +115,70 @@
       "PAD": 1,
       "WORD_BOUNDARY": 2,
       "UTT_BOUNDARY": 3,
-      "s": 4,
-      "i": 5,
-      "n": 6,
-      "m": 7,
       "a": 8,
-      "j": 9,
-      "u": 10,
-      "k": 11,
-      "o": 12,
-      "h": 13,
-      "l": 14,
-      "t": 15,
-      "w": 16,
-      "d̠ʒ": 17,
-      "ŋ": 18,
-      "ə": 19,
-      "d": 20,
-      "p": 21,
-      "ɡ": 22,
-      "b": 23,
-      "r": 24,
-      "ɲ": 25,
-      "t̠ʃ": 26,
-      "f": 27,
-      "z": 28,
-      "ʃ": 29,
-      "x": 30
     },
     "unk_token": "UNK"
   }

       "PAD": 1,
       "WORD_BOUNDARY": 2,
       "UTT_BOUNDARY": 3,
+      "n": 4,
+      "o": 5,
+      "t": 6,
+      "ʃ": 7,
       "a": 8,
+      "uː": 9,
+      "m": 10,
+      "u": 11,
+      "tʲ": 12,
+      "i": 13,
+      "s": 14,
+      "eː": 15,
+      "d": 16,
+      "iː": 17,
+      "k": 18,
+      "ɡ": 19,
+      "ɑ": 20,
+      "ɤ": 21,
+      "ʊ": 22,
+      "sʲ": 23,
+      "j": 24,
+      "aː": 25,
+      "h": 26,
+      "v": 27,
+      "æi": 28,
+      "kː": 29,
+      "e": 30,
+      "ɪ": 31,
+      "tː": 32,
+      "r": 33,
+      "ɛ": 34,
+      "mː": 35,
+      "p": 36,
+      "sː": 37,
+      "æ": 38,
+      "l": 39,
+      "pː": 40,
+      "yː": 41,
+      "æː": 42,
+      "b": 43,
+      "ɔ": 44,
+      "ɤː": 45,
+      "lː": 46,
+      "ø": 47,
+      "øː": 48,
+      "ŋ": 49,
+      "y": 50,
+      "oː": 51,
+      "rː": 52,
+      "ɲ": 53,
+      "nː": 54,
+      "w": 55,
+      "tʲː": 56,
+      "øɪ̯": 57,
+      "f": 58,
+      "dʲ": 59,
+      "sʲː": 60,
+      "t̠ʃ": 61,
+      "ʃː": 62,
+      "ʒ": 63,
+      "z": 64,
+      "fː": 65,
+      "dː": 66,
+      "yi": 67
     },
     "unk_token": "UNK"
   }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0948cb11c8ef3cbbac150d6b065e41c48eafcb71430ca203d424013a0dc9d431
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:de9f2ff350f6b9b053d00cc795964f932021ce06c9925b1fc687b3465fc34f31
 size 5368

vocab.json CHANGED Viewed

@@ -1 +1 @@
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"s":4,"i":5,"n":6,"m":7,"a":8,"j":9,"u":10,"k":11,"o":12,"h":13,"l":14,"t":15,"w":16,"d̠ʒ":17,"ŋ":18,"ə":19,"d":20,"p":21,"ɡ":22,"b":23,"r":24,"ɲ":25,"t̠ʃ":26,"f":27,"z":28,"ʃ":29,"x":30}

+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"n":4,"o":5,"t":6,"ʃ":7,"a":8,"uː":9,"m":10,"u":11,"tʲ":12,"i":13,"s":14,"eː":15,"d":16,"iː":17,"k":18,"ɡ":19,"ɑ":20,"ɤ":21,"ʊ":22,"sʲ":23,"j":24,"aː":25,"h":26,"v":27,"æi":28,"kː":29,"e":30,"ɪ":31,"tː":32,"r":33,"ɛ":34,"mː":35,"p":36,"sː":37,"æ":38,"l":39,"pː":40,"yː":41,"æː":42,"b":43,"ɔ":44,"ɤː":45,"lː":46,"ø":47,"øː":48,"ŋ":49,"y":50,"oː":51,"rː":52,"ɲ":53,"nː":54,"w":55,"tʲː":56,"øɪ̯":57,"f":58,"dʲ":59,"sʲː":60,"t̠ʃ":61,"ʃː":62,"ʒ":63,"z":64,"fː":65,"dː":66,"yi":67}