diff --git a/checkpoint-12/README.md b/checkpoint-12/README.md new file mode 100644 index 0000000000000000000000000000000000000000..816640edc11973d72c8818256065da4e2ec568cf --- /dev/null +++ b/checkpoint-12/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-2-7b-chat-hf +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/checkpoint-12/adapter_config.json b/checkpoint-12/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..670009ba072d036f4df43fdc2bda8fa966e97a0f --- /dev/null +++ b/checkpoint-12/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-12/adapter_model.safetensors b/checkpoint-12/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb323494ced41997e299d6da08d18a140d0c58ae --- /dev/null +++ b/checkpoint-12/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db74327da08c5733d3671d78bf0bfd5aca1e619d2e5f004b9bd81dd7840e9687 +size 16796376 diff --git a/checkpoint-12/optimizer.pt b/checkpoint-12/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd805910e5454b940c2937287d1ffc7bd7dfb039 --- /dev/null +++ b/checkpoint-12/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5684950bc9ff83f6c5ddd4311d297664dd5a6bb867d6340cc7c64145e8b02e +size 33662074 diff --git a/checkpoint-12/rng_state.pth b/checkpoint-12/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0943d810cd5d294ec3e82c2b73982a365f3deb0e --- /dev/null +++ b/checkpoint-12/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80fa41a67e12b2b3ead5635de716a7680a0cda8b3b966bf0b85209e763ea279f +size 14244 diff --git a/checkpoint-12/scheduler.pt b/checkpoint-12/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc9238e9b46513c17a70196f10e99cc33b04faed --- /dev/null +++ b/checkpoint-12/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa66395f286e5659316f995e715a4488274c2ca7b246da85398436971aec7d0 +size 1064 diff --git a/checkpoint-12/trainer_state.json b/checkpoint-12/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cd2ba11717aeef1952048de6e01f5c94a9955842 --- /dev/null +++ b/checkpoint-12/trainer_state.json @@ -0,0 +1,65 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 12, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_loss": 15.98961353302002, + "eval_runtime": 3.0384, + "eval_samples_per_second": 2.962, + "eval_steps_per_second": 2.962, + "step": 3 + }, + { + "epoch": 2.0, + "eval_loss": 15.178080558776855, + "eval_runtime": 3.0446, + "eval_samples_per_second": 2.956, + "eval_steps_per_second": 2.956, + "step": 6 + }, + { + "epoch": 3.0, + "eval_loss": 14.846773147583008, + "eval_runtime": 3.0494, + "eval_samples_per_second": 2.951, + "eval_steps_per_second": 2.951, + "step": 9 + }, + { + "epoch": 4.0, + "eval_loss": 14.293635368347168, + "eval_runtime": 3.0571, + "eval_samples_per_second": 2.944, + "eval_steps_per_second": 2.944, + "step": 12 + } + ], + "logging_steps": 500, + "max_steps": 20, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6418163658915840.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-12/training_args.bin b/checkpoint-12/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..abfdebb736623bf923c829e38e823f3f359d1900 --- /dev/null +++ b/checkpoint-12/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0bfe32988fa49c65f6f81dd078135bc15ce8257781eebbf886241fa29085bf +size 5368 diff --git a/checkpoint-15/README.md b/checkpoint-15/README.md new file mode 100644 index 0000000000000000000000000000000000000000..816640edc11973d72c8818256065da4e2ec568cf --- /dev/null +++ b/checkpoint-15/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-2-7b-chat-hf +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/checkpoint-15/adapter_config.json b/checkpoint-15/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..670009ba072d036f4df43fdc2bda8fa966e97a0f --- /dev/null +++ b/checkpoint-15/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-15/adapter_model.safetensors b/checkpoint-15/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8eb99592a2adbacf264e071a67bc2cbf4aa95e5c --- /dev/null +++ b/checkpoint-15/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db9e1a46a8e22d84743eed902f36391fb9db201b3422c4f61074057e68a8d7c +size 16796376 diff --git a/checkpoint-15/optimizer.pt b/checkpoint-15/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1ee540e02741543d8291fd92280b4c67abc66b0 --- /dev/null +++ b/checkpoint-15/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eeb08705c660e50909f33a21ba870a8cce6fc81f3bfafbae2538f68ca5be5c4 +size 33662074 diff --git a/checkpoint-15/rng_state.pth b/checkpoint-15/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..029a2ce0fe8863bba87295347137328f182d0619 --- /dev/null +++ b/checkpoint-15/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa516c2f82001617f39d814f02803bf369d6f37f952789e1909dde66a34e0094 +size 14244 diff --git a/checkpoint-15/scheduler.pt b/checkpoint-15/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cde89937053dc64127e5cf5f980452cf2d3ba6b1 --- /dev/null +++ b/checkpoint-15/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0434e3e2b1ab78aff01280081494c21eb53c25b0fea01cea4948945f2289374b +size 1064 diff --git a/checkpoint-15/trainer_state.json b/checkpoint-15/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4948513f70ec8786ea17c2fc49ecb4f6fa2f5226 --- /dev/null +++ b/checkpoint-15/trainer_state.json @@ -0,0 +1,73 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 15, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_loss": 15.98961353302002, + "eval_runtime": 3.0384, + "eval_samples_per_second": 2.962, + "eval_steps_per_second": 2.962, + "step": 3 + }, + { + "epoch": 2.0, + "eval_loss": 15.178080558776855, + "eval_runtime": 3.0446, + "eval_samples_per_second": 2.956, + "eval_steps_per_second": 2.956, + "step": 6 + }, + { + "epoch": 3.0, + "eval_loss": 14.846773147583008, + "eval_runtime": 3.0494, + "eval_samples_per_second": 2.951, + "eval_steps_per_second": 2.951, + "step": 9 + }, + { + "epoch": 4.0, + "eval_loss": 14.293635368347168, + "eval_runtime": 3.0571, + "eval_samples_per_second": 2.944, + "eval_steps_per_second": 2.944, + "step": 12 + }, + { + "epoch": 5.0, + "eval_loss": 13.908576965332031, + "eval_runtime": 3.0589, + "eval_samples_per_second": 2.942, + "eval_steps_per_second": 2.942, + "step": 15 + } + ], + "logging_steps": 500, + "max_steps": 20, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8022704573644800.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-15/training_args.bin b/checkpoint-15/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..abfdebb736623bf923c829e38e823f3f359d1900 --- /dev/null +++ b/checkpoint-15/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0bfe32988fa49c65f6f81dd078135bc15ce8257781eebbf886241fa29085bf +size 5368 diff --git a/checkpoint-18/README.md b/checkpoint-18/README.md new file mode 100644 index 0000000000000000000000000000000000000000..816640edc11973d72c8818256065da4e2ec568cf --- /dev/null +++ b/checkpoint-18/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-2-7b-chat-hf +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/checkpoint-18/adapter_config.json b/checkpoint-18/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..670009ba072d036f4df43fdc2bda8fa966e97a0f --- /dev/null +++ b/checkpoint-18/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-18/adapter_model.safetensors b/checkpoint-18/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96e87a30c4ffb97a554b52413c788533b8553f13 --- /dev/null +++ b/checkpoint-18/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42bc4c1793f45793f59a616958c4abc76114dc68464bfda25aeca5c86971ddd0 +size 16796376 diff --git a/checkpoint-18/optimizer.pt b/checkpoint-18/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..02c24ffa9c54f4de0639841ada70dd1110e39240 --- /dev/null +++ b/checkpoint-18/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cead76513a5a2dd33343332f68691e9f63f68e297c7826df8d6d59a5e33edcc1 +size 33662074 diff --git a/checkpoint-18/rng_state.pth b/checkpoint-18/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85b40558de9bdbfbfb3da043ac5b60f2ce21cf2b --- /dev/null +++ b/checkpoint-18/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811a9dc92b66f9a5c463d97ecdc438ebc0381b27ce0e537fe22e7ab9521e8ebb +size 14244 diff --git a/checkpoint-18/scheduler.pt b/checkpoint-18/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce0005ed747718dca93542b461a26625e054dede --- /dev/null +++ b/checkpoint-18/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd0bf0b956056fba76de1a083acc2f6179fef0c369803458d9025730de7a3715 +size 1064 diff --git a/checkpoint-18/trainer_state.json b/checkpoint-18/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..12f469f26ad3a7100cee7b43f24518691b98efdf --- /dev/null +++ b/checkpoint-18/trainer_state.json @@ -0,0 +1,81 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 18, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_loss": 15.98961353302002, + "eval_runtime": 3.0384, + "eval_samples_per_second": 2.962, + "eval_steps_per_second": 2.962, + "step": 3 + }, + { + "epoch": 2.0, + "eval_loss": 15.178080558776855, + "eval_runtime": 3.0446, + "eval_samples_per_second": 2.956, + "eval_steps_per_second": 2.956, + "step": 6 + }, + { + "epoch": 3.0, + "eval_loss": 14.846773147583008, + "eval_runtime": 3.0494, + "eval_samples_per_second": 2.951, + "eval_steps_per_second": 2.951, + "step": 9 + }, + { + "epoch": 4.0, + "eval_loss": 14.293635368347168, + "eval_runtime": 3.0571, + "eval_samples_per_second": 2.944, + "eval_steps_per_second": 2.944, + "step": 12 + }, + { + "epoch": 5.0, + "eval_loss": 13.908576965332031, + "eval_runtime": 3.0589, + "eval_samples_per_second": 2.942, + "eval_steps_per_second": 2.942, + "step": 15 + }, + { + "epoch": 6.0, + "eval_loss": 13.70132064819336, + "eval_runtime": 3.0763, + "eval_samples_per_second": 2.926, + "eval_steps_per_second": 2.926, + "step": 18 + } + ], + "logging_steps": 500, + "max_steps": 20, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9627245488373760.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-18/training_args.bin b/checkpoint-18/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..abfdebb736623bf923c829e38e823f3f359d1900 --- /dev/null +++ b/checkpoint-18/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0bfe32988fa49c65f6f81dd078135bc15ce8257781eebbf886241fa29085bf +size 5368 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..816640edc11973d72c8818256065da4e2ec568cf --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-2-7b-chat-hf +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..670009ba072d036f4df43fdc2bda8fa966e97a0f --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b605eb854bd7d03aa7c831ee4fab379eda8775a --- /dev/null +++ b/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714cfa2ea0779f51a513ccfe1e590738595728881619e5328ed61cdc15a9b3ef +size 16796376 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..96d814e468b46d1275ddac8f04b337063cbc8013 --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cef1671f2eb59c5c7d85f7209b245055aa2bf1c38f5e49a8793225a1dd96541 +size 33662074 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..18fadf609071721c6d1aaa72db93cf31b5bef746 --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84bc53e40bb88dcd0bf447031e1530782b41d31bb0dd69fe081bab4198eb1cac +size 14244 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f9c08493ec98b31cae891a0a418dde69c063c2e --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107345441c052976110ff33fea5387a7bb0c66d837cfd27fc624724b149520ed +size 1064 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..98ee9b333ea75f11224c6b7f8c67ba00e94dc42c --- /dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,89 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.810126582278481, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_loss": 15.98961353302002, + "eval_runtime": 3.0384, + "eval_samples_per_second": 2.962, + "eval_steps_per_second": 2.962, + "step": 3 + }, + { + "epoch": 2.0, + "eval_loss": 15.178080558776855, + "eval_runtime": 3.0446, + "eval_samples_per_second": 2.956, + "eval_steps_per_second": 2.956, + "step": 6 + }, + { + "epoch": 3.0, + "eval_loss": 14.846773147583008, + "eval_runtime": 3.0494, + "eval_samples_per_second": 2.951, + "eval_steps_per_second": 2.951, + "step": 9 + }, + { + "epoch": 4.0, + "eval_loss": 14.293635368347168, + "eval_runtime": 3.0571, + "eval_samples_per_second": 2.944, + "eval_steps_per_second": 2.944, + "step": 12 + }, + { + "epoch": 5.0, + "eval_loss": 13.908576965332031, + "eval_runtime": 3.0589, + "eval_samples_per_second": 2.942, + "eval_steps_per_second": 2.942, + "step": 15 + }, + { + "epoch": 6.0, + "eval_loss": 13.70132064819336, + "eval_runtime": 3.0763, + "eval_samples_per_second": 2.926, + "eval_steps_per_second": 2.926, + "step": 18 + }, + { + "epoch": 6.810126582278481, + "eval_loss": 13.690178871154785, + "eval_runtime": 3.1376, + "eval_samples_per_second": 2.868, + "eval_steps_per_second": 2.868, + "step": 20 + } + ], + "logging_steps": 500, + "max_steps": 20, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.092712673574912e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..abfdebb736623bf923c829e38e823f3f359d1900 --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0bfe32988fa49c65f6f81dd078135bc15ce8257781eebbf886241fa29085bf +size 5368 diff --git a/checkpoint-3/adapter_model.safetensors b/checkpoint-3/adapter_model.safetensors index 4396aeb4c294e41ae4ba329becb6a2bd1d859372..b5966bfd084e95d31a31917fc4e53e7167016b70 100644 --- a/checkpoint-3/adapter_model.safetensors +++ b/checkpoint-3/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ced072541fb0832ffdae1c95ab15db25d239b0bd9f17e89b4311ac3bae06ea8c +oid sha256:6987e5a5af262e920ba3014ca049ef1cbe20bf04ec3ade690036ea8d652ff600 size 16796376 diff --git a/checkpoint-3/optimizer.pt b/checkpoint-3/optimizer.pt index b899bff6251d207614db3a33912952e27266f336..979e16e9ff229b0ee5a88d9b817af643950cd16d 100644 --- a/checkpoint-3/optimizer.pt +++ b/checkpoint-3/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f0505c0935ab2f6793aff8eadb6a481c85c4601cede66bb9ec3260ad33bc5be +oid sha256:3583f8fd34d5b7a400edf73a01cd24d5534998c4ac64a0bc195f4770ab8a21ca size 33662074 diff --git a/checkpoint-3/scheduler.pt b/checkpoint-3/scheduler.pt index 70fd626e02354644c78ed88469a49f162d44ad69..a81af0eb63f261dde7221fc505c93b07a26a0728 100644 --- a/checkpoint-3/scheduler.pt +++ b/checkpoint-3/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ccd8222543155d3562cbc0f00ce970f9119e4c80a6209d29eb44089577d97989 +oid sha256:8d148a17afdf5a74924046882f42e1d89ff000c1c0fc4065536cfdb290a4d904 size 1064 diff --git a/checkpoint-3/trainer_state.json b/checkpoint-3/trainer_state.json index d32853ea94c25fe2858632e69a97060bca157cf4..dee6c63450cce43139d69886aafb8998a23b3adb 100644 --- a/checkpoint-3/trainer_state.json +++ b/checkpoint-3/trainer_state.json @@ -10,17 +10,17 @@ "log_history": [ { "epoch": 1.0, - "eval_loss": 16.88850212097168, - "eval_runtime": 3.0761, - "eval_samples_per_second": 2.926, - "eval_steps_per_second": 2.926, + "eval_loss": 15.98961353302002, + "eval_runtime": 3.0384, + "eval_samples_per_second": 2.962, + "eval_steps_per_second": 2.962, "step": 3 } ], "logging_steps": 500, - "max_steps": 6, + "max_steps": 20, "num_input_tokens_seen": 0, - "num_train_epochs": 3, + "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { diff --git a/checkpoint-3/training_args.bin b/checkpoint-3/training_args.bin index 0370dfac5226cce3f6f3326bafa45904ee12dd8c..abfdebb736623bf923c829e38e823f3f359d1900 100644 --- a/checkpoint-3/training_args.bin +++ b/checkpoint-3/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb0f1fba9332926cc8d0226eba0e5260c50761407ef337f5eb5e868e5a1368b4 +oid sha256:1a0bfe32988fa49c65f6f81dd078135bc15ce8257781eebbf886241fa29085bf size 5368 diff --git a/checkpoint-6/adapter_model.safetensors b/checkpoint-6/adapter_model.safetensors index a59a83b8dfc460ccc86ea6588eb1b9fb7027668e..cb7994407fbcfa9f03bea563f7856fb7459568ea 100644 --- a/checkpoint-6/adapter_model.safetensors +++ b/checkpoint-6/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10e8b16ab86f3f50b06b580971031729964acb8ad94452fd1346701b866f2058 +oid sha256:150c4c6e6b0254c4df0a1ee58568979431817598b2e7a7c0d65630aabcb42242 size 16796376 diff --git a/checkpoint-6/optimizer.pt b/checkpoint-6/optimizer.pt index e1623a5c78d89d266e18a812033ab28d6ce7e987..567638f5e5e33b07b8d1e39776ea0f903662d645 100644 --- a/checkpoint-6/optimizer.pt +++ b/checkpoint-6/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64b8a7cc427cc35c6166ac4392488123056062e6d98db2505004a2ae9fea3680 +oid sha256:e9fb9d31c3e9b7723337f8bbc65945dedf012abd05515635229e480bb44e536b size 33662074 diff --git a/checkpoint-6/scheduler.pt b/checkpoint-6/scheduler.pt index 7909141bf2ba438e64b3865f2878f469d4d63bc4..1f5e5a7307471080c39be1009d225627cba88da9 100644 --- a/checkpoint-6/scheduler.pt +++ b/checkpoint-6/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10c80bb1428cebac08ee1924431e250465b7029be8b1c863bc531b45e2096235 +oid sha256:555db904f634ba80f5a5b55ecb9eda757ee7fbf550a63df1171681188699b3e1 size 1064 diff --git a/checkpoint-6/trainer_state.json b/checkpoint-6/trainer_state.json index b5c21231f005745fbdf01a4b125a7f8af0cf9c68..56f5e5a116a8421f24a98a026d928b8567469e7f 100644 --- a/checkpoint-6/trainer_state.json +++ b/checkpoint-6/trainer_state.json @@ -10,25 +10,25 @@ "log_history": [ { "epoch": 1.0, - "eval_loss": 16.88850212097168, - "eval_runtime": 3.0761, - "eval_samples_per_second": 2.926, - "eval_steps_per_second": 2.926, + "eval_loss": 15.98961353302002, + "eval_runtime": 3.0384, + "eval_samples_per_second": 2.962, + "eval_steps_per_second": 2.962, "step": 3 }, { "epoch": 2.0, - "eval_loss": 16.651338577270508, - "eval_runtime": 3.1083, - "eval_samples_per_second": 2.895, - "eval_steps_per_second": 2.895, + "eval_loss": 15.178080558776855, + "eval_runtime": 3.0446, + "eval_samples_per_second": 2.956, + "eval_steps_per_second": 2.956, "step": 6 } ], "logging_steps": 500, - "max_steps": 6, + "max_steps": 20, "num_input_tokens_seen": 0, - "num_train_epochs": 3, + "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { @@ -37,7 +37,7 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": true + "should_training_stop": false }, "attributes": {} } diff --git a/checkpoint-6/training_args.bin b/checkpoint-6/training_args.bin index 0370dfac5226cce3f6f3326bafa45904ee12dd8c..abfdebb736623bf923c829e38e823f3f359d1900 100644 --- a/checkpoint-6/training_args.bin +++ b/checkpoint-6/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb0f1fba9332926cc8d0226eba0e5260c50761407ef337f5eb5e868e5a1368b4 +oid sha256:1a0bfe32988fa49c65f6f81dd078135bc15ce8257781eebbf886241fa29085bf size 5368 diff --git a/checkpoint-9/README.md b/checkpoint-9/README.md new file mode 100644 index 0000000000000000000000000000000000000000..816640edc11973d72c8818256065da4e2ec568cf --- /dev/null +++ b/checkpoint-9/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-2-7b-chat-hf +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/checkpoint-9/adapter_config.json b/checkpoint-9/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..670009ba072d036f4df43fdc2bda8fa966e97a0f --- /dev/null +++ b/checkpoint-9/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-9/adapter_model.safetensors b/checkpoint-9/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47a1191dad1714f7ac75d32e5057a30d3e3c1b1c --- /dev/null +++ b/checkpoint-9/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d24c2bdd431d60829192163ec399f686f1c104893abe5846dda81e2e139324c2 +size 16796376 diff --git a/checkpoint-9/optimizer.pt b/checkpoint-9/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d584af938c446a1491add573045eb60b5253281 --- /dev/null +++ b/checkpoint-9/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8386c58e1dca60c30ebb6fd0fefcf5a30c90ff1778ae9b910ce596d69ab10b43 +size 33662074 diff --git a/checkpoint-9/rng_state.pth b/checkpoint-9/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..953080b73f1d94c486da67d50cd4a25ace20b2f8 --- /dev/null +++ b/checkpoint-9/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:012ddd1962aa80669901fb9c50e8436cf1b1ab1abb0f5a6b4580168a47b67876 +size 14244 diff --git a/checkpoint-9/scheduler.pt b/checkpoint-9/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c6e1c80bbb61bdb37700967102ac6dfc41e763e --- /dev/null +++ b/checkpoint-9/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a03dc137f35b92d737f6dbec9d4bc49e81c7dfe4179ed290c5be1563d6d2289 +size 1064 diff --git a/checkpoint-9/trainer_state.json b/checkpoint-9/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d67d2b2909909bccdd1f2ab98449cc81597a8301 --- /dev/null +++ b/checkpoint-9/trainer_state.json @@ -0,0 +1,57 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 9, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_loss": 15.98961353302002, + "eval_runtime": 3.0384, + "eval_samples_per_second": 2.962, + "eval_steps_per_second": 2.962, + "step": 3 + }, + { + "epoch": 2.0, + "eval_loss": 15.178080558776855, + "eval_runtime": 3.0446, + "eval_samples_per_second": 2.956, + "eval_steps_per_second": 2.956, + "step": 6 + }, + { + "epoch": 3.0, + "eval_loss": 14.846773147583008, + "eval_runtime": 3.0494, + "eval_samples_per_second": 2.951, + "eval_steps_per_second": 2.951, + "step": 9 + } + ], + "logging_steps": 500, + "max_steps": 20, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4813622744186880.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-9/training_args.bin b/checkpoint-9/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..abfdebb736623bf923c829e38e823f3f359d1900 --- /dev/null +++ b/checkpoint-9/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0bfe32988fa49c65f6f81dd078135bc15ce8257781eebbf886241fa29085bf +size 5368 diff --git a/tokenizer.json b/tokenizer.json index 263b290d671b305224d8dee7f75d35580f3ff802..6b684f885f88f239c3ce65d728241672132152b6 100644 --- a/tokenizer.json +++ b/tokenizer.json @@ -1,21 +1,7 @@ { "version": "1.0", - "truncation": { - "direction": "Right", - "max_length": 512, - "strategy": "LongestFirst", - "stride": 0 - }, - "padding": { - "strategy": { - "Fixed": 512 - }, - "direction": "Right", - "pad_to_multiple_of": null, - "pad_id": 2, - "pad_type_id": 0, - "pad_token": "" - }, + "truncation": null, + "padding": null, "added_tokens": [ { "id": 0,