alpha: 0.2
base_model: meta-llama/Llama-3.2-1B-Instruct
custom_name: d4-a0.2-v4
dtype: bfloat16
lambdas:
- 1.0
- 1.0
- 1.0
lora_config: null
loss_types:
- anti-watermark
- anti-watermark
meta_learning: false
meta_learning_config: null
n_wm_tokens: 0
proportions:
- 0.5
- 0.2
- 0.3
regularization_datasets:
- !!python/object/apply:finetuning.dataset.DatasetType
  - AlpacaGPT4
- !!python/object/apply:finetuning.dataset.DatasetType
  - WO_NoHealth
sequence_length: 512
streaming: true
training_args:
  bf16: false
  do_train: true
  fp16: false
  gradient_accumulation_steps: 16
  gradient_checkpointing: false
  hub_strategy: all_checkpoints
  learning_rate: 2.0e-05
  logging_steps: 10
  lr_scheduler_type: cosine
  max_steps: 2500
  num_train_epochs: 1
  optim: adafactor
  output_dir: Grogros/dmWM-llama-3.2-1B-Instruct-WOHealth-Al4-NH-WO-d4-a0.2-v4
  overwrite_output_dir: true
  per_device_train_batch_size: 4
  push_to_hub: true
  report_to: none
  save_steps: 500
  save_strategy: steps
  warmup_ratio: 0.1
watermark_datasets:
- !!python/object/apply:finetuning.dataset.DatasetType
  - WOHealth
watermark_eval_config: []
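Because the dataset entries use PyYAML's `!!python/object/apply` tag to instantiate the repo's own `finetuning.dataset.DatasetType` at parse time, this file cannot be read with `yaml.safe_load`. Below is a minimal loading sketch, assuming PyYAML >= 5.1 and that the `finetuning` package is importable; the filename `config.yaml` and the splat into `transformers.TrainingArguments` are illustrative assumptions, not something the config itself specifies.

```python
# Minimal sketch: parse this config with PyYAML (>= 5.1 assumed).
# The !!python/object/apply tags construct finetuning.dataset.DatasetType
# members during parsing, so the unsafe loader is required and the
# `finetuning` package must be on the import path.
import yaml
from transformers import TrainingArguments  # key names below match this API

with open("config.yaml") as f:      # hypothetical filename (assumption)
    cfg = yaml.unsafe_load(f)       # safe_load rejects python/object tags

# The training_args sub-dict mirrors transformers.TrainingArguments fields,
# so it can plausibly be splatted into the constructor (assumption).
args = TrainingArguments(**cfg["training_args"])

print(cfg["base_model"])            # meta-llama/Llama-3.2-1B-Instruct
print(cfg["watermark_datasets"])    # [<DatasetType.WOHealth: ...>] (enum member)
print(args.learning_rate)           # 2e-05
```

`yaml.load(f, Loader=yaml.UnsafeLoader)` is equivalent to `yaml.unsafe_load`; unsafe loading is only reasonable here because the file ships with, and is executed alongside, the same codebase that defines `DatasetType`.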