alpha: 0.2
base_model: meta-llama/Llama-3.2-1B-Instruct
custom_name: d4-a0.2-v4
dtype: bfloat16
lambdas:
- 1.0
- 1.0
- 1.0
lora_config: null
loss_types:
- anti-watermark
- anti-watermark
meta_learning: false
meta_learning_config: null
n_wm_tokens: 0
proportions:
- 0.5
- 0.2
- 0.3
regularization_datasets:
- !!python/object/apply:finetuning.dataset.DatasetType
  - AlpacaGPT4
- !!python/object/apply:finetuning.dataset.DatasetType
  - WO_NoHealth
sequence_length: 512
streaming: true
training_args:
  bf16: false
  do_train: true
  fp16: false
  gradient_accumulation_steps: 16
  gradient_checkpointing: false
  hub_strategy: all_checkpoints
  learning_rate: 2.0e-05
  logging_steps: 10
  lr_scheduler_type: cosine
  max_steps: 2500
  num_train_epochs: 1
  optim: adafactor
  output_dir: Grogros/dmWM-llama-3.2-1B-Instruct-WOHealth-Al4-NH-WO-d4-a0.2-v4
  overwrite_output_dir: true
  per_device_train_batch_size: 4
  push_to_hub: true
  report_to: none
  save_steps: 500
  save_strategy: steps
  warmup_ratio: 0.1
watermark_datasets:
- !!python/object/apply:finetuning.dataset.DatasetType
  - WOHealth
watermark_eval_config: []
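Because the dataset entries use PyYAML's `!!python/object/apply` tag to instantiate the repo's own `finetuning.dataset.DatasetType` at parse time, this file cannot be read with `yaml.safe_load`. Below is a minimal loading sketch, assuming PyYAML >= 5.1 and that the `finetuning` package is importable; the filename `config.yaml` and the splat into `transformers.TrainingArguments` are illustrative assumptions, not something the config itself specifies.

```python
# Minimal sketch: parse this config with PyYAML (>= 5.1 assumed).
# The !!python/object/apply tags construct finetuning.dataset.DatasetType
# members during parsing, so the unsafe loader is required and the
# `finetuning` package must be on the import path.
import yaml
from transformers import TrainingArguments  # key names below match this API

with open("config.yaml") as f:      # hypothetical filename (assumption)
    cfg = yaml.unsafe_load(f)       # safe_load rejects python/object tags

# The training_args sub-dict mirrors transformers.TrainingArguments fields,
# so it can plausibly be splatted into the constructor (assumption).
args = TrainingArguments(**cfg["training_args"])

print(cfg["base_model"])            # meta-llama/Llama-3.2-1B-Instruct
print(cfg["watermark_datasets"])    # [<DatasetType.WOHealth: ...>] (enum member)
print(args.learning_rate)           # 2e-05
```

`yaml.load(f, Loader=yaml.UnsafeLoader)` is equivalent to `yaml.unsafe_load`; unsafe loading is only reasonable here because the file ships with, and is executed alongside, the same codebase that defines `DatasetType`.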