# Run configuration for a training job (`task: sft`): dataset and base-model
# locations, hyperparameters, parallelism layout (dp_size 16 / world_size 16),
# checkpoint/eval cadence and Weights & Biases settings.
#
# NOTE(review): `world_mesh` is serialized with Python-specific tags
# (`!!python/object:torch.distributed.device_mesh.DeviceMesh`,
# `!!python/object/apply:...`, `!!python/tuple`) and embeds a pickled tensor
# as `!!binary`. Safe YAML loaders do not construct these tags; a loader that
# does construct them can run arbitrary code, so only load this file from a
# trusted source. Presumably this is a dump of a live config object rather
# than a hand-written file — confirm before editing by hand.
# NOTE(review): `save_strategy: epoch` is set together with `save_steps: 0.2`,
# while evaluation uses `eval_strategy: steps` with `eval_steps: 0.2` —
# confirm whether `save_steps` has any effect under the epoch strategy.
# NOTE(review): `hyperparams` carries both `adam_weight_decay: null` and
# `weight_decay: 0.01` — confirm which one the optimizer actually reads.
dataset: s3://scale-ml/users/niklas/swe-agent/train/qwen-2000imitation-student/with_pr/masked/961dagger-2000imitation debug: false eval_at_step_zero: null eval_case_report: false eval_case_report_only_splits: [] eval_mode: false eval_steps: 0.2 eval_strategy: steps experimental: activation_checkpointing: true enable_context_parallel: false mask_input_ids_by_flag: true pad_to_max_length: false pipeline_parallel_reshard: false pipeline_parallel_schedule: gpipe pipeline_parallel_size: 1 pp_last_stage_offset: 0 pp_share_train_eval_schedule: true torch_compile: false hyperparams: adam_beta1: 0.9 adam_beta2: 0.999 adam_epsilon: 1.0e-08 adam_weight_decay: null constant_pack: false eval_num_rollouts_per_prompt: 1 gradient_accumulation_steps: 1 learning_rate: 5.0e-05 loss_form: null lr_scheduler_kwargs: null lr_scheduler_type: cosine mask_instruct: true max_grad_norm: 1.0 max_length: 32768 num_rollouts_per_prompt: 1 num_train_epochs: 3 num_train_steps: -1 online: false optimizer: adam per_device_eval_batch_size: 1 per_device_micro_batch_size: 1 per_device_train_batch_size: 1 sleep_level: 2 warmup_ratio: 0.05 weight_decay: 0.01 local_output_path: /mnt/nvme logging_rollouts: 0 logging_steps: 1 model_squad: lm: activation_checkpointing: true model_path: s3://scale-ml/users/niklas/models/smith-claude-expert/2000imitation-lr5e-5-batch16/checkpoints/checkpoint-375/ parallel_state: cp_mesh: null cp_size: 1 device_type: cuda dp_size: 16 pp_size: 1 world_mesh: !!python/object:torch.distributed.device_mesh.DeviceMesh _coordinate_on_dim: - 0 _dim_group_infos: - !!python/tuple - ptd:0 - - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - '0' _flatten_mesh_list: !!python/tuple - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 _hash: -8305722318908533129 _thread_id: null device_type: cuda mesh: !!python/object/apply:torch._utils._rebuild_tensor_v2 - !!python/object/apply:torch.storage._load_from_bytes - !!binary | 
gAKKCmz8nEb5IGqoUBkugAJN6QMugAJ9cQAoWBAAAABwcm90b2NvbF92ZXJzaW9ucQFN6QNYDQAA AGxpdHRsZV9lbmRpYW5xAohYCgAAAHR5cGVfc2l6ZXNxA31xBChYBQAAAHNob3J0cQVLAlgDAAAA aW50cQZLBFgEAAAAbG9uZ3EHSwR1dS6AAihYBwAAAHN0b3JhZ2VxAGN0b3JjaApJbnRTdG9yYWdl CnEBWA8AAAAxMDI1MzIxMjY3NjY4MDBxAlgDAAAAY3B1cQNLEE50cQRRLoACXXEAWA8AAAAxMDI1 MzIxMjY3NjY4MDBxAWEuEAAAAAAAAAAAAAAAAQAAAAIAAAADAAAABAAAAAUAAAAGAAAABwAAAAgA AAAJAAAACgAAAAsAAAAMAAAADQAAAA4AAAAPAAAA - 0 - !!python/tuple - 16 - !!python/tuple - 1 - false - !!python/object/apply:collections.OrderedDict - [] mesh_dim_names: !!python/tuple - dp world_size: 16 torch_compile: false use_fsdp2: true use_scale_llama: false processing_interface: class_name: BaseProcessingInterface module_path: trainers.processing_interface remote_object: {} resume: false s3_output_path: s3://scale-ml/users/niklas/models/qwen-2000imitation-student/with_pr/masked/961dagger-2000imitation save_at_step_zero: null save_final_model: true save_hf: true save_lr_scheduler: true save_optimizer: true save_s3_async: true save_steps: 0.2 save_strategy: epoch task: sft use_device_mesh: true use_fsdp2: true use_scale_llama: false wandb: entity: gen-ai name: qwen2.5-961dagger-2000imitation-with_pr-masked-lr5e-5-batch16 project: agent-rlxf wandb_host: https://scaleai.wandb.io/ wandb_key_name: NIKLAS_WANDB_API_KEY wandb_secretsmanager_location: team/GENAIML/secret-store-key