Upload 3 files

Browse files

Files changed (3) hide show

G0_Plus_P&P/config.yaml +234 -0
G0_Plus_P&P/dataset_statistics.json +0 -0
G0_Plus_P&P/model_282276.pt +3 -0

G0_Plus_P&P/config.yaml ADDED Viewed

	@@ -0,0 +1,234 @@

+TAG: debug  # first of the top-level run / optimizer settings (everything up to the DATASET block)
+LOG_DIR: tensorboard_logs
+seed: 7  # same value as EVALUATION.seed at the end of this file
+vla_path: paligemma-3b-pt-224  # same model name as MODEL.vla_name
+data_root_dir: /galaxea_dataset/galaxea/pp_project/rlds_334_tasks_distributed/
+dataset_name: bbox_training_r1_lite_5_parts
+run_root_dir: runs/base
+adapter_tmp_dir: adapter_tmp_weights
+hf_token: /galaxea_fulltime/share/.hf_token  # a filesystem path, not a token value
+ckpt: /galaxea_fulltime/pretrained_ckpts/pi0_libero/pi0_torch_state.pt  # presumably the weights training is initialised from -- TODO confirm
+use_lora: false  # each MODEL sub-block additionally carries its own use_lora flag
+lora_rank: 32  # referenced as ${lora_rank} by the lora.r entries under MODEL
+lora_dropout: 0.0  # referenced as ${lora_dropout} by the lora.dropout entries under MODEL
+use_quantization: false
+enable_bf16: true
+model_param_to_bf16: false
+vla_training_strategy: vla-full-train
+weight_decay: 1.0e-06
+batch_size: 4  # NOTE(review): per-device vs. global is not visible from this file -- confirm
+grad_accumulation_steps: 1
+learning_rate: 2.5e-05
+warmup_steps: 500
+lr_scheduler_type: cosine
+image_aug: true  # presumably the switch for DATASET.image_augment_kwargs -- verify against the data loader
+max_epochs: 8
+save_steps: 23523  # 12 x 23523 = 282276, the step number in the uploaded model_282276.pt filename
+log_steps: 100
+use_torch_compile: false
+wandb_project: 1101_pnp_rla_image_condition_376_tasks_5_parts
+wandb_entity: cuijianning1996-galaxea-ai
+exp_name: 376_tasks_img_as_cond_with_randomly_rotated_bbox  # NOTE(review): says 376 tasks while data_root_dir says 334_tasks -- confirm which is current
+use_ema: false
+ema:  # EMA parameters; use_ema above is false, so presumably unused in this run -- verify against trainer
+  update_after_step: 0
+  power: 0.67
+DATASET:  # dataset, instruction-augmentation and image-augmentation settings
+  robot_cfg:
+    with_left_arm: true
+    with_right_arm: true
+    with_torso: false
+    with_chassis: false
+  use_relative_joint_action: true
+  window_size: 1  # multiplied by len(camera_views) in MODEL.num_input_images
+  future_action_window_size: 31  # 1 + 31 = 32 equals MODEL.horizon_steps (presumably by design -- confirm)
+  camera_views:  # 4 entries; the list length feeds MODEL.num_input_images
+  - head_condition  # has no entry under image_augment_kwargs below
+  - head
+  - wrist_left
+  - wrist_right
+  shuffle_buffer_size: 10000
+  balance_weights: false
+  use_last_action: false  # see the "last action(26)" remark in the MODEL.proprio_dim comment
+  share_datasets_statistics: true
+  short_prompt: true
+  aug_instruction_kwargs:  # note: image_condition_lang_prefix below is a single plain scalar folded across two lines
+    drop_high_level_prob: 1.0
+    bbox_as_instruction: false
+    image_condition: true
+    image_condition_lang_prefix: Pick the object in the first image and place into
+      the tableware.
+    bbox_jitter_ratio: 0.0
+  action_proprio_normalization_type: normal
+  use_pretrained_data_stats: false
+  proprio_noise_std: 0.05
+  image_augment_kwargs:  # per-camera augmentation parameters, keyed by camera view name
+    head:  # colour augmentations only
+      random_brightness:
+      - 0.2
+      random_contrast:
+      - 0.8
+      - 1.2
+      random_saturation:
+      - 0.8
+      - 1.2
+      random_hue:
+      - 0.05
+      augment_order:  # presumably the order in which the augmentations are applied -- TODO confirm
+      - random_brightness
+      - random_contrast
+      - random_saturation
+      - random_hue
+    wrist_left:  # same values as head, plus random_drop_all_image listed first in augment_order
+      random_brightness:
+      - 0.2
+      random_contrast:
+      - 0.8
+      - 1.2
+      random_saturation:
+      - 0.8
+      - 1.2
+      random_hue:
+      - 0.05
+      random_drop_all_image:
+      - 0.3
+      augment_order:
+      - random_drop_all_image
+      - random_brightness
+      - random_contrast
+      - random_saturation
+      - random_hue
+    wrist_right:  # identical settings to wrist_left
+      random_brightness:
+      - 0.2
+      random_contrast:
+      - 0.8
+      - 1.2
+      random_saturation:
+      - 0.8
+      - 1.2
+      random_hue:
+      - 0.05
+      random_drop_all_image:
+      - 0.3
+      augment_order:
+      - random_drop_all_image
+      - random_brightness
+      - random_contrast
+      - random_saturation
+      - random_hue
+model_family: galaxea_zero
+MODEL:  # model architecture settings: vision tower, projector and joint (vlm / proprio / action) mixture
+  name: vla.galaxea_zero.GalaxeaZeroWrapper
+  vla_name: "paligemma-3b-pt-224"
+  load_inside: false
+  pretrained_model_path: /galaxea_fulltime/pretrained_ckpts/cache/paligemma-3b-pt-224
+  input_ids: true
+  action_expert_only: false
+  image_token_index: 257152
+  vocab_size: 257216
+  pad_token_id: 0
+  cond_steps: 1 # len proprio
+  horizon_steps: 32  # equals DATASET.window_size (1) + DATASET.future_action_window_size (31)
+  action_dim: 26 # 2 x [QPOS (6) + gripper (1)] + Torso Velocity (6) + Chassis Velocity (6) = 26
+  proprio_dim: 21  # 2 * [QPOS (6) + gripper (1)] + 4 (torso) + 3 (base vel) = 21; the "last action(26)" term is not counted (DATASET.use_last_action is false)
+  max_text_tokens: 55 # 55 for galaxea0002
+  max_seq_len: ${eval:'${MODEL.num_input_images} * ${MODEL.vision.num_image_tokens} + ${MODEL.max_text_tokens}'}  # 4 * 256 + 55 = 1079 with this file's values, assuming the eval resolver evaluates the expression
+  max_image_text_tokens: ${MODEL.max_seq_len} # = ${max_seq_len}
+  action_decoder_layers: 2
+  flow_sampling: beta
+  num_inference_steps: 10
+  final_action_clip_value: null  # data normalized in [-1,1]
+  action_expert_adaptive_mode: null
+  num_input_images: ${eval:'${DATASET.window_size} * len(${DATASET.camera_views})'} # $DATASET.window_size * LEN($DATASET.camera_views) = 1 * 4 = 4 with this file's values
+  vision:
+    name: vla.model.paligemma.siglip.SiglipVisionModel
+    hidden_size: 1152 # siglip
+    intermediate_size: 4304
+    num_hidden_layers: 27
+    num_attention_heads: 16
+    num_channels: 3
+    image_size: 224
+    patch_size: 14
+    layer_norm_eps: 0.000001
+    attention_dropout: 0.0
+    num_image_tokens: 256  # equals (image_size / patch_size)^2 = (224 / 14)^2
+    lora:
+      r: ${lora_rank}
+      dropout: ${lora_dropout}
+    use_quantize: false
+    use_lora: false
+  vision_projector:
+    name: vla.model.paligemma.siglip.PaliGemmaMultiModalProjector
+    vision_config:
+      hidden_size: 1152  # same as MODEL.vision.hidden_size
+      projection_dim: 2048  # same as joint.mixture.vlm.hidden_size
+    lora:
+      r: ${lora_rank}
+      dropout: ${lora_dropout}
+    use_quantize: false
+    use_lora: false
+  joint:
+    name: vla.model.g0.joint_model.JointModel
+    action_expert_adaptive_mode: null
+    mixture:
+      vlm:   # gemma
+        hidden_size: 2048
+        intermediate_size: 16384
+        use_final_norm: false
+        cache: true
+        use_quantize: false
+        use_lora: false
+        adaptive_mode: null  # not applicable for gemma
+      proprio:
+        hidden_size: 1024
+        intermediate_size: 4096
+        use_final_norm: true  # technically no, but sharing weights with action anyway
+        cache: true
+        use_quantize: false
+        use_lora: false
+        adaptive_mode: null
+      action:
+        hidden_size: 1024
+        intermediate_size: 4096
+        use_final_norm: true
+        cache: false
+        use_quantize: false
+        use_lora: false
+        adaptive_mode: null
+    time_hidden_size: 256 # only applicable if using adaptive
+    lora:
+      r: ${lora_rank}
+      dropout: ${lora_dropout}
+    num_hidden_layers: 18
+    num_attention_heads: 8  # 8 heads x head_dim 256 = 2048, the vlm hidden_size
+    num_key_value_heads: 1
+    head_dim: 256
+    max_position_embeddings: 8192
+    rms_norm_eps: 0.000001
+    rope_theta: 10000.0
+    attention_bias: false
+    attention_dropout: 0.0
+    pad_token_id: 0
+#################################################################################################################
+# For evaluation
+#################################################################################################################
+EVALUATION:
+  checkpoint: null     # Pretrained checkpoint path
+  load_in_8bit: false                       # (For OpenVLA only) Load with 8-bit quantization
+  load_in_4bit: false                       # (For OpenVLA only) Load with 4-bit quantization
+  center_crop: true                         # Center crop? (if trained w/ random crop image aug)
+  #################################################################################################################
+  # Environment-specific parameters (comments in this section were written for LIBERO)
+  #################################################################################################################
+  task_suite_name: "simpler_widowx"          # Task suite. Listed LIBERO options: libero_spatial, libero_object, libero_goal, libero_10, libero_90. NOTE(review): the current value is not one of them -- confirm
+  num_steps_wait: 10                         # Number of steps to wait for objects to stabilize in sim
+  num_trials_per_task: 24                    # Number of rollouts per task
+  use_wrist_image: false
+  #################################################################################################################
+  # Utils
+  #################################################################################################################
+  run_id_note: null                          # Extra note to add in run ID for logging (null = no note; a bare None would be the string "None")
+  local_log_dir: "./experiments/logs"        # Local directory for eval logs
+  use_wandb: false                            # Whether to also log results in Weights & Biases
+  seed: 7                                    # Random Seed (for reproducibility)

G0_Plus_P&P/dataset_statistics.json ADDED Viewed

The diff for this file is too large to render. See raw diff

G0_Plus_P&P/model_282276.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bbf962aa4693dfdb2cef7560ad36f5382a064ad69a45dad59c5c878599eee3b1
+size 12957251217