Upload 3 files
Browse files
- G0_Plus_P&P/config.yaml +234 -0
- G0_Plus_P&P/dataset_statistics.json +0 -0
- G0_Plus_P&P/model_282276.pt +3 -0
G0_Plus_P&P/config.yaml
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TAG: debug
|
| 2 |
+
LOG_DIR: tensorboard_logs
|
| 3 |
+
seed: 7
|
| 4 |
+
vla_path: paligemma-3b-pt-224
|
| 5 |
+
data_root_dir: /galaxea_dataset/galaxea/pp_project/rlds_334_tasks_distributed/
|
| 6 |
+
dataset_name: bbox_training_r1_lite_5_parts
|
| 7 |
+
run_root_dir: runs/base
|
| 8 |
+
adapter_tmp_dir: adapter_tmp_weights
|
| 9 |
+
hf_token: /galaxea_fulltime/share/.hf_token
|
| 10 |
+
ckpt: /galaxea_fulltime/pretrained_ckpts/pi0_libero/pi0_torch_state.pt
|
| 11 |
+
use_lora: false
|
| 12 |
+
lora_rank: 32
|
| 13 |
+
lora_dropout: 0.0
|
| 14 |
+
use_quantization: false
|
| 15 |
+
enable_bf16: true
|
| 16 |
+
model_param_to_bf16: false
|
| 17 |
+
vla_training_strategy: vla-full-train
|
| 18 |
+
weight_decay: 1.0e-06
|
| 19 |
+
batch_size: 4
|
| 20 |
+
grad_accumulation_steps: 1
|
| 21 |
+
learning_rate: 2.5e-05
|
| 22 |
+
warmup_steps: 500
|
| 23 |
+
lr_scheduler_type: cosine
|
| 24 |
+
image_aug: true
|
| 25 |
+
max_epochs: 8
|
| 26 |
+
save_steps: 23523
|
| 27 |
+
log_steps: 100
|
| 28 |
+
use_torch_compile: false
|
| 29 |
+
wandb_project: 1101_pnp_rla_image_condition_376_tasks_5_parts
|
| 30 |
+
wandb_entity: cuijianning1996-galaxea-ai
|
| 31 |
+
exp_name: 376_tasks_img_as_cond_with_randomly_rotated_bbox
|
| 32 |
+
use_ema: false
|
| 33 |
+
ema:
|
| 34 |
+
update_after_step: 0
|
| 35 |
+
power: 0.67
|
| 36 |
+
DATASET:
|
| 37 |
+
robot_cfg:
|
| 38 |
+
with_left_arm: true
|
| 39 |
+
with_right_arm: true
|
| 40 |
+
with_torso: false
|
| 41 |
+
with_chassis: false
|
| 42 |
+
use_relative_joint_action: true
|
| 43 |
+
window_size: 1
|
| 44 |
+
future_action_window_size: 31
|
| 45 |
+
camera_views:
|
| 46 |
+
- head_condition
|
| 47 |
+
- head
|
| 48 |
+
- wrist_left
|
| 49 |
+
- wrist_right
|
| 50 |
+
shuffle_buffer_size: 10000
|
| 51 |
+
balance_weights: false
|
| 52 |
+
use_last_action: false
|
| 53 |
+
share_datasets_statistics: true
|
| 54 |
+
short_prompt: true
|
| 55 |
+
aug_instruction_kwargs:
|
| 56 |
+
drop_high_level_prob: 1.0
|
| 57 |
+
bbox_as_instruction: false
|
| 58 |
+
image_condition: true
|
| 59 |
+
image_condition_lang_prefix: Pick the object in the first image and place into
|
| 60 |
+
the tableware.
|
| 61 |
+
bbox_jitter_ratio: 0.0
|
| 62 |
+
action_proprio_normalization_type: normal
|
| 63 |
+
use_pretrained_data_stats: false
|
| 64 |
+
proprio_noise_std: 0.05
|
| 65 |
+
image_augment_kwargs:
|
| 66 |
+
head:
|
| 67 |
+
random_brightness:
|
| 68 |
+
- 0.2
|
| 69 |
+
random_contrast:
|
| 70 |
+
- 0.8
|
| 71 |
+
- 1.2
|
| 72 |
+
random_saturation:
|
| 73 |
+
- 0.8
|
| 74 |
+
- 1.2
|
| 75 |
+
random_hue:
|
| 76 |
+
- 0.05
|
| 77 |
+
augment_order:
|
| 78 |
+
- random_brightness
|
| 79 |
+
- random_contrast
|
| 80 |
+
- random_saturation
|
| 81 |
+
- random_hue
|
| 82 |
+
wrist_left:
|
| 83 |
+
random_brightness:
|
| 84 |
+
- 0.2
|
| 85 |
+
random_contrast:
|
| 86 |
+
- 0.8
|
| 87 |
+
- 1.2
|
| 88 |
+
random_saturation:
|
| 89 |
+
- 0.8
|
| 90 |
+
- 1.2
|
| 91 |
+
random_hue:
|
| 92 |
+
- 0.05
|
| 93 |
+
random_drop_all_image:
|
| 94 |
+
- 0.3
|
| 95 |
+
augment_order:
|
| 96 |
+
- random_drop_all_image
|
| 97 |
+
- random_brightness
|
| 98 |
+
- random_contrast
|
| 99 |
+
- random_saturation
|
| 100 |
+
- random_hue
|
| 101 |
+
wrist_right:
|
| 102 |
+
random_brightness:
|
| 103 |
+
- 0.2
|
| 104 |
+
random_contrast:
|
| 105 |
+
- 0.8
|
| 106 |
+
- 1.2
|
| 107 |
+
random_saturation:
|
| 108 |
+
- 0.8
|
| 109 |
+
- 1.2
|
| 110 |
+
random_hue:
|
| 111 |
+
- 0.05
|
| 112 |
+
random_drop_all_image:
|
| 113 |
+
- 0.3
|
| 114 |
+
augment_order:
|
| 115 |
+
- random_drop_all_image
|
| 116 |
+
- random_brightness
|
| 117 |
+
- random_contrast
|
| 118 |
+
- random_saturation
|
| 119 |
+
- random_hue
|
| 120 |
+
model_family: galaxea_zero
|
| 121 |
+
MODEL:
|
| 122 |
+
name: vla.galaxea_zero.GalaxeaZeroWrapper
|
| 123 |
+
vla_name: "paligemma-3b-pt-224"
|
| 124 |
+
load_inside: False
|
| 125 |
+
pretrained_model_path: /galaxea_fulltime/pretrained_ckpts/cache/paligemma-3b-pt-224
|
| 126 |
+
input_ids: True
|
| 127 |
+
action_expert_only: False
|
| 128 |
+
image_token_index: 257152
|
| 129 |
+
vocab_size: 257216
|
| 130 |
+
pad_token_id: 0
|
| 131 |
+
cond_steps: 1 # len proprio
|
| 132 |
+
horizon_steps: 32
|
| 133 |
+
action_dim: 26 # 2 x [QPOS (6) + gripper (1)] + Torso Velocity (6) + Chassis Velocity (6)
|
| 134 |
+
proprio_dim: 21 # 2 * [QPOS (6) + gripper (1)] + 4 (torso) + 3 (base vel) = 21; last action (26) is not included here (use_last_action: false)
|
| 135 |
+
max_text_tokens: 55 # 55 for galaxea0002
|
| 136 |
+
max_seq_len: ${eval:'${MODEL.num_input_images} * ${MODEL.vision.num_image_tokens} + ${MODEL.max_text_tokens}'}
|
| 137 |
+
max_image_text_tokens: ${MODEL.max_seq_len} # = ${max_seq_len}
|
| 138 |
+
action_decoder_layers: 2
|
| 139 |
+
flow_sampling: beta
|
| 140 |
+
num_inference_steps: 10
|
| 141 |
+
final_action_clip_value: null # data normalized in [-1,1]
|
| 142 |
+
action_expert_adaptive_mode: null
|
| 143 |
+
num_input_images: ${eval:'${DATASET.window_size} * len(${DATASET.camera_views})'} # $DATASET.window_size * LEN($DATASET.camera_views)
|
| 144 |
+
vision:
|
| 145 |
+
name: vla.model.paligemma.siglip.SiglipVisionModel
|
| 146 |
+
hidden_size: 1152 # siglip
|
| 147 |
+
intermediate_size: 4304
|
| 148 |
+
num_hidden_layers: 27
|
| 149 |
+
num_attention_heads: 16
|
| 150 |
+
num_channels: 3
|
| 151 |
+
image_size: 224
|
| 152 |
+
patch_size: 14
|
| 153 |
+
layer_norm_eps: 0.000001
|
| 154 |
+
attention_dropout: 0.0
|
| 155 |
+
num_image_tokens: 256
|
| 156 |
+
lora:
|
| 157 |
+
r: ${lora_rank}
|
| 158 |
+
dropout: ${lora_dropout}
|
| 159 |
+
use_quantize: False
|
| 160 |
+
use_lora: False
|
| 161 |
+
vision_projector:
|
| 162 |
+
name: vla.model.paligemma.siglip.PaliGemmaMultiModalProjector
|
| 163 |
+
vision_config:
|
| 164 |
+
hidden_size: 1152
|
| 165 |
+
projection_dim: 2048
|
| 166 |
+
lora:
|
| 167 |
+
r: ${lora_rank}
|
| 168 |
+
dropout: ${lora_dropout}
|
| 169 |
+
use_quantize: False
|
| 170 |
+
use_lora: False
|
| 171 |
+
joint:
|
| 172 |
+
name: vla.model.g0.joint_model.JointModel
|
| 173 |
+
action_expert_adaptive_mode: null
|
| 174 |
+
mixture:
|
| 175 |
+
vlm: # gemma
|
| 176 |
+
hidden_size: 2048
|
| 177 |
+
intermediate_size: 16384
|
| 178 |
+
use_final_norm: False
|
| 179 |
+
cache: True
|
| 180 |
+
use_quantize: False
|
| 181 |
+
use_lora: False
|
| 182 |
+
adaptive_mode: # not applicable for gemma
|
| 183 |
+
proprio:
|
| 184 |
+
hidden_size: 1024
|
| 185 |
+
intermediate_size: 4096
|
| 186 |
+
use_final_norm: True # technically no, but sharing weights with action anyway
|
| 187 |
+
cache: True
|
| 188 |
+
use_quantize: False
|
| 189 |
+
use_lora: False
|
| 190 |
+
adaptive_mode: null
|
| 191 |
+
action:
|
| 192 |
+
hidden_size: 1024
|
| 193 |
+
intermediate_size: 4096
|
| 194 |
+
use_final_norm: True
|
| 195 |
+
cache: False
|
| 196 |
+
use_quantize: False
|
| 197 |
+
use_lora: False
|
| 198 |
+
adaptive_mode: null
|
| 199 |
+
time_hidden_size: 256 # only applicable if using adaptive
|
| 200 |
+
lora:
|
| 201 |
+
r: ${lora_rank}
|
| 202 |
+
dropout: ${lora_dropout}
|
| 203 |
+
num_hidden_layers: 18
|
| 204 |
+
num_attention_heads: 8
|
| 205 |
+
num_key_value_heads: 1
|
| 206 |
+
head_dim: 256
|
| 207 |
+
max_position_embeddings: 8192
|
| 208 |
+
rms_norm_eps: 0.000001
|
| 209 |
+
rope_theta: 10000.0
|
| 210 |
+
attention_bias: False
|
| 211 |
+
attention_dropout: 0.0
|
| 212 |
+
pad_token_id: 0
|
| 213 |
+
#################################################################################################################
|
| 214 |
+
# For evaluation
|
| 215 |
+
#################################################################################################################
|
| 216 |
+
EVALUATION:
|
| 217 |
+
checkpoint: null # Pretrained checkpoint path
|
| 218 |
+
load_in_8bit: False # (For OpenVLA only) Load with 8-bit quantization
|
| 219 |
+
load_in_4bit: False # (For OpenVLA only) Load with 4-bit quantization
|
| 220 |
+
center_crop: True # Center crop? (if trained w/ random crop image aug)
|
| 221 |
+
#################################################################################################################
|
| 222 |
+
# LIBERO environment-specific parameters
|
| 223 |
+
#################################################################################################################
|
| 224 |
+
task_suite_name: "simpler_widowx" # Task suite. Currently simpler_widowx; LIBERO options: libero_spatial, libero_object, libero_goal, libero_10, libero_90
|
| 225 |
+
num_steps_wait: 10 # Number of steps to wait for objects to stabilize in sim
|
| 226 |
+
num_trials_per_task: 24 # Number of rollouts per task
|
| 227 |
+
use_wrist_image: False
|
| 228 |
+
#################################################################################################################
|
| 229 |
+
# Utils
|
| 230 |
+
#################################################################################################################
|
| 231 |
+
run_id_note: None # Extra note to add in run ID for logging (NOTE: YAML reads a bare None as the string "None"; use null for no note)
|
| 232 |
+
local_log_dir: "./experiments/logs" # Local directory for eval logs
|
| 233 |
+
use_wandb: False # Whether to also log results in Weights & Biases
|
| 234 |
+
seed: 7 # Random Seed (for reproducibility)
|
G0_Plus_P&P/dataset_statistics.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
G0_Plus_P&P/model_282276.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbf962aa4693dfdb2cef7560ad36f5382a064ad69a45dad59c5c878599eee3b1
|
| 3 |
+
size 12957251217
|