Add files using upload-large-folder tool
Browse files- .gitattributes +1 -0
- assets/head.png +3 -0
- data/agilex/hdf5totfrecords.py +114 -0
- data/empty_lang_embed.pt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/head.png filter=lfs diff=lfs merge=lfs -text
|
assets/head.png
ADDED
|
Git LFS Details
|
data/agilex/hdf5totfrecords.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tensorflow as tf
|
| 2 |
+
import h5py
|
| 3 |
+
import os
|
| 4 |
+
import fnmatch
|
| 5 |
+
import shutil
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
from multiprocessing import Pool
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _bytes_feature(value):
    """Wrap a string / byte value in a bytes_list tf.train.Feature."""
    eager_tensor_type = type(tf.constant(0))
    # BytesList won't unpack a string out of an EagerTensor, so pull the
    # raw value out of tensors first.
    raw = value.numpy() if isinstance(value, eager_tensor_type) else value
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[raw]))
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _bool_feature(value):
    """Wrap a boolean in an int64_list tf.train.Feature (True -> 1, False -> 0)."""
    as_int = int(value)
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[as_int]))
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def serialize_example(action, base_action, qpos, qvel, cam_high, cam_left_wrist, cam_right_wrist, instruction, terminate_episode):
    """Serialize one timestep of an episode into a tf.train.Example byte string.

    Array-like state fields (action, base_action, qpos, qvel) are stored as
    serialized tensors; camera frames are stored as their raw bytes wrapped
    in a string tensor; the instruction is stored as-is and the
    terminate_episode flag as an int64.
    """
    def _tensor_feature(arr):
        # Serialize an array/tensor into a bytes feature.
        return _bytes_feature(tf.io.serialize_tensor(arr))

    def _frame_feature(frame):
        # Store a camera frame's raw bytes inside a serialized string tensor.
        as_string_tensor = tf.convert_to_tensor(frame.tobytes(), dtype=tf.string)
        return _bytes_feature(tf.io.serialize_tensor(as_string_tensor))

    feature = {
        'action': _tensor_feature(action),
        'base_action': _tensor_feature(base_action),
        'qpos': _tensor_feature(qpos),
        'qvel': _tensor_feature(qvel),
        'cam_high': _frame_feature(cam_high),
        'cam_left_wrist': _frame_feature(cam_left_wrist),
        'cam_right_wrist': _frame_feature(cam_right_wrist),
        'instruction': _bytes_feature(instruction),
        'terminate_episode': _bool_feature(terminate_episode),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example.SerializeToString()
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def process_hdf5_file(args):
    """Convert a single HDF5 episode file into a TFRecord file.

    Args:
        args: Tuple of (filepath, root_dir, out_dir), packed as one tuple
            so this function can be used with Pool.imap_unordered.

    Returns:
        A status string: success, "already exists" skip, or failure.
    """
    filepath, root_dir, out_dir = args
    # Mirror the source directory layout under out_dir.
    output_dir = os.path.join(out_dir, os.path.relpath(os.path.dirname(filepath), root_dir))
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.basename(filepath)
    tfrecord_path = os.path.join(output_dir, filename.replace('.hdf5', '.tfrecord'))

    # Skip files that were already fully converted (non-empty output).
    if os.path.exists(tfrecord_path) and os.path.getsize(tfrecord_path) > 0:
        return f"TFRecords already exist at {tfrecord_path}"
    try:
        with h5py.File(filepath, 'r') as f, tf.io.TFRecordWriter(tfrecord_path) as writer:
            num_steps = f['action'].shape[0]
            # Remove the first few still steps: find the first timestep whose
            # qpos differs from the initial qpos by more than EPS.
            EPS = 1e-2
            qpos_all = f['observations']['qpos'][:]
            qpos_delta = np.abs(qpos_all - qpos_all[0:1])
            indices = np.where(np.any(qpos_delta > EPS, axis=1))[0]
            if len(indices) > 0:
                first_idx = indices[0]
            else:
                raise ValueError("Found no qpos that exceeds the threshold.")

            # Start one step before the first moving step (first_idx >= 1,
            # since step 0's delta against itself is zero).
            for i in range(first_idx - 1, num_steps):
                action = f['action'][i]
                base_action = f['base_action'][i]
                qpos = f['observations']['qpos'][i]
                qvel = f['observations']['qvel'][i]
                cam_high = f['observations']['images']['cam_high'][i]
                cam_left_wrist = f['observations']['images']['cam_left_wrist'][i]
                cam_right_wrist = f['observations']['images']['cam_right_wrist'][i]
                instruction = f['instruction'][()]
                terminate_episode = i == num_steps - 1
                serialized_example = serialize_example(action, base_action, qpos, qvel, cam_high, cam_left_wrist, cam_right_wrist, instruction, terminate_episode)
                writer.write(serialized_example)
    except Exception as e:
        # Remove the partially-written output so a rerun does not mistake it
        # for a finished conversion (see the size check above).
        if os.path.exists(tfrecord_path):
            os.remove(tfrecord_path)
        with open("error_log.txt", "a") as log_file:
            log_file.write(f"{filepath}\n")
        print(f"error at {filepath}: {e}")
        return f"Failed to write TFRecords for {filepath}: {e}"
    return f"TFRecords written to {tfrecord_path}"
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def write_tfrecords(root_dir, out_dir, num_workers=16):
    """Convert every .hdf5 file under root_dir into TFRecords under out_dir.

    The source directory structure is mirrored under out_dir, and any
    instruction JSON files found alongside the episodes are copied over
    (plain "expanded_instruction.json" files are renamed to the canonical
    "expanded_instruction_gpt-4-turbo.json").

    Args:
        root_dir: Directory tree containing .hdf5 episode files.
        out_dir: Destination root for the .tfrecord files.
        num_workers: Size of the multiprocessing pool (default 16).
    """
    os.makedirs(out_dir, exist_ok=True)

    gpt4_name = "expanded_instruction_gpt-4-turbo.json"
    plain_name = "expanded_instruction.json"

    hdf5_files = []
    for root, dirs, files in os.walk(root_dir):
        target_path = os.path.join(out_dir, os.path.relpath(root, root_dir))
        if os.path.exists(os.path.join(root, gpt4_name)):
            # Copy the instruction file next to the converted episodes.
            os.makedirs(target_path, exist_ok=True)
            shutil.copy(os.path.join(root, gpt4_name), target_path)
        elif os.path.exists(os.path.join(root, plain_name)):
            print(root)
            os.makedirs(target_path, exist_ok=True)
            # Copy and rename in one step to the canonical gpt-4-turbo name.
            shutil.copy(os.path.join(root, plain_name),
                        os.path.join(target_path, gpt4_name))
        for filename in fnmatch.filter(files, '*.hdf5'):
            filepath = os.path.join(root, filename)
            hdf5_files.append((filepath, root_dir, out_dir))

    with Pool(num_workers) as pool:
        max_count = len(hdf5_files)
        with tqdm(total=max_count) as pbar:
            for _ in pool.imap_unordered(process_hdf5_file, hdf5_files):
                pbar.update(1)

    print(f"TFRecords written to {out_dir}")
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# Script entry point. The __main__ guard is required because
# write_tfrecords spawns a multiprocessing Pool: without it, worker
# processes re-importing this module (spawn start method) would
# recursively relaunch the conversion.
if __name__ == "__main__":
    root_dir = "../datasets/agilex/rdt_data/"
    out_dir = "../datasets/agilex/tfrecords/"
    write_tfrecords(root_dir, out_dir)
|
data/empty_lang_embed.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b073685d3b8627ac068e7907f4d53e1b831729fd34e01e05ed96ebe53bf19633
|
| 3 |
+
size 9432
|