diff --git a/.github/rename_project.sh b/.github/rename_project.sh deleted file mode 100644 index 4ce3dfe..0000000 --- a/.github/rename_project.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -while getopts a:n:u:d: flag -do - case "${flag}" in - a) author=${OPTARG};; - n) name=${OPTARG};; - u) urlname=${OPTARG};; - d) description=${OPTARG};; - esac -done - -echo "Author: $author"; -echo "Project Name: $name"; -echo "Project URL name: $urlname"; -echo "Description: $description"; - -echo "Renaming project..." - -original_author="BerkeleyAutomation" -original_name="fog_x" -original_urlname="fog_x" -original_description="Awesome fog_x created by BerkeleyAutomation" -# for filename in $(find . -name "*.*") -for filename in $(git ls-files) -do - sed -i "s/$original_author/$author/g" $filename - sed -i "s/$original_name/$name/g" $filename - sed -i "s/$original_urlname/$urlname/g" $filename - sed -i "s/$original_description/$description/g" $filename - echo "Renamed $filename" -done - -mv fog_x $name - -# This command runs only once on GHA! 
-rm -rf .github/template.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e8d83ed..c2a2862 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -3,24 +3,42 @@ name: Lint on: push: paths: - - '*.py' + - '**.py' + - '**/pyproject.toml' + - '**/pytest.ini' + pull_request: + paths: + - '**.py' + - '**/pyproject.toml' + - '**/pytest.ini' jobs: mypy: + name: MyPy Type Check runs-on: ubuntu-latest steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python uses: actions/setup-python@v5 with: - python-version: 3.7.4 - architecture: x64 - - name: Checkout - uses: actions/checkout@v4 - - name: Install mypy - run: pip install mypy - - name: Run mypy - uses: sasanquaneuf/mypy-github-action@releases/v1 + python-version: '3.10' + + - name: Cache pip packages + uses: actions/cache@v4 with: - checkName: 'mypy' # NOTE: this needs to be the same as the job name - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + path: ~/.cache/pip + key: ${{ runner.os }}-pip-mypy-${{ hashFiles('**/pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip-mypy- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install mypy + pip install -e . 
+ + - name: Run mypy + run: | + mypy robodm --ignore-missing-imports --check-untyped-defs --show-error-codes --pretty \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6dee7ae..6df835e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,49 +6,198 @@ name: CI on: # Triggers the workflow on push or pull request events but only for the main branch push: - branches: [ main ] + branches: [ main, master ] pull_request: - branches: [ main ] + branches: [ main, master ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: +env: + PYTHONPATH: ${{ github.workspace }} + jobs: + format-check: + name: Format Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Cache pip packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-format-${{ hashFiles('**/pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip-format- + + - name: Install formatting tools + run: | + python -m pip install --upgrade pip + pip install yapf black isort mypy pylint flake8 + + - name: Run format check + run: | + bash format.sh --all + + - name: Check for formatting changes + run: | + if ! git diff --quiet; then + echo "Code formatting issues detected. Please run 'bash format.sh --all' locally." 
+ git diff + exit 1 + fi + linter: + name: Lint + runs-on: ubuntu-latest + needs: format-check strategy: fail-fast: false matrix: - python-version: [3.9] - os: [ubuntu-latest] - runs-on: ${{ matrix.os }} + python-version: ['3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + + - name: Cache pip packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-lint-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip-lint-${{ matrix.python-version }}- + - name: Install project - run: make install + run: | + python -m pip install --upgrade pip + # Install test dependencies + pip install pytest pytest-cov flake8 black mypy isort yapf pylint + # Install project in editable mode + pip install -e . + - name: Run linter run: make lint - tests_linux: + tests: + name: Tests + runs-on: ${{ matrix.os }} needs: linter strategy: fail-fast: false matrix: - python-version: [3.9] - os: [ubuntu-latest] - runs-on: ${{ matrix.os }} + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.10', '3.11', '3.12'] + exclude: + # Reduce CI load by testing fewer combinations on non-Ubuntu + - os: macos-latest + python-version: '3.11' + - os: windows-latest + python-version: '3.11' steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install project - run: make install - - name: Run tests - run: make test - - name: "Upload coverage to Codecov" + + - name: Cache pip packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-test-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml') }} + restore-keys: | + ${{ runner.os 
}}-pip-test-${{ matrix.python-version }}- + + - name: Install system dependencies (Ubuntu) + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt-get update + sudo apt-get install -y ffmpeg + + - name: Install system dependencies (macOS) + if: matrix.os == 'macos-latest' + run: | + brew install ffmpeg + + - name: Install system dependencies (Windows) + if: matrix.os == 'windows-latest' + shell: powershell + run: | + # Install ffmpeg via chocolatey + choco install ffmpeg -y + + - name: Install project with test dependencies + run: | + python -m pip install --upgrade pip + # Install test dependencies + pip install pytest pytest-cov pytest-benchmark coverage + # Install project with optional dependencies for comprehensive testing + pip install -e .[all] + + - name: Run fast tests + run: | + pytest tests/ -v -m "not slow and not benchmark" --cov=robodm --cov-report=xml --cov-report=term-missing + + - name: Run slow tests (Ubuntu only) + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' + run: | + pytest tests/ -v -m "slow" --cov=robodm --cov-append --cov-report=xml + + - name: Upload coverage to Codecov + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' uses: codecov/codecov-action@v4 - # with: - # fail_ci_if_error: true + with: + file: ./coverage.xml + fail_ci_if_error: false + verbose: true + + benchmark: + name: Benchmark Tests + runs-on: ubuntu-latest + needs: tests + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y ffmpeg + + - name: Install project with all dependencies + run: | + python -m pip install --upgrade pip + pip install pytest pytest-benchmark + pip install -e .[all] + + - name: Run benchmark tests + run: | + pytest tests/ -v -m "benchmark" --benchmark-only 
--benchmark-json=benchmark.json + + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + if: always() + with: + tool: 'pytest' + output-file-path: benchmark.json + github-token: ${{ secrets.GITHUB_TOKEN }} + auto-push: true + comment-on-alert: true + alert-threshold: '200%' + fail-on-alert: false diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0dc9472..378bcea 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,12 +1,9 @@ -name: Upload Python Package +name: Release on: push: - # Sequence of patterns matched against refs/tags tags: - - '*' # Push events to matching v*, i.e. v1.0, v20.15.10 - - # Allows you to run this workflow manually from the Actions tab + - '*' # Push events to matching any tag workflow_dispatch: jobs: @@ -18,33 +15,66 @@ jobs: steps: - uses: actions/checkout@v4 with: - # by default, it uses a depth of 1 - # this fetches all history so that we can read each commit - fetch-depth: 0 + fetch-depth: 0 # Fetch all history for changelog generation + - name: Generate Changelog run: .github/release_message.sh > release_message.md + - name: Release uses: softprops/action-gh-release@v2 with: body_path: release_message.md + test-before-deploy: + name: Test Before Deploy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y ffmpeg + + - name: Install and test + run: | + python -m pip install --upgrade pip + pip install pytest + pip install -e .[all] + pytest tests/ -m "not slow and not benchmark" -x + deploy: - needs: release + name: Deploy to PyPI + needs: [release, test-before-deploy] runs-on: ubuntu-latest + environment: release + permissions: + id-token: write # For trusted publishing steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: 
actions/setup-python@v5 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - run: | - python setup.py sdist bdist_wheel - twine upload dist/* + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install build dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: Build package + run: python -m build + + - name: Check package + run: twine check dist/* + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + skip-existing: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7d807fc..3a600ff 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,16 +1,19 @@ -# How to develop on this project +# Contributing to Robo-DM -fog_x welcomes contributions from the community. +robodm welcomes contributions from the community. **You need PYTHON3!** This instructions are for linux base systems. (Linux, MacOS, BSD, etc.) -## Setting up your own fork of this repo. -- On github interface click on `Fork` button. -- Clone your fork of this repo. `git clone git@github.com:YOUR_GIT_USERNAME/fog_x.git` -- Enter the directory `cd fog_x` -- Add upstream repo `git remote add upstream https://github.com/BerkeleyAutomation/fog_x` +## Development Setup + +To set up a development environment: + +1. Fork the repository on GitHub +2. Clone your fork of this repo. `git clone git@github.com:YOUR_GIT_USERNAME/robodm.git` +3. Enter the directory `cd robodm` +4. Add upstream repo `git remote add upstream https://github.com/BerkeleyAutomation/robodm` ## Setting up your own virtual environment @@ -68,6 +71,7 @@ Run `git push origin my_contribution` On github interface, click on `Pull Request` button. 
Wait CI to run and one of the developers will review your PR. + ## Makefile utilities This project comes with a `Makefile` that contains a number of useful utility. diff --git a/Containerfile b/Containerfile index 83c8e89..74b19aa 100644 --- a/Containerfile +++ b/Containerfile @@ -23,4 +23,4 @@ RUN pip3 install jupyter COPY . / -CMD ["fog_x"] +CMD ["robodm"] diff --git a/MANIFEST.in b/MANIFEST.in index 9c415d1..183bb09 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,6 @@ +include README.md include LICENSE include HISTORY.md -include Containerfile -graft tests -graft fog_x +graft robodm +global-exclude *.pyc +global-exclude __pycache__ diff --git a/Makefile b/Makefile index 962034c..fdcc8c6 100644 --- a/Makefile +++ b/Makefile @@ -26,22 +26,22 @@ install: ## Install the project in dev mode. .PHONY: fmt fmt: ## Format code using black & isort. - $(ENV_PREFIX)isort fog_x/ - $(ENV_PREFIX)black -l 79 fog_x/ + $(ENV_PREFIX)isort robodm/ + $(ENV_PREFIX)black -l 79 robodm/ $(ENV_PREFIX)black -l 79 tests/ $(ENV_PREFIX)isort examples/ $(ENV_PREFIX)black -l 79 examples/ .PHONY: lint lint: ## Run pep8, black, mypy linters. - $(ENV_PREFIX)flake8 fog_x/ - $(ENV_PREFIX)black -l 79 --check fog_x/ + $(ENV_PREFIX)flake8 robodm/ + $(ENV_PREFIX)black -l 79 --check robodm/ $(ENV_PREFIX)black -l 79 --check tests/ - $(ENV_PREFIX)mypy --ignore-missing-imports fog_x/ + $(ENV_PREFIX)mypy --ignore-missing-imports robodm/ .PHONY: test test: lint ## Run tests and generate coverage report. - $(ENV_PREFIX)pytest -v --cov-config .coveragerc --cov=fog_x -l --tb=short --maxfail=1 tests/ + $(ENV_PREFIX)pytest -v --cov-config .coveragerc --cov=robodm -l --tb=short --maxfail=1 tests/ $(ENV_PREFIX)coverage xml $(ENV_PREFIX)coverage html @@ -80,9 +80,9 @@ virtualenv: ## Create a virtual environment. release: ## Create a new tag for release. @echo "WARNING: This operation will create s version tag and push to github" @read -p "Version? 
(provide the next x.y.z semver) : " TAG - @echo "$${TAG}" > fog_x/VERSION + @echo "$${TAG}" > robodm/VERSION @$(ENV_PREFIX)gitchangelog > HISTORY.md - @git add fog_x/VERSION HISTORY.md + @git add robodm/VERSION HISTORY.md @git commit -m "release: version $${TAG} ๐Ÿš€" @echo "creating git tag : $${TAG}" @git tag $${TAG} @@ -103,7 +103,7 @@ switch-to-poetry: ## Switch to poetry package manager. @poetry init --no-interaction --name=a_flask_test --author=rochacbruno @echo "" >> pyproject.toml @echo "[tool.poetry.scripts]" >> pyproject.toml - @echo "fog_x = 'fog_x.__main__:main'" >> pyproject.toml + @echo "robodm = 'robodm.__main__:main'" >> pyproject.toml @cat requirements.txt | while read in; do poetry add --no-interaction "$${in}"; done @cat requirements-test.txt | while read in; do poetry add --no-interaction "$${in}" --dev; done @poetry install --no-interaction @@ -111,7 +111,7 @@ switch-to-poetry: ## Switch to poetry package manager. @mv requirements* .github/backup @mv setup.py .github/backup @echo "You have switched to https://python-poetry.org/ package manager." - @echo "Please run 'poetry shell' or 'poetry run fog_x'" + @echo "Please run 'poetry shell' or 'poetry run robodm'" .PHONY: init init: ## Initialize the project based on an application template. 
diff --git a/README.md b/README.md index b03e7b5..ca187c8 100644 --- a/README.md +++ b/README.md @@ -3,29 +3,27 @@ **An Efficient and Scalable Data Collection and Management Framework For Robotics Learning** [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) -[![License](https://img.shields.io/github/license/BerkeleyAutomation/fog_x)](LICENSE) -[![Tests](https://github.com/BerkeleyAutomation/fog_x/workflows/Tests/badge.svg)](https://github.com/BerkeleyAutomation/fog_x/actions) +[![License](https://img.shields.io/github/license/BerkeleyAutomation/robodm)](LICENSE) +[![Tests](https://github.com/BerkeleyAutomation/robodm/workflows/Tests/badge.svg)](https://github.com/BerkeleyAutomation/robodm/actions) -fog_x is a high-performance robotics data management framework that enables efficient collection, storage, and retrieval of multimodal robotics trajectories. Built with speed ๐Ÿš€ and memory efficiency ๐Ÿ“ˆ in mind, fog_x provides native support for various robotics data formats and cloud storage systems. +robodm is a high-performance robotics data management framework that enables efficient collection, storage, and retrieval of multimodal robotics trajectories. Built with speed ๐Ÿš€ and memory efficiency ๐Ÿ“ˆ in mind, robodm provides native support for various robotics data formats and cloud storage systems. ## โœจ Key Features - **๐Ÿš€ High Performance**: Optimized for speed with active metadata and lazily-loaded trajectory data - **๐Ÿ“ˆ Memory Efficient**: Smart data loading and compression strategies minimize memory usage - **๐ŸŽฅ Advanced Video Compression**: Support for multiple codecs (H.264, H.265, AV1, FFV1) with automatic codec selection -- **โ˜๏ธ Cloud Native**: Built-in support for cloud storage systems (AWS S3, etc.) 
- **๐Ÿ”„ Format Compatibility**: Native support for Open-X-Embodiment, HuggingFace datasets, RLDS, and HDF5 - **๐ŸŽฏ Flexible Data Types**: Handle images, videos, sensor data, and custom features seamlessly - **๐Ÿ—๏ธ Distributed Ready**: Flexible dataset partitioning for distributed training workflows -- **๐Ÿงช Test Coverage**: Comprehensive test suite with benchmarking capabilities ## ๐Ÿ› ๏ธ Installation ### Basic Installation ```bash -git clone https://github.com/BerkeleyAutomation/fog_x.git -cd fog_x +git clone https://github.com/BerkeleyAutomation/robodm.git +cd robodm pip install -e . ``` @@ -54,10 +52,10 @@ pip install -e .[all] ```python import numpy as np -import fog_x +import robodm # Create a new trajectory for data collection -trajectory = fog_x.Trajectory(path="/tmp/robot_demo.vla", mode="w") +trajectory = robodm.Trajectory(path="/tmp/robot_demo.vla", mode="w") # Collect multimodal robotics data for step in range(100): @@ -77,7 +75,7 @@ for step in range(100): trajectory.close() # Load the trajectory for training -trajectory = fog_x.Trajectory(path="/tmp/robot_demo.vla", mode="r") +trajectory = robodm.Trajectory(path="/tmp/robot_demo.vla", mode="r") data = trajectory.load() print(f"Loaded trajectory with {len(data['camera/rgb'])} timesteps") @@ -88,7 +86,7 @@ print(f"Joint positions shape: {data['robot/joint_positions'][0].shape}") ### Batch Data Creation ```python -import fog_x +import robodm # Create trajectory from dictionary of lists data = { @@ -97,7 +95,7 @@ data = { "action": [np.random.rand(7) for _ in range(50)], } -trajectory = fog_x.Trajectory.from_dict_of_lists( +trajectory = robodm.Trajectory.from_dict_of_lists( data=data, path="/tmp/batch_trajectory.vla", video_codec="libaom-av1" # Use AV1 codec for efficient compression @@ -107,10 +105,10 @@ trajectory = fog_x.Trajectory.from_dict_of_lists( ### Advanced Configuration ```python -import fog_x +import robodm # Configure video compression settings -trajectory = fog_x.Trajectory( 
+trajectory = robodm.Trajectory( path="/tmp/compressed_demo.vla", mode="w", video_codec="libx265", # Use H.265 codec @@ -127,49 +125,9 @@ trajectory.add("sensors/camera/wrist/rgb", wrist_camera) trajectory.add("control/arm/joint_positions", joint_positions) ``` -## ๐Ÿ“Š Data Loaders - -fog_x includes specialized loaders for common robotics datasets: - -### HDF5 Loader - -```python -from fog_x.loader import HDF5Loader - -# Convert HDF5 datasets to fog_x format -loader = HDF5Loader() -loader.convert_to_trajectory( - input_path="/path/to/dataset.h5", - output_path="/path/to/output.vla" -) -``` - -### RLDS (Reverb Dataset) Loader - -```python -from fog_x.loader import RLDSLoader - -# Load from RLDS format -loader = RLDSLoader() -trajectory = loader.load_from_rlds( - dataset_path="/path/to/rlds_dataset", - output_path="/path/to/output.vla" -) -``` - -### VLA (Video Language Action) Loader - -```python -from fog_x.loader import VLALoader - -# Efficient VLA data loading -loader = VLALoader() -dataset = loader.load_dataset("/path/to/vla_files") -``` - ## ๐ŸŽฅ Video Codec Support -fog_x supports multiple video codecs for efficient storage of visual data: +robodm supports multiple video codecs for efficient storage of visual data: | Codec | Use Case | Compression | Quality | |-------|----------|-------------|---------| @@ -182,81 +140,10 @@ fog_x supports multiple video codecs for efficient storage of visual data: ```python # Automatic codec selection based on data characteristics -trajectory = fog_x.Trajectory(path="auto.vla", mode="w", video_codec="auto") +trajectory = robodm.Trajectory(path="auto.vla", mode="w", video_codec="auto") # Manual codec selection for specific needs -trajectory = fog_x.Trajectory(path="lossless.vla", mode="w", video_codec="ffv1") -``` - -## โ˜๏ธ Cloud Storage Integration - -```python -import fog_x - -# Direct S3 integration (requires aws optional dependencies) -trajectory = fog_x.Trajectory( - path="s3://my-bucket/trajectories/demo.vla", - 
mode="w" -) - -# Add data as usual -trajectory.add("observation", image_data) -trajectory.close() - -# Load from cloud storage -trajectory = fog_x.Trajectory( - path="s3://my-bucket/trajectories/demo.vla", - mode="r" -) -data = trajectory.load() -``` - -## ๐Ÿญ Factory Pattern for Advanced Use Cases - -```python -from fog_x import TrajectoryFactory - -# Create factory with custom dependencies -factory = TrajectoryFactory( - filesystem=custom_filesystem, - time_provider=custom_timer -) - -# Create trajectories with dependency injection -trajectory = factory.create_trajectory( - path="/tmp/test.vla", - mode="w", - video_codec="libaom-av1" -) -``` - -## ๐Ÿ”ง API Reference - -### Core Classes - -- **`Trajectory`**: Main class for data collection and loading -- **`FeatureType`**: Type system for trajectory features -- **`TrajectoryFactory`**: Factory for creating trajectory instances -- **`CodecConfig`**: Video codec configuration management - -### Key Methods - -- **`add(feature, data, timestamp=None)`**: Add single feature to trajectory -- **`add_by_dict(data, timestamp=None)`**: Add multiple features from dictionary -- **`load(return_type="numpy")`**: Load trajectory data -- **`close(compact=True)`**: Close and optionally compact trajectory -- **`from_dict_of_lists(data, path, ...)`**: Create trajectory from structured data - -## ๐Ÿ“ˆ Performance & Benchmarks - -Run benchmarks to test performance on your system: - -```bash -# Run comprehensive benchmarks -python -m pytest tests/test_trajectory.py::test_benchmark -v - -# Run specific codec benchmarks -python tests/benchmark_codecs.py +trajectory = robodm.Trajectory(path="lossless.vla", mode="w", video_codec="ffv1") ``` ## ๐Ÿงช Development & Testing @@ -275,18 +162,6 @@ pytest tests/test_trajectory.py -v pytest tests/test_loaders.py -v ``` -### Code Quality - -```bash -# Format code -make fmt - -# Run linters -make lint - -# Generate documentation -make docs -``` ## ๐Ÿ“ Examples @@ -311,7 +186,7 @@ This project is 
licensed under the BSD 3-Clause License. See [LICENSE](LICENSE) ## ๐Ÿ“š Citation -If you use fog_x in your research, please cite: +If you use robodm in your research, please cite: ```bibtex @article{chen2025robo, diff --git a/examples/data_collection_and_load.py b/examples/data_collection_and_load.py index e016112..528616f 100644 --- a/examples/data_collection_and_load.py +++ b/examples/data_collection_and_load.py @@ -1,31 +1,38 @@ -import time +import os +import tempfile import numpy as np -import fog_x +import robodm -path = "/tmp/output.vla" +if __name__ == "__main__": + path = os.path.join(tempfile.gettempdir(), "test_trajectory.vla") -# ๐ŸฆŠ Data collection: -# create a new trajectory -traj = fog_x.Trajectory(path=path, mode="w") + # Create a trajectory + traj = robodm.Trajectory(path=path, mode="w") -# collect step data for the episode -for i in range(100): - time.sleep(0.001) - traj.add(feature="arm_view", data=np.ones((640, 480, 3), dtype=np.uint8)) - traj.add(feature="gripper_pose", data=np.ones((4, 4), dtype=np.float32)) - traj.add(feature="view", data=np.ones((640, 480, 3), dtype=np.uint8)) - traj.add(feature="wrist_view", data=np.ones((640, 480, 3), dtype=np.uint8)) - traj.add(feature="joint_angles", data=np.ones((7, ), dtype=np.float32)) - traj.add(feature="joint_velocities", data=np.ones((7, ), dtype=np.float32)) - traj.add(feature="joint_torques", data=np.ones((7, ), dtype=np.float32)) - traj.add(feature="ee_force", data=np.ones((6, ), dtype=np.float32)) - traj.add(feature="ee_velocity", data=np.ones((6, ), dtype=np.float32)) - traj.add(feature="ee_pose", data=np.ones((4, 4), dtype=np.float32)) + # Add some data + for i in range(10): + traj.add( + "observation/image", + np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8), + ) + traj.add("observation/state", np.random.rand(10).astype(np.float32)) + traj.add("action", np.random.rand(7).astype(np.float32)) -traj.close() + # Close the trajectory + traj.close() -traj = fog_x.Trajectory(path=path, 
mode="r") + print(f"Trajectory saved to {path}") -print(traj.load()) + # Load the trajectory + traj = robodm.Trajectory(path=path, mode="r") + data = traj.load() + + print(f"Loaded trajectory with {len(data['observation/image'])} timesteps") + print(f"Image shape: {data['observation/image'][0].shape}") + print(f"State shape: {data['observation/state'][0].shape}") + print(f"Action shape: {data['action'][0].shape}") + + # Clean up + os.remove(path) diff --git a/fog_x/__init__.py b/fog_x/__init__.py deleted file mode 100644 index b2cd4fd..0000000 --- a/fog_x/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -import os - -__root_dir__ = os.path.dirname(os.path.abspath(__file__)) - -# from fog_x import dataset, episode, feature -# from fog_x.dataset import Dataset -# from fog_x import trajectory - -from fog_x.feature import FeatureType -from fog_x.trajectory import Trajectory -from fog_x.trajectory_base import (FileSystemInterface, TimeProvider, - TrajectoryInterface) -from fog_x.trajectory_factory import TrajectoryFactory, create_trajectory - -all = [ - "trajectory", - "FeatureType", - "Trajectory", - "TrajectoryInterface", - "FileSystemInterface", - "TimeProvider", - "TrajectoryFactory", - "create_trajectory", -] - -import logging - -_FORMAT = "%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s" -logging.basicConfig(format=_FORMAT) -logging.root.setLevel(logging.INFO) diff --git a/format.sh b/format.sh index 0d02e95..751d9e4 100755 --- a/format.sh +++ b/format.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# YAPF formatter, adapted for fog_x project. +# YAPF formatter, adapted for robodm project. # # Usage: # # Do work and commit your work. @@ -7,6 +7,12 @@ # # Format files that differ from origin/main. 
# bash format.sh +# # Check formatting without making changes (for CI) +# bash format.sh --check + +# # Format all files +# bash format.sh --all + # # Commit changed files with message 'Run yapf and pylint' # # @@ -21,11 +27,37 @@ builtin cd "$(dirname "${BASH_SOURCE:-$0}")" ROOT="$(git rev-parse --show-toplevel)" builtin cd "$ROOT" || exit 1 +# Parse command line arguments +CHECK_ONLY=false +RUN_ALL=false + +while [[ $# -gt 0 ]]; do + case $1 in + --check) + CHECK_ONLY=true + shift + ;; + --all) + RUN_ALL=true + shift + ;; + --files) + # Keep existing behavior for --files + break + ;; + *) + echo "Unknown option: $1" + echo "Usage: $0 [--check] [--all] [--files file1 file2 ...]" + exit 1 + ;; + esac +done + # Check if tools are installed before getting versions check_tool_installed() { if ! command -v "$1" &> /dev/null; then echo "Error: $1 is not installed. Please install development dependencies." - echo "You can install them with: pip install yapf black isort mypy pylint" + echo "You can install them with: pip install yapf black isort mypy pylint flake8" exit 1 fi } @@ -55,17 +87,28 @@ YAPF_FLAGS=( '--parallel' ) +# Add --diff flag for check mode +if [ "$CHECK_ONLY" = true ]; then + YAPF_FLAGS+=('--diff') + BLACK_FLAGS=('--check' '--diff') + ISORT_FLAGS=('--check-only' '--diff') +else + YAPF_FLAGS+=('--in-place') + BLACK_FLAGS=() + ISORT_FLAGS=() +fi + YAPF_EXCLUDES=( '--exclude' 'build/**' '--exclude' '.pytest_cache/**' - '--exclude' 'fog_x.egg-info/**' + '--exclude' 'robodm.egg-info/**' '--exclude' '__pycache__/**' ) ISORT_EXCLUDES=( '--sg' 'build/**' '--sg' '.pytest_cache/**' - '--sg' 'fog_x.egg-info/**' + '--sg' 'robodm.egg-info/**' '--sg' '__pycache__/**' ) @@ -73,9 +116,22 @@ PYLINT_FLAGS=( '--disable=C0103,C0114,C0115,C0116' # Disable some overly strict checks ) +# Track if any formatting issues were found +FORMAT_ISSUES=false + # Format specified files format() { - yapf --in-place "${YAPF_FLAGS[@]}" "$@" + if [ "$CHECK_ONLY" = true ]; then + if ! 
yapf "${YAPF_FLAGS[@]}" "$@" | grep -q .; then + return 0 + else + echo "YAPF formatting issues found" + FORMAT_ISSUES=true + return 1 + fi + else + yapf "${YAPF_FLAGS[@]}" "$@" + fi } # Format files that differ from main branch. Ignores dirs that are not slated @@ -90,19 +146,42 @@ format_changed() { MERGEBASE="$(git merge-base origin/main HEAD 2>/dev/null || git merge-base origin/master HEAD 2>/dev/null || echo HEAD~1)" if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then - git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | \ - tr '\n' '\0' | xargs -P 5 -0 \ - yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}" + local files + files=$(git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi') + if [ -n "$files" ]; then + echo "$files" | tr '\n' '\0' | xargs -P 5 -0 \ + yapf "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}" + fi fi } # Format all files format_all() { - yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" fog_x tests examples + if [ "$CHECK_ONLY" = true ]; then + echo "Checking YAPF formatting..." + if ! yapf "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" robodm tests examples | grep -q .; then + echo "โœ“ YAPF: No formatting issues" + else + echo "โœ— YAPF: Formatting issues found" + FORMAT_ISSUES=true + fi + else + yapf "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" robodm tests examples + fi } -echo 'fog_x Black formatting:' -black fog_x tests examples +echo 'robodm Black formatting:' +if [ "$CHECK_ONLY" = true ]; then + echo "Checking Black formatting..." + if black "${BLACK_FLAGS[@]}" robodm tests examples; then + echo "โœ“ Black: No formatting issues" + else + echo "โœ— Black: Formatting issues found" + FORMAT_ISSUES=true + fi +else + black "${BLACK_FLAGS[@]}" robodm tests examples +fi ## This flag formats individual files. --files *must* be the first command line ## arg to use this option. 
@@ -110,46 +189,98 @@ if [[ "$1" == '--files' ]]; then format "${@:2}" # If `--all` is passed, then any further arguments are ignored and the # entire python directory is formatted. -elif [[ "$1" == '--all' ]]; then +elif [[ "$RUN_ALL" == true ]]; then format_all else # Format only the files that changed in last commit. format_changed fi -echo 'fog_x yapf: Done' +echo 'robodm yapf: Done' -echo 'fog_x isort:' -isort fog_x tests examples "${ISORT_EXCLUDES[@]}" +echo 'robodm isort:' +if [ "$CHECK_ONLY" = true ]; then + echo "Checking isort formatting..." + if isort "${ISORT_FLAGS[@]}" robodm tests examples "${ISORT_EXCLUDES[@]}"; then + echo "✓ isort: No formatting issues" + else + echo "✗ isort: Formatting issues found" + FORMAT_ISSUES=true + fi +else + isort "${ISORT_FLAGS[@]}" robodm tests examples "${ISORT_EXCLUDES[@]}" +fi # Run mypy -echo 'fog_x mypy:' +echo 'robodm mypy:' # Check if there are any Python files to check -if find fog_x -name "*.py" | head -1 | grep -q .; then - mypy fog_x --ignore-missing-imports --check-untyped-defs +if find robodm -name "*.py" | head -1 | grep -q .; then + if mypy robodm --ignore-missing-imports --check-untyped-defs; then + echo "✓ MyPy: No type issues" + else + echo "✗ MyPy: Type issues found" + if [ "$CHECK_ONLY" = true ]; then + FORMAT_ISSUES=true + fi + fi else - echo "No Python files found in fog_x/" + echo "No Python files found in robodm/" fi # Run Pylint -echo 'fog_x Pylint:' +echo 'robodm Pylint:' if [[ "$1" == '--files' ]]; then - # If --files is passed, filter to files within fog_x/ and pass to pylint. - pylint "${PYLINT_FLAGS[@]}" "${@:2}" -elif [[ "$1" == '--all' ]]; then - # Pylint entire fog_x directory. - if find fog_x -name "*.py" | head -1 | grep -q .; then - pylint "${PYLINT_FLAGS[@]}" fog_x + # If --files is passed, filter to files within robodm/ and pass to pylint.
+ if pylint "${PYLINT_FLAGS[@]}" "${@:2}"; then + echo "✓ Pylint: No issues" else - echo "No Python files found in fog_x/" + echo "✗ Pylint: Issues found" + if [ "$CHECK_ONLY" = true ]; then + FORMAT_ISSUES=true + fi + fi +elif [[ "$RUN_ALL" == true ]]; then + # Pylint entire robodm directory. + if find robodm -name "*.py" | head -1 | grep -q .; then + if pylint "${PYLINT_FLAGS[@]}" robodm; then + echo "✓ Pylint: No issues" + else + echo "✗ Pylint: Issues found" + if [ "$CHECK_ONLY" = true ]; then + FORMAT_ISSUES=true + fi + fi + else + echo "No Python files found in robodm/" fi else - # Pylint only files in fog_x/ that have changed in last commit. + # Pylint only files in robodm/ that have changed in last commit. MERGEBASE="$(git merge-base origin/main HEAD 2>/dev/null || git merge-base origin/master HEAD 2>/dev/null || echo HEAD~1)" - changed_files=$(git diff --name-only --diff-filter=ACM "$MERGEBASE" -- 'fog_x/*.py' 'fog_x/**/*.py') + changed_files=$(git diff --name-only --diff-filter=ACM "$MERGEBASE" -- 'robodm/*.py' 'robodm/**/*.py') if [[ -n "$changed_files" ]]; then - echo "$changed_files" | tr '\n' '\0' | xargs -0 pylint "${PYLINT_FLAGS[@]}" + if echo "$changed_files" | tr '\n' '\0' | xargs -0 pylint "${PYLINT_FLAGS[@]}"; then + echo "✓ Pylint: No issues" + else + echo "✗ Pylint: Issues found" + if [ "$CHECK_ONLY" = true ]; then + FORMAT_ISSUES=true + fi + fi else - echo 'Pylint skipped: no files changed in fog_x/.' + echo 'Pylint skipped: no files changed in robodm/.' + fi +fi + +# Final status check +if [ "$CHECK_ONLY" = true ]; then + if [ "$FORMAT_ISSUES" = true ]; then + echo "" + echo "❌ Code formatting/quality issues detected!" + echo "Please run 'bash format.sh --all' to fix formatting issues." + exit 1 + else + echo "" + echo "✅ All code formatting and quality checks passed!" + exit 0 fi fi @@ -158,8 +289,7 @@ if !
git diff --quiet &>/dev/null; then echo 'Changes not staged for commit:' echo git --no-pager diff --name-only - exit 1 fi -echo 'fog_x formatting complete!' \ No newline at end of file +echo 'robodm formatting complete!' \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index bcbf60f..fe00ae0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,5 +1,16 @@ -site_name: fog_x -theme: readthedocs +site_name: robodm +site_description: An Efficient and Scalable Data Collection and Management Framework For Robotics Learning +site_url: https://github.com/BerkeleyAutomation/robodm/ + +nav: + - Home: index.md + - API Reference: api.md + +theme: + name: material + palette: + primary: blue + accent: orange plugins: - search diff --git a/pyproject.toml b/pyproject.toml index ea399e0..3b763c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,29 +1,61 @@ [build-system] -requires = ["setuptools"] +requires = ["setuptools>=61.0", "wheel"] build-backend = "setuptools.build_meta" [project] -name = "fog_x" -version = "0.2.0" +name = "robodm" +version = "0.1.0" +description = "An Efficient and Scalable Data Collection and Management Framework For Robotics Learning" +readme = "README.md" +requires-python = ">=3.10" +license = {text = "BSD-3-Clause"} +authors = [ + {name = "Berkeley Automation Lab", email = "automation@berkeley.edu"}, +] +keywords = ["robotics", "data management", "machine learning", "trajectories"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", +] dependencies = [ - "numpy", - "pillow", - 
"smart_open", - "av", - "requests", - "h5py", + "numpy>=1.21.0", + "h5py>=3.7.0", + "opencv-python>=4.5.0", + "tqdm>=4.64.0", + "psutil>=5.9.0", ] -description = "An Efficient and Scalable Data Collection and Management Framework For Robotics Learning" -readme = {file = "README.md", content-type = "text/markdown"} -license = {file = "LICENSE"} -keywords = ["robotics", "data", "management", "collection", "framework", "learning"] [project.optional-dependencies] -hf = ["datasets"] -rtx = ["tensorflow", "tensorflow_datasets", "envlogger"] -aws = ["boto3"] -torch = ["torch"] -all = ["datasets", "tensorflow", "tensorflow_datasets", "envlogger", "boto3", "smart_open", "torch", "torchvision"] +hf = ["datasets>=2.14.0", "huggingface-hub>=0.16.0"] +rtx = ["tensorflow>=2.13.0", "tensorflow-datasets>=4.9.0"] +aws = ["boto3>=1.26.0", "s3fs>=2023.6.0"] +torch = ["torch>=1.13.0", "torchvision>=0.14.0"] +test = [ + "pytest>=7.0.0", + "pytest-cov>=4.0.0", + "pytest-xdist>=3.0.0", + "pytest-benchmark>=4.0.0", +] +lerobot = ["lerobot>=0.1.0"] +all = ["robodm[hf,rtx,aws,torch,lerobot]"] [project.urls] -homepage = "https://github.com/BerkeleyAutomation/fog_x/" \ No newline at end of file +homepage = "https://github.com/BerkeleyAutomation/robodm/" +repository = "https://github.com/BerkeleyAutomation/robodm/" +documentation = "https://github.com/BerkeleyAutomation/robodm/" +"Bug Tracker" = "https://github.com/BerkeleyAutomation/robodm/issues" + +[tool.setuptools.packages.find] +include = ["robodm*"] + +[tool.setuptools.package-data] +robodm = ["py.typed"] \ No newline at end of file diff --git a/robodm/__init__.py b/robodm/__init__.py new file mode 100644 index 0000000..7df1d5a --- /dev/null +++ b/robodm/__init__.py @@ -0,0 +1,42 @@ +# robodm: A high-performance robotics data management framework +# Copyright (c) 2024 Berkeley Automation Lab + +import os + +__root_dir__ = os.path.dirname(os.path.abspath(__file__)) + +# from robodm import dataset, episode, feature +# from robodm.dataset 
import Dataset +# from robodm import trajectory + +from robodm.feature import FeatureType +from robodm.trajectory import Trajectory +from robodm.trajectory_base import (FileSystemInterface, TimeProvider, + TrajectoryInterface) +from robodm.trajectory_factory import TrajectoryFactory, create_trajectory + +__all__ = [ + "FeatureType", + "Trajectory", + "TrajectoryInterface", + "FileSystemInterface", + "TimeProvider", + "TrajectoryFactory", + "create_trajectory", +] + +# Version of the robodm package +__version__ = "0.1.0" + +# Metadata +__author__ = "Berkeley Automation Lab" +__email__ = "automation@berkeley.edu" +__description__ = "A high-performance robotics data management framework" +__url__ = "https://github.com/BerkeleyAutomation/robodm" +__license__ = "BSD-3-Clause" + +import logging + +_FORMAT = "%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s" +logging.basicConfig(format=_FORMAT) +logging.root.setLevel(logging.INFO) diff --git a/fog_x/dataset.py b/robodm/dataset.py similarity index 86% rename from fog_x/dataset.py rename to robodm/dataset.py index 6ca887c..b5cca46 100644 --- a/fog_x/dataset.py +++ b/robodm/dataset.py @@ -3,8 +3,8 @@ import numpy as np -from fog_x.loader.vla import NonShuffleVLALoader, VLALoader -from fog_x.utils import data_to_tf_schema +from robodm.loader.vla import NonShuffleVLALoader, VLALoader +from robodm.utils import data_to_tf_schema class VLADataset: @@ -37,9 +37,7 @@ def __init__( self.split = split self.format = format self.shuffle = shuffle - self.loader = NonShuffleVLALoader(path, - batch_size=1, - return_type="tensor") + self.loader = NonShuffleVLALoader(path, batch_size=1, return_type="tensor") def __iter__(self): return self diff --git a/fog_x/feature.py b/robodm/feature.py similarity index 98% rename from fog_x/feature.py rename to robodm/feature.py index 87cfb18..2743276 100644 --- a/fog_x/feature.py +++ b/robodm/feature.py @@ -103,7 +103,8 @@ def from_tf_feature_type(self, tf_feature_spec): dtype = "string" 
else: raise ValueError( - f"Unsupported conversion from tf feature: {tf_feature_spec}") + f"Unsupported conversion from tf feature: {tf_feature_spec}" + ) self._set(str(dtype), shape) return self @@ -119,7 +120,7 @@ def from_data(cls, data: Any): feature_type._set("bool", ()) elif isinstance(data, list): dtype = type(data[0]).__name__ - data_shape: Tuple[int, ...] = (len(data), ) + data_shape: Tuple[int, ...] = (len(data),) feature_type._set(dtype, data_shape) else: dtype = type(data).__name__ diff --git a/fog_x/loader/__init__.py b/robodm/loader/__init__.py similarity index 100% rename from fog_x/loader/__init__.py rename to robodm/loader/__init__.py diff --git a/fog_x/loader/base.py b/robodm/loader/base.py similarity index 99% rename from fog_x/loader/base.py rename to robodm/loader/base.py index 3b8e8f8..c16cd37 100644 --- a/fog_x/loader/base.py +++ b/robodm/loader/base.py @@ -2,7 +2,6 @@ class BaseLoader: - def __init__(self, path): super(BaseLoader, self).__init__() self.logger = getLogger(__name__) diff --git a/fog_x/loader/hdf5.py b/robodm/loader/hdf5.py similarity index 99% rename from fog_x/loader/hdf5.py rename to robodm/loader/hdf5.py index 0c46839..1dfcbcb 100644 --- a/fog_x/loader/hdf5.py +++ b/robodm/loader/hdf5.py @@ -9,7 +9,7 @@ import torch from torch.utils.data import DataLoader, IterableDataset -from fog_x.utils import _flatten, recursively_read_hdf5_group +from robodm.utils import _flatten, recursively_read_hdf5_group from . 
import BaseLoader diff --git a/fog_x/loader/lerobot.py b/robodm/loader/lerobot.py similarity index 86% rename from fog_x/loader/lerobot.py rename to robodm/loader/lerobot.py index 6e27dab..cd3b31e 100644 --- a/fog_x/loader/lerobot.py +++ b/robodm/loader/lerobot.py @@ -6,16 +6,11 @@ class LeRobotLoader(BaseLoader): - - def __init__(self, - path, - dataset_name, - batch_size=1, - delta_timestamps=None): + def __init__(self, path, dataset_name, batch_size=1, delta_timestamps=None): super(LeRobotLoader, self).__init__(path) self.batch_size = batch_size self.dataset = LeRobotDataset( - root="/mnt/data/fog_x/hf/", + root="/mnt/data/robodm/hf/", repo_id=dataset_name, delta_timestamps=delta_timestamps, ) @@ -43,9 +38,11 @@ def _frame_to_numpy(frame): self.episode_index = 0 try: from_idx = self.dataset.episode_data_index["from"][ - self.episode_index].item() + self.episode_index + ].item() to_idx = self.dataset.episode_data_index["to"][ - self.episode_index].item() + self.episode_index + ].item() except Exception as e: self.episode_index = 0 continue diff --git a/fog_x/loader/rlds.py b/robodm/loader/rlds.py similarity index 92% rename from fog_x/loader/rlds.py rename to robodm/loader/rlds.py index 0c0e2a0..31d8f8a 100644 --- a/fog_x/loader/rlds.py +++ b/robodm/loader/rlds.py @@ -4,13 +4,9 @@ class RLDSLoader(BaseLoader): - - def __init__(self, - path, - split="train", - batch_size=1, - shuffle_buffer=10, - shuffling=True): + def __init__( + self, path, split="train", batch_size=1, shuffle_buffer=10, shuffling=True + ): super(RLDSLoader, self).__init__(path) try: diff --git a/fog_x/loader/vla.py b/robodm/loader/vla.py similarity index 96% rename from fog_x/loader/vla.py rename to robodm/loader/vla.py index 66fa531..e759ad0 100644 --- a/fog_x/loader/vla.py +++ b/robodm/loader/vla.py @@ -4,8 +4,10 @@ import random from typing import Any, List, Optional, Text -import fog_x -from fog_x.loader.base import BaseLoader +import numpy as np + +import robodm +from robodm.loader.base 
import BaseLoader logger = logging.getLogger(__name__) @@ -45,7 +47,7 @@ def _get_files(self, path, split): def _read_vla(self, data_path, return_type=None): if return_type is None: return_type = self.return_type - traj = fog_x.Trajectory(data_path) + traj = robodm.Trajectory(data_path) ret = traj.load(return_type=return_type) return ret @@ -159,7 +161,7 @@ def peek(self): def _read_vla(self, data_path, return_type=None): if return_type is None: return_type = self.return_type - traj = fog_x.Trajectory(data_path) + traj = robodm.Trajectory(data_path) ret = traj.load(return_type=return_type) return ret @@ -172,7 +174,7 @@ def get_batch(self): import torch from torch.utils.data import DataLoader, IterableDataset -from fog_x.loader.vla import VLALoader +from robodm.loader.vla import VLALoader class VLAIterableDataset(IterableDataset): diff --git a/fog_x/trajectory.py b/robodm/trajectory.py similarity index 98% rename from fog_x/trajectory.py rename to robodm/trajectory.py index ae5194e..a928206 100644 --- a/fog_x/trajectory.py +++ b/robodm/trajectory.py @@ -13,9 +13,9 @@ import h5py import numpy as np -from fog_x import FeatureType -from fog_x.trajectory_base import TrajectoryInterface -from fog_x.utils import recursively_read_hdf5_group +from robodm import FeatureType +from robodm.trajectory_base import TrajectoryInterface +from robodm.utils import recursively_read_hdf5_group logger = logging.getLogger(__name__) @@ -199,16 +199,16 @@ def __init__( self.feature_name_to_stream: Dict[str, Any] = {} # feature_name: stream - self.feature_name_to_feature_type: Dict[str, FeatureType] = ( - {}) # feature_name: feature_type + self.feature_name_to_feature_type: Dict[str, FeatureType] = { + } # feature_name: feature_type self.trajectory_data = None # trajectory_data self.start_time = self._time() self.mode = mode self.stream_id_to_info: Dict[int, StreamInfo] = {} # stream_id: StreamInfo self.is_closed = False - self.pending_write_tasks: List[Any] = ( - []) # List to keep track 
of pending write tasks + self.pending_write_tasks: List[Any] = [ + ] # List to keep track of pending write tasks self.container_file: Optional[Any] = None # av.OutputContainer or None # check if the path exists @@ -524,7 +524,7 @@ def from_list_of_dicts( {"feature1": "value3", "feature2": "value4"}, ] - trajectory = Trajectory.from_list_of_dicts(original_trajectory, path="/tmp/fog_x/output.vla") + trajectory = Trajectory.from_list_of_dicts(original_trajectory, path="/tmp/robodm/output.vla") """ traj = cls(path, mode="w", @@ -565,7 +565,7 @@ def from_dict_of_lists( "feature2": ["value2", "value4"], } - trajectory = Trajectory.from_dict_of_lists(original_trajectory, path="/tmp/fog_x/output.vla") + trajectory = Trajectory.from_dict_of_lists(original_trajectory, path="/tmp/robodm/output.vla") """ traj = cls( path, @@ -761,8 +761,8 @@ def _transcode_pickled_images(self, for key, value in stream.metadata.items(): stream_in_updated_container.metadata[key] = value - d_original_stream_id_to_new_container_stream[stream.index] = ( - stream_in_updated_container) + d_original_stream_id_to_new_container_stream[ + stream.index] = stream_in_updated_container # Transcode pickled images and add them to the new container packets_muxed = 0 @@ -950,8 +950,8 @@ def _on_new_stream(self, new_feature, new_encoding, new_feature_type): # new_stream.options = stream.options for key, value in stream.metadata.items(): stream_in_updated_container.metadata[key] = value - d_original_stream_id_to_new_container_stream[stream.index] = ( - stream_in_updated_container) + d_original_stream_id_to_new_container_stream[ + stream.index] = stream_in_updated_container # Add new feature stream new_stream = self._add_stream_to_container(new_container, diff --git a/fog_x/trajectory_base.py b/robodm/trajectory_base.py similarity index 100% rename from fog_x/trajectory_base.py rename to robodm/trajectory_base.py diff --git a/fog_x/trajectory_factory.py b/robodm/trajectory_factory.py similarity index 100% rename 
from fog_x/trajectory_factory.py rename to robodm/trajectory_factory.py diff --git a/fog_x/utils.py b/robodm/utils.py similarity index 76% rename from fog_x/utils.py rename to robodm/utils.py index 64e485c..8d0b61e 100644 --- a/fog_x/utils.py +++ b/robodm/utils.py @@ -1,8 +1,10 @@ -from typing import Any, Dict +import os +import tempfile +from typing import Any, Dict, List import numpy as np -from fog_x.feature import FeatureType +from robodm.feature import FeatureType def data_to_tf_schema(data: Dict[str, Any]) -> Dict[str, FeatureType]: @@ -16,12 +18,12 @@ def data_to_tf_schema(data: Dict[str, Any]) -> Dict[str, FeatureType]: main_key, sub_key = k.split("/") if main_key not in schema: schema[main_key] = {} - schema[main_key][sub_key] = FeatureType.from_data( - v).to_tf_feature_type(first_dim_none=True) + schema[main_key][sub_key] = FeatureType.from_data(v).to_tf_feature_type( + first_dim_none=True + ) # replace first element of shape with None else: - schema[k] = FeatureType.from_data(v).to_tf_feature_type( - first_dim_none=True) + schema[k] = FeatureType.from_data(v).to_tf_feature_type(first_dim_none=True) return schema @@ -44,9 +46,6 @@ def recursively_read_hdf5_group(group): if isinstance(group, h5py.Dataset): return np.array(group) elif isinstance(group, h5py.Group): - return { - key: recursively_read_hdf5_group(value) - for key, value in group.items() - } + return {key: recursively_read_hdf5_group(value) for key, value in group.items()} else: raise TypeError("Unsupported HDF5 group type") diff --git a/tests/README.md b/tests/README.md index 09bd5e2..c5f8f7f 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,6 +1,6 @@ -# fog_x Test Suite +# robodm Test Suite -This directory contains comprehensive tests for the fog_x trajectory management system, including unit tests, integration tests, and performance benchmarks. 
+This directory contains comprehensive tests for the robodm trajectory management system, including unit tests, integration tests, and performance benchmarks. ## Test Structure @@ -90,7 +90,7 @@ python -m pytest tests/ ## Benchmarking -The benchmark suite compares fog_x VLA format against: +The benchmark suite compares robodm VLA format against: - **HDF5**: Popular scientific data format - **TFRecord**: TensorFlow's native format (if available) @@ -127,7 +127,7 @@ export PYTEST_IGNORE_SLOW=1 export PYTEST_TEMP_DIR=/path/to/temp # Enable verbose logging -export FOG_X_TEST_VERBOSE=1 +export ROBODM_TEST_VERBOSE=1 ``` ## Adding New Tests @@ -191,7 +191,7 @@ pip install tensorflow # For TFRecord benchmarks #### "Permission denied" errors ```bash # Ensure temp directory is writable -chmod 755 /tmp/fog_x_tests +chmod 755 /tmp/robodm_tests ``` #### Out of memory errors diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py index 5fb5605..a48c19c 100644 --- a/tests/test_fixtures.py +++ b/tests/test_fixtures.py @@ -1,15 +1,17 @@ -"""Test fixtures and mock implementations for fog_x testing.""" +"""Test fixtures and mock implementations for robodm testing.""" import os import shutil import tempfile -from typing import Any, Dict, List, Optional +import time +from typing import Any, Dict, List, Optional, Union from unittest.mock import MagicMock, Mock import numpy as np import pytest -from fog_x.trajectory_base import FileSystemInterface, TimeProvider +from robodm import Trajectory +from robodm.trajectory_base import FileSystemInterface, TimeProvider class MockFileSystem(FileSystemInterface): @@ -162,8 +164,6 @@ def create_vla_dataset(path: str, data: Dict[str, List[Any]], video_codec: str = "auto"): """Create a VLA dataset file for testing.""" - from fog_x import Trajectory - traj = Trajectory.from_dict_of_lists(data, path, video_codec=video_codec) diff --git a/tests/test_loaders.py b/tests/test_loaders.py index bbeffe4..2957e77 100644 --- a/tests/test_loaders.py +++ 
b/tests/test_loaders.py @@ -8,8 +8,8 @@ import numpy as np import pytest -from fog_x import Trajectory -from fog_x.loader import HDF5Loader, NonShuffleVLALoader +from robodm import Trajectory +from robodm.loader import HDF5Loader, NonShuffleVLALoader from .test_fixtures import BenchmarkDataset @@ -110,7 +110,7 @@ def test_vla_loader_batch_size(self, temp_dir, large_sample_data, codec): try: # Test with batch size - from fog_x.loader.vla import get_vla_dataloader + from robodm.loader.vla import get_vla_dataloader dataloader = get_vla_dataloader(path=temp_dir, batch_size=2) @@ -361,7 +361,7 @@ def test_hdf5_loader_basic(self, temp_dir, large_sample_data, paths.append(path) # Test loading - from fog_x.loader.hdf5 import get_hdf5_dataloader + from robodm.loader.hdf5 import get_hdf5_dataloader dataloader = get_hdf5_dataloader(path=os.path.join(temp_dir, "*.h5"), batch_size=1, @@ -389,7 +389,7 @@ def test_hdf5_loader_batch_size(self, temp_dir, large_sample_data, benchmark_dataset.create_hdf5_dataset(path, small_data) # Test with batch size - from fog_x.loader.hdf5 import get_hdf5_dataloader + from robodm.loader.hdf5 import get_hdf5_dataloader dataloader = get_hdf5_dataloader(path=os.path.join(temp_dir, "*.h5"), batch_size=2, @@ -434,7 +434,7 @@ def test_vla_vs_hdf5_data_consistency(self, temp_dir, sample_dict_of_lists, vla_loader = NonShuffleVLALoader(vla_path) vla_data = list(vla_loader)[0] - from fog_x.loader.hdf5 import get_hdf5_dataloader + from robodm.loader.hdf5 import get_hdf5_dataloader h5_loader = get_hdf5_dataloader(h5_path, batch_size=1, num_workers=0) h5_data = list(h5_loader)[0][0] @@ -486,7 +486,7 @@ def test_hdf5_loader_empty_pattern(self, temp_dir): """Test HDF5 loader with pattern that matches no files.""" pattern = os.path.join(temp_dir, "nonexistent_*.h5") - from fog_x.loader.hdf5 import get_hdf5_dataloader + from robodm.loader.hdf5 import get_hdf5_dataloader dataloader = get_hdf5_dataloader(pattern, batch_size=1, num_workers=0) @@ -535,7 +535,7 @@ def 
test_hdf5_loader_memory_usage(self, temp_dir, large_sample_data, benchmark_dataset.create_hdf5_dataset(path, large_sample_data) # Load and measure - from fog_x.loader.hdf5 import get_hdf5_dataloader + from robodm.loader.hdf5 import get_hdf5_dataloader dataloader = get_hdf5_dataloader(path, batch_size=1, num_workers=0) batches = list(dataloader) diff --git a/tests/test_openx_trajectory.py b/tests/test_openx_trajectory.py index 6557b91..fcab93d 100644 --- a/tests/test_openx_trajectory.py +++ b/tests/test_openx_trajectory.py @@ -9,8 +9,8 @@ import numpy as np import pytest -from fog_x import Trajectory -from fog_x.loader import RLDSLoader +from robodm import Trajectory +from robodm.loader import RLDSLoader from .test_fixtures import MockFileSystem, MockTimeProvider @@ -377,8 +377,8 @@ def test_openx_codec_availability_report(self, temp_dir, mock_openx_data): print("=" * 60) # Ensure at least one codec works with OpenX data - assert (len(available_codecs) - > 0), "No codecs are available for Open X-Embodiment data!" + assert (len(available_codecs) > + 0), "No codecs are available for Open X-Embodiment data!" 
class TestRLDSLoaderIntegration: @@ -390,7 +390,7 @@ class TestRLDSLoaderIntegration: @pytest.mark.parametrize("video_codec", ["rawvideo", "libx264"]) def test_real_openx_data_codec_comparison(self, temp_dir, video_codec): """Test real OpenX data with different codecs using appropriate validation for each.""" - data_dir = "/home/kych/berkeley/datasets/rtx/fractal20220817_data/0.1.0/" + data_dir = "gs://gresearch/robotics/fractal20220817_data/0.1.0/" dataset_name = "fractal20220817_data" try: @@ -554,7 +554,7 @@ def test_real_openx_data_codec_comparison(self, temp_dir, video_codec): @pytest.mark.parametrize("codec", OPENX_TEST_CODECS) def test_real_openx_data_loading(self, temp_dir, codec): """Test loading real Open X-Embodiment data and compare original vs reconstructed.""" - data_dir = "/home/kych/berkeley/datasets/rtx/fractal20220817_data/0.1.0/" + data_dir = "gs://gresearch/robotics/fractal20220817_data/0.1.0/" dataset_name = "fractal20220817_data" # Define dataset_name for file naming video_codec = codec # Test with lossy codec @@ -1702,8 +1702,8 @@ def test_openx_format_comparison(self, temp_dir, openx_test_data, # Ensure file sizes are reasonable (not empty, not too large) for format_name, metrics in successful_formats.items(): - assert (metrics["file_size_mb"] - > 0), f"{format_name} file should not be empty" + assert (metrics["file_size_mb"] > + 0), f"{format_name} file should not be empty" assert (metrics["file_size_mb"] < original_size_mb * 10), f"{format_name} file suspiciously large" @@ -2181,7 +2181,7 @@ def _int64_feature(value): def _benchmark_vla_loader(self, dataset_info, batch_size=1): """Benchmark VLA loader performance.""" - from fog_x.loader import NonShuffleVLALoader + from robodm.loader import NonShuffleVLALoader start_time = time.time() @@ -2213,7 +2213,7 @@ def _benchmark_vla_loader(self, dataset_info, batch_size=1): def _benchmark_hdf5_loader(self, dataset_info, batch_size=1): """Benchmark HDF5 loader performance.""" try: - from 
fog_x.loader.hdf5 import get_hdf5_dataloader + from robodm.loader.hdf5 import get_hdf5_dataloader except ImportError: return None @@ -2523,7 +2523,7 @@ def test_openx_loader_benchmark_comprehensive(self, temp_dir, def test_openx_loader_scalability(self, temp_dir): """Test loader scalability with different dataset sizes.""" - sizes = [100, 300, 500] # Number of trajectories + sizes = [1, 3, 5] # Number of trajectories steps_per_traj = 100 print(f"\n=== LOADER SCALABILITY TEST ===") @@ -2760,8 +2760,8 @@ def test_openx_loader_scalability(self, temp_dir): f" 💾 {fmt2} is {1/size_ratio:.2f}x more compact than {fmt1}" ) - assert (len(scalability_results) - > 0), "At least one scalability test should succeed" + assert (len(scalability_results) > + 0), "At least one scalability test should succeed" # Test scalability characteristics for format_name in formats: @@ -2788,7 +2788,7 @@ def test_openx_loader_scalability(self, temp_dir): def test_openx_rlds_integration_benchmark(self, temp_dir): """Test RLDS integration if real RLDS data is available.""" - rlds_data_dir = "/home/kych/berkeley/datasets/rtx/fractal20220817_data/0.1.0/" + rlds_data_dir = "gs://gresearch/robotics/fractal20220817_data/0.1.0/" # Check if RLDS data is available if not os.path.exists(rlds_data_dir): diff --git a/tests/test_trajectory.py b/tests/test_trajectory.py index 19a8ead..e9b101c 100644 --- a/tests/test_trajectory.py +++ b/tests/test_trajectory.py @@ -8,9 +8,9 @@ import numpy as np import pytest -from fog_x import FeatureType, Trajectory, TrajectoryFactory -from fog_x.trajectory import CodecConfig -from fog_x.trajectory_base import FileSystemInterface, TimeProvider +from robodm import FeatureType, Trajectory, TrajectoryFactory +from robodm.trajectory import CodecConfig +from robodm.trajectory_base import FileSystemInterface, TimeProvider from .test_fixtures import MockFileSystem, MockTimeProvider