HumanCompatibleAI
diff --git a/‎.gitattributes‎
Lines changed: 2 additions & 0 deletions b/‎.gitattributes‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/algorithms/density.rst‎
Lines changed: 2 additions & 2 deletions b/‎docs/algorithms/density.rst‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎experiments/convert_traj.py‎
Lines changed: 2 additions & 2 deletions b/‎experiments/convert_traj.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎setup.py‎
Lines changed: 1 addition & 0 deletions b/‎setup.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/imitation/algorithms/adversarial/common.py‎
Lines changed: 1 addition & 1 deletion b/‎src/imitation/algorithms/adversarial/common.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/imitation/algorithms/bc.py‎
Lines changed: 1 addition & 1 deletion b/‎src/imitation/algorithms/bc.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/imitation/algorithms/dagger.py‎
Lines changed: 7 additions & 6 deletions b/‎src/imitation/algorithms/dagger.py‎
Lines changed: 7 additions & 6 deletions
diff --git a/‎src/imitation/data/huggingface_utils.py‎
Lines changed: 176 additions & 0 deletions b/‎src/imitation/data/huggingface_utils.py‎
Lines changed: 176 additions & 0 deletions
diff --git a/‎src/imitation/data/serialize.py‎
Lines changed: 101 additions & 0 deletions b/‎src/imitation/data/serialize.py‎
Lines changed: 101 additions & 0 deletions
@@ -1 +1,3 @@
 *.ipynb linguist-vendored
+tests/testdata/pickle_format_rollout.pkl filter=lfs diff=lfs merge=lfs -text
+tests/testdata/npz_format_rollout.npz filter=lfs diff=lfs merge=lfs -text
@@ -19,13 +19,13 @@ Detailed example notebook: :doc:`../tutorials/7_train_density`
     from stable_baselines3.common.policies import ActorCriticPolicy
 
     from imitation.algorithms import density as db
-    from imitation.data import types
+    from imitation.data import serialize
     from imitation.util import util
 
     rng = np.random.default_rng(0)
 
     env = util.make_vec_env("Pendulum-v1", rng=rng, n_envs=2)
-    rollouts = types.load("../tests/testdata/expert_models/pendulum_0/rollouts/final.npz")
+    rollouts = serialize.load("../tests/testdata/expert_models/pendulum_0/rollouts/final.npz")
 
     imitation_trainer = PPO(ActorCriticPolicy, env)
     density_trainer = db.DensityAlgorithm(
 
@@ -8,7 +8,7 @@
 
 import numpy as np
 
-from imitation.data import rollout, types
+from imitation.data import rollout, serialize, types
 
 
 def convert_trajs_to_sb(trajs: Sequence[types.TrajectoryWithRew]) -> dict:
@@ -32,7 +32,7 @@ def main():
     dst_path = Path(args.dst_path)
 
     assert src_path.is_file()
-    src_trajs = types.load_with_rewards(src_path)
+    src_trajs = serialize.load_with_rewards(src_path)
     dst_trajs = convert_trajs_to_sb(src_trajs)
     os.makedirs(dst_path.parent, exist_ok=True)
     with open(dst_path, "wb") as f:
 
@@ -208,6 +208,7 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str:
         "sacred>=0.8.4",
         "tensorboard>=1.14",
         "huggingface_sb3>=2.2.1",
+        "datasets>=2.8.0",
     ],
     tests_require=TESTS_REQUIRE,
     extras_require={
 
@@ -202,7 +202,7 @@ def __init__(
         self.venv = venv
         self.gen_algo = gen_algo
         self._reward_net = reward_net.to(gen_algo.device)
-        self._log_dir = types.parse_path(log_dir)
+        self._log_dir = util.parse_path(log_dir)
 
         # Create graph for optimising/recording stats on discriminator
         self._disc_opt_cls = disc_opt_cls
 
@@ -490,4 +490,4 @@ def save_policy(self, policy_path: types.AnyPath) -> None:
         Args:
             policy_path: path to save policy to.
         """
-        th.save(self.policy, types.parse_path(policy_path))
+        th.save(self.policy, util.parse_path(policy_path))
@@ -20,8 +20,9 @@
 from torch.utils import data as th_data
 
 from imitation.algorithms import base, bc
-from imitation.data import rollout, types
+from imitation.data import rollout, serialize, types
 from imitation.util import logger as imit_logger
+from imitation.util import util
 
 
 class BetaSchedule(abc.ABC):
@@ -118,7 +119,7 @@ def reconstruct_trainer(
         A deserialized `DAggerTrainer`.
     """
     custom_logger = custom_logger or imit_logger.configure()
-    scratch_dir = types.parse_path(scratch_dir)
+    scratch_dir = util.parse_path(scratch_dir)
     checkpoint_path = scratch_dir / "checkpoint-latest.pt"
     trainer = th.load(checkpoint_path, map_location=utils.get_device(device))
     trainer.venv = venv
@@ -133,7 +134,7 @@ def _save_dagger_demo(
     rng: np.random.Generator,
     prefix: str = "",
 ) -> None:
-    save_dir = types.parse_path(save_dir)
+    save_dir = util.parse_path(save_dir)
     assert isinstance(trajectory, types.Trajectory)
     actual_prefix = f"{prefix}-" if prefix else ""
     randbits = int.from_bytes(rng.bytes(16), "big")
@@ -143,7 +144,7 @@ def _save_dagger_demo(
     assert (
         not npz_path.exists()
     ), "The following DAgger demonstration path already exists: {0}".format(npz_path)
-    types.save(npz_path, [trajectory])
+    serialize.save(npz_path, [trajectory])
     logging.info(f"Saved demo at '{npz_path}'")
 
 
@@ -353,7 +354,7 @@ def __init__(
         if beta_schedule is None:
             beta_schedule = LinearBetaSchedule(15)
         self.beta_schedule = beta_schedule
-        self.scratch_dir = types.parse_path(scratch_dir)
+        self.scratch_dir = util.parse_path(scratch_dir)
         self.venv = venv
         self.round_num = 0
         self._last_loaded_round = -1
@@ -399,7 +400,7 @@ def _load_all_demos(self) -> Tuple[types.Transitions, List[int]]:
         for round_num in range(self._last_loaded_round + 1, self.round_num + 1):
             round_dir = self._demo_dir_path_for_round(round_num)
             demo_paths = self._get_demo_paths(round_dir)
-            self._all_demos.extend(types.load(p)[0] for p in demo_paths)
+            self._all_demos.extend(serialize.load(p)[0] for p in demo_paths)
             num_demos_by_round.append(len(demo_paths))
         logging.info(f"Loaded {len(self._all_demos)} total")
         demo_transitions = rollout.flatten_trajectories(self._all_demos)
 
@@ -0,0 +1,176 @@
+"""Helpers to convert between Trajectories and HuggingFace's datasets library."""
+import functools
+from typing import Any, Dict, Iterable, Sequence, cast
+
+import datasets
+import jsonpickle
+import numpy as np
+
+from imitation.data import types
+
+
+class TrajectoryDatasetSequence(Sequence[types.Trajectory]):
+    """Sequence view over a HF dataset whose rows are trajectories.
+
+    Rows are turned into trajectory objects at access time, not up front.
+    """
+
+    def __init__(self, dataset: datasets.Dataset):
+        """Wrap `dataset` so that indexing it yields trajectories.
+
+        Args:
+            dataset: The HF dataset to wrap. If it has a "rews" feature, rows are
+                returned as `TrajectoryWithRew`; otherwise as `Trajectory`.
+        """
+        # TODO: this is just a temporary workaround for
+        #  https://github.com/huggingface/datasets/issues/5517
+        #  switch to .with_format("numpy") once it's fixed
+        def numpy_transform(batch):
+            return {key: np.asarray(val) for key, val in batch.items()}
+
+        self._dataset = dataset.with_transform(numpy_transform)
+        if "rews" in dataset.features:
+            self._trajectory_class = types.TrajectoryWithRew
+        else:
+            self._trajectory_class = types.Trajectory
+
+    def __len__(self) -> int:
+        return len(self._dataset)
+
+    def _build_trajectory(self, fields):
+        """Construct one trajectory from the field dict of a single row."""
+        # Infos stay jsonpickled until somebody actually reads them
+        fields["infos"] = _LazyDecodedList(fields["infos"])
+        return self._trajectory_class(**fields)
+
+    def __getitem__(self, idx):
+        if not isinstance(idx, slice):
+            # Anything that is not a slice is handed to the dataset unchanged
+            return self._build_trajectory(self._dataset[idx])
+
+        # A slice comes back column-wise: regroup it into one dict per row
+        columns = self._dataset[idx]
+        num_rows = len(columns["obs"])
+        rows = [
+            {name: columns[name][row] for name in columns} for row in range(num_rows)
+        ]
+        return [self._build_trajectory(fields) for fields in rows]
+
+
+class _LazyDecodedList(Sequence[Any]):
+    """A wrapper to lazily decode a list of jsonpickled strings.
+
+    Decoded results are cached per instance to avoid decoding the same string
+    multiple times; the cache is released together with the wrapper.
+
+    This is used to decode the infos of a trajectory only when they are accessed.
+    """
+
+    def __init__(self, encoded_list: Sequence[str]):
+        """Wrap `encoded_list` without decoding anything yet.
+
+        Args:
+            encoded_list: The jsonpickled strings to decode on access.
+        """
+        self._encoded_list = encoded_list
+        # Decoded entries, keyed by the index they were requested with. This is
+        # kept on the instance instead of using `functools.lru_cache` on
+        # `__getitem__`: that cache is shared by the whole class and keyed on
+        # `self`, so it keeps every wrapper alive, and it has to hash the index,
+        # which raises TypeError for slices on Python versions before 3.12.
+        # The size is bounded by the number of encoded entries.
+        self._decoded: Dict[Any, Any] = {}
+
+    def __len__(self):
+        return len(self._encoded_list)
+
+    def __getitem__(self, idx):
+        """Return the decoded entry at `idx`, or a list of them for a slice."""
+        if isinstance(idx, slice):
+            # Resolve the slice to concrete positions so that every element goes
+            # through (and populates) the per-index cache.
+            return [self[i] for i in range(*idx.indices(len(self)))]
+        if idx not in self._decoded:
+            self._decoded[idx] = jsonpickle.decode(self._encoded_list[idx])
+        return self._decoded[idx]
+
+
+def make_dict_from_trajectory(trajectory: types.Trajectory):
+    """Turn a single Trajectory into a plain dict.
+
+    The resulting dict has these entries:
+    * obs: The observations. Shape: (num_timesteps, obs_dim). dtype: float.
+    * acts: The actions. Shape: (num_timesteps, act_dim). dtype: float.
+    * infos: The infos. Shape: (num_timesteps, ). dtype: (jsonpickled) str.
+    * terminal: The terminal flags. Shape: (num_timesteps, ). dtype: bool.
+    * rews: The rewards. Shape: (num_timesteps, ). dtype: float. Only present
+      when `trajectory` is a `TrajectoryWithRew`.
+
+    Args:
+        trajectory: The trajectory to convert.
+
+    Returns:
+        A dict representing the trajectory.
+    """
+    # A trajectory without infos is treated as having one empty dict per step
+    raw_infos = trajectory.infos
+    if raw_infos is None:
+        raw_infos = [{}] * len(trajectory)
+    step_infos = cast(Sequence[Dict[str, Any]], raw_infos)
+
+    result = {
+        "obs": trajectory.obs,
+        "acts": trajectory.acts,
+        # infos are stored as jsonpickled strings
+        "infos": [jsonpickle.encode(info) for info in step_infos],
+        "terminal": trajectory.terminal,
+    }
+
+    # Rewards only exist on the reward-carrying subclass
+    if isinstance(trajectory, types.TrajectoryWithRew):
+        result["rews"] = trajectory.rews
+
+    return result
+
+
+def trajectories_to_dict(
+    trajectories: Sequence[types.Trajectory],
+) -> Dict[str, Sequence[Any]]:
+    """Turn a sequence of trajectories into a column-wise dict.
+
+    The resulting dict has these entries:
+
+    * obs: The observations. Shape: (num_trajectories, num_timesteps, obs_dim).
+    * acts: The actions. Shape: (num_trajectories, num_timesteps, act_dim).
+    * infos: The infos. Shape: (num_trajectories, num_timesteps) as jsonpickled str.
+    * terminal: The terminal flags. Shape: (num_trajectories, num_timesteps, ).
+    * rews: The rewards. Shape: (num_trajectories, num_timesteps) if applicable.
+
+    The dict is suitable for building a HuggingFace dataset.
+
+    Args:
+        trajectories: The trajectories to convert.
+
+    Raises:
+        ValueError: If the trajectories are a mix of `Trajectory` and
+            `TrajectoryWithRew`.
+
+    Returns:
+        A dict representing the trajectories.
+    """
+    # Rewards must be present on every trajectory or on none of them
+    reward_flags = [
+        isinstance(traj, types.TrajectoryWithRew) for traj in trajectories
+    ]
+    every_traj_has_rews = all(reward_flags)
+    if any(reward_flags) and not every_traj_has_rews:
+        raise ValueError("Some trajectories have rewards but not all")
+
+    # Missing infos become one empty dict per step; all infos are then
+    # jsonpickled to strings
+    encoded_infos = []
+    for traj in trajectories:
+        step_infos = traj.infos if traj.infos is not None else [{}] * len(traj)
+        encoded_infos.append([jsonpickle.encode(info) for info in step_infos])
+
+    result: Dict[str, Sequence[Any]] = {
+        "obs": [traj.obs for traj in trajectories],
+        "acts": [traj.acts for traj in trajectories],
+        "infos": encoded_infos,
+        "terminal": [traj.terminal for traj in trajectories],
+    }
+
+    if every_traj_has_rews:
+        result["rews"] = [
+            cast(types.TrajectoryWithRew, traj).rews for traj in trajectories
+        ]
+    return result
@@ -0,0 +1,101 @@
+"""Serialization utilities for trajectories."""
+import logging
+import os
+import warnings
+from typing import Mapping, Sequence, cast
+
+import datasets
+import huggingface_sb3 as hfsb3
+import numpy as np
+
+from imitation.data import huggingface_utils
+from imitation.data.types import AnyPath, Trajectory, TrajectoryWithRew
+from imitation.util import util
+
+
+def save(path: AnyPath, trajectories: Sequence[Trajectory]) -> None:
+    """Write trajectories to disk as a HuggingFace dataset.
+
+    Args:
+        path: Destination that the dataset is saved to.
+        trajectories: The trajectories to write.
+    """
+    target = util.parse_path(path)
+    columns = huggingface_utils.trajectories_to_dict(trajectories)
+    datasets.Dataset.from_dict(columns).save_to_disk(target)
+    logging.info(f"Dumped demonstrations to {target}.")
+
+
+def load(path: AnyPath) -> Sequence[Trajectory]:
+    """Loads a sequence of trajectories saved by `save()` from `path`.
+
+    Besides the directory written by `save()`, the two older single-file formats
+    (compressed .npz and plain pickle) are still accepted; loading either of them
+    emits a `DeprecationWarning`.
+
+    Args:
+        path: Directory holding a HuggingFace dataset, or a legacy .npz / pickle
+            file.
+
+    Returns:
+        The loaded trajectories.
+
+    Raises:
+        ValueError: If the directory does not hold a `datasets.Dataset`, or the
+            file is neither an .npz archive nor a pickled sequence.
+    """
+    if os.path.isdir(path):  # huggingface datasets format
+        dataset = datasets.load_from_disk(str(path))
+        if not isinstance(dataset, datasets.Dataset):
+            raise ValueError(
+                f"Expected to load a `datasets.Dataset` but got {type(dataset)}",
+            )
+
+        return huggingface_utils.TrajectoryDatasetSequence(dataset)
+
+    # Interestingly, np.load will just silently load a normal pickle file when you
+    # set `allow_pickle=True`. So this call should succeed for both the old
+    # compressed .npz format and the even older pickle based format. To tell the
+    # difference we need to look at the type of the resulting object. If it's the
+    # .npz format, it should be a Mapping that we need to decode, whereas if it's
+    # the pickle format it's just the sequence of trajectories, and we can return
+    # it directly.
+    # NOTE: unpickling can execute arbitrary code, so only load trusted files.
+    data = np.load(path, allow_pickle=True)  # works for both .npz and .pkl
+
+    if isinstance(data, Sequence):  # pickle format
+        warnings.warn("Loading old pickle version of Trajectories", DeprecationWarning)
+        return data
+    if isinstance(data, Mapping):  # .npz format
+        warnings.warn("Loading old npz version of Trajectories", DeprecationWarning)
+        try:
+            # Every access to an .npz entry re-reads it from the archive, so the
+            # split points are fetched once and reused.
+            indices = data["indices"]
+            num_trajs = len(indices)
+            fields = [
+                # Account for the extra obs in each trajectory
+                np.split(data["obs"], indices + np.arange(num_trajs) + 1),
+                np.split(data["acts"], indices),
+                np.split(data["infos"], indices),
+                data["terminal"],
+            ]
+            has_rews = "rews" in data
+            if has_rews:
+                fields.append(np.split(data["rews"], indices))
+        finally:
+            # The .npz archive keeps its file open until it is closed; all
+            # arrays needed have been read into memory at this point.
+            close = getattr(data, "close", None)
+            if close is not None:
+                close()
+
+        trajectory_class = TrajectoryWithRew if has_rews else Trajectory
+        return [trajectory_class(*args) for args in zip(*fields)]
+    else:  # pragma: no cover
+        raise ValueError(
+            f"Expected either an .npz file or a pickled sequence of trajectories; "
+            f"got a pickled object of type {type(data).__name__}",
+        )
+
+
+def load_with_rewards(path: AnyPath) -> Sequence[TrajectoryWithRew]:
+    """Load trajectories from `path` and require that all of them carry rewards.
+
+    Args:
+        path: Location passed on to `load()`.
+
+    Returns:
+        The loaded trajectories.
+
+    Raises:
+        ValueError: If any loaded trajectory is not a `TrajectoryWithRew`.
+    """
+    loaded = load(path)
+
+    # Collect the type of every trajectory that lacks rewards
+    offending_types = []
+    for traj in loaded:
+        if not isinstance(traj, TrajectoryWithRew):
+            offending_types.append(type(traj))
+
+    if offending_types:
+        raise ValueError(
+            f"Expected all trajectories to be of type `TrajectoryWithRew`, "
+            f"but found {offending_types[0].__name__}",
+        )
+
+    return cast(Sequence[TrajectoryWithRew], loaded)
+
+
+def load_rollouts_from_huggingface(
+    algo_name: str,
+    env_name: str,
+    organization: str = "HumanCompatibleAI",
+) -> str:
+    """Fetch the "rollouts.npz" file of a model repository via `huggingface_sb3`.
+
+    The repository id is built from `organization`, `algo_name` and `env_name`.
+
+    Args:
+        algo_name: Algorithm part of the model name.
+        env_name: Environment part of the model name.
+        organization: Organization the repository belongs to.
+
+    Returns:
+        Whatever `huggingface_sb3.load_from_hub` returns for "rollouts.npz";
+        presumably the local path of the downloaded file.
+    """
+    environment = hfsb3.EnvironmentName(env_name)
+    repository = hfsb3.ModelRepoId(
+        organization,
+        hfsb3.ModelName(algo_name, environment),
+    )
+    return hfsb3.load_from_hub(repository, "rollouts.npz")
Original file line number	Diff line number	Diff line change
`@@ -1 +1,3 @@`
`1`	`1`	`*.ipynb linguist-vendored`
	`2`	`+tests/testdata/pickle_format_rollout.pkl filter=lfs diff=lfs merge=lfs -text`
	`3`	`+tests/testdata/npz_format_rollout.npz filter=lfs diff=lfs merge=lfs -text`