|
42 | 42 | # !pip3 install torchrl |
43 | 43 | # !pip3 install gym[mujoco] |
44 | 44 | # !pip3 install tqdm |
| 45 | +# !pip install torchrl gymnasium[mujoco] mujoco==3.1.1 (For Google Colab) |
45 | 46 | # |
46 | 47 | # Proximal Policy Optimization (PPO) is a policy-gradient algorithm where a |
47 | 48 | # batch of data is being collected and directly consumed to train the policy to maximise |
|
211 | 212 | # to a large panel of RL simulators, allowing you to easily swap one environment |
212 | 213 | # with another. For example, creating a wrapped gym environment can be achieved with just a few characters: |
213 | 214 | # |
# -----------------------------------------------------------------------------
# Google Colab and gymnasium compatibility for MuJoCo-based environments
# -----------------------------------------------------------------------------

import os

# Prefer gymnasium; fall back to the legacy gym package when gymnasium is not
# installed. USING_GYMNASIUM records which of the two was imported.
try:
    import gymnasium as gym

    USING_GYMNASIUM = True
except ImportError:
    import gym

    USING_GYMNASIUM = False

# ``get_ipython`` is only injected into the namespace by IPython/Jupyter
# kernels. Guard the lookup so that running this file as a plain Python script
# does not fail with a NameError.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None

# In headless environments like Google Colab, MuJoCo needs osmesa for rendering.
if "google.colab" in str(_ipython_shell):
    os.environ["MUJOCO_GL"] = "osmesa"

# Pick the environment revision from the imported package: "v5" when gymnasium
# is available, "v4" with legacy gym.
# NOTE(review): assumes the installed gymnasium release registers the v5
# MuJoCo environments -- confirm against the pinned version.
env_version = "v5" if USING_GYMNASIUM else "v4"
env_id = f"InvertedDoublePendulum-{env_version}"

# Build the wrapped environment on the configured device.
base_env = GymEnv(env_id, device=device)
214 | 242 |
|
215 | | -base_env = GymEnv("InvertedDoublePendulum-v4", device=device) |
216 | 243 |
|
217 | 244 | ###################################################################### |
218 | 245 | # There are a few things to notice in this code: first, we created |
|
0 commit comments