|
| 1 | +import torch |
| 2 | +import torch.nn as nn |
| 3 | +from gymnasium import spaces |
| 4 | +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor |
| 5 | + |
| 6 | + |
class CNN1DResNetNoCamExtractor(BaseFeaturesExtractor):
    """1D ResNet-style feature extractor for lidar-only observations (no camera).

    Consumes observations of shape
    ``[batch_size, 1, context_size, horizontal_resolution]`` and produces a
    flat 256-dimensional feature vector per sample.
    """

    context_size = 1
    lidar_horizontal_resolution = 1024
    camera_horizontal_resolution = 0
    n_sensors = 1

    # just an alias to avoid confusion because
    # the lidar and camera have the same resolution
    horizontal_resolution = 1024

    def __init__(
        self,
        space: spaces.Box,
        device: str = "cpu",
    ):
        """
        :param space: observation space, forwarded to BaseFeaturesExtractor
        :param device: torch device the layers are created on
        """
        net = nn.Sequential(
            # shape = [batch_size, 1, 1024]
            Compressor(device),
            # shape = [batch_size, 64, 256]
            ResidualBlock(64, 64, device=device),
            ResidualBlock(64, 64, device=device),
            ResidualBlock(64, 64, downsample=True, device=device),
            # shape = [batch_size, 64, 128]
            # (downsample halves the length only; channels stay at 64 here)
            ResidualBlock(64, 64, device=device),
            ResidualBlock(64, 64, device=device),
            ResidualBlock(64, 128, downsample=True, device=device),
            # shape = [batch_size, 128, 64]
            ResidualBlock(128, 128, device=device),
            ResidualBlock(128, 128, device=device),
            ResidualBlock(128, 128, downsample=True, device=device),
            # shape = [batch_size, 128, 32]
            # (channels stay at 128 here; only the length is halved)
            ResidualBlock(128, 128, device=device),
            ResidualBlock(128, 128, device=device),
            ResidualBlock(128, 256, downsample=True, device=device),
            # shape = [batch_size, 256, 16]
            ResidualBlock(256, 256, device=device),
            ResidualBlock(256, 256, device=device),
            ResidualBlock(256, 256, downsample=True, device=device),
            # shape = [batch_size, 256, 8]
            nn.AvgPool1d(8),
            # shape = [batch_size, 256, 1]
            nn.Flatten(),
            # shape = [batch_size, 256]
        )

        # Compute the feature dimension by doing one forward pass on a dummy input
        with torch.no_grad():
            n_flatten = net(
                torch.zeros(
                    [1, 1, self.context_size, self.horizontal_resolution], device=device
                )
            ).shape[1]

        super().__init__(space, n_flatten)

        # we cannot assign the network to an attribute before calling the
        # super constructor (nn.Module rejects submodule assignment before
        # its own __init__ has run)
        self.net = net

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        """Extract features: [batch, 1, context, resolution] -> [batch, 256]."""
        return self.net(observations)
| 67 | + |
| 68 | + |
class Compressor(nn.Module):
    """Stem of the extractor: turns a ``[batch, 1, context, 1024]`` observation
    into a ``[batch, 64, 256]`` feature map.

    It drops the context axis (keeping slice 0), then applies a strided
    7-tap convolution followed by max pooling, each halving the
    horizontal resolution.
    """

    def __init__(self, device: str = "cpu"):
        super().__init__()
        # WARNING : do not use inplace=True because it would modify the rollout buffer
        self.conv = nn.Conv1d(
            in_channels=1,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3,
            device=device,
        )
        self.dropout = nn.Dropout1d(p=0.3)
        self.pool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # keep only the first context slice:
        # [batch, 1, context, res] -> [batch, 1, res]
        squeezed = x.select(2, 0)
        return self.pool(self.dropout(self.conv(squeezed)))
| 83 | + |
| 84 | + |
class ResidualBlock(nn.Module):
    """Pre-activation residual block (BN -> ReLU -> Conv, twice) with a skip
    connection.

    With ``downsample`` set, the length is halved (stride 2) and the skip
    path is a strided 1x1 convolution.  Without it, the skip path is a 1x1
    convolution when the channel count changes, and the identity otherwise.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        downsample: bool = False,
        device: str = "cpu",
    ):
        super().__init__()
        stride = 2 if downsample else 1
        if downsample or in_channels != out_channels:
            # 1x1 projection so the skip matches the main path's output shape
            self.downsample = nn.Conv1d(
                in_channels, out_channels, kernel_size=1, stride=stride, device=device
            )
        else:
            self.downsample = nn.Identity()

        self.bn1 = nn.BatchNorm1d(in_channels, device=device)
        self.conv1 = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            device=device,
        )

        self.bn2 = nn.BatchNorm1d(out_channels, device=device)
        self.conv2 = nn.Conv1d(
            out_channels, out_channels, kernel_size=3, padding=1, device=device
        )

        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout1d(0.4)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # main path: two pre-activated convolutions
        out = self.conv1(self.relu(self.bn1(x)))
        out = self.conv2(self.dropout(self.relu(self.bn2(out))))
        # residual connection (in-place add onto the fresh conv2 output)
        out += self.downsample(x)
        return out
0 commit comments