@@ -1,27 +1,9 @@
 import os
-import time
 from typing import *
-
-import matplotlib.pyplot as plt
 import numpy as np
-import torch
-import torch.nn as nn
-import torch.optim as optim
-import torch.multiprocessing as mp
-
-from stable_baselines3 import PPO
-from stable_baselines3.common.env_checker import check_env
-from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
-
 import gymnasium as gym
 
-from onnx_utils import export_onnx, test_onnx
 from config import *
-from CNN1DExtractor import CNN1DExtractor
-from TemporalResNetExtractor import TemporalResNetExtractor
-from CNN1DResNetExtractor import CNN1DResNetExtractor
-
-if B_DEBUG: from DynamicActionPlotCallback import DynamicActionPlotDistributionCallback
 
 
 def log(s: str):
@@ -108,112 +90,4 @@ def step(self, action):
         # check if the context is correct
         # if self.simulation_rank == 0:
         #     print(f"{(obs[0] == 0).mean():.3f} {(obs[1] == 0).mean():.3f}")
-        return obs, reward, done, truncated, info
-
-
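-# entry point: one PPO learner driving n_simulations parallel Webots environments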
-if __name__ == "__main__":
-    # makedirs with exist_ok replaces the racy exists()/mkdir pair
-    os.makedirs("/tmp/autotech/", exist_ok=True)
-
-    # wipe stale files left over from a previous run
-    os.system('if [ -n "$(ls /tmp/autotech)" ]; then rm /tmp/autotech/*; fi')
-    if B_DEBUG:
-        print("Webots started", file=open("/tmp/autotech/logs", "w"))
-
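-    # one gym environment per Webots simulation instance, identified by rank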
-    def make_env(rank: int):
-        log(f"CAREFUL !!! created a SERVER env with {rank=}")
-        return WebotsSimulationGymEnvironment(rank)
-
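-    # rank=rank binds each loop value as a default argument; a bare closure
-    # would late-bind and hand every subprocess the final rank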
-    envs = SubprocVecEnv([lambda rank=rank: make_env(rank) for rank in range(n_simulations)])
-
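-    # CNN1DExtractor and CNN1DResNetExtractor (imported above) are drop-in alternatives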
-    ExtractorClass = TemporalResNetExtractor
-
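-    # SB3 builds ExtractorClass(observation_space, **features_extractor_kwargs)
-    # and stacks the [512, 512, 512] MLP from net_arch on top of its features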
-    policy_kwargs = dict(
-        features_extractor_class=ExtractorClass,
-        features_extractor_kwargs=dict(
-            context_size=context_size,
-            lidar_horizontal_resolution=lidar_horizontal_resolution,
-            camera_horizontal_resolution=camera_horizontal_resolution,
-            device=device,
-        ),
-        activation_fn=nn.ReLU,
-        net_arch=[512, 512, 512],
-    )
-
-
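-    # PPO hyperparameters shared by the resume and from-scratch branches below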
-    ppo_args = dict(
-        n_steps=4096,
-        n_epochs=10,
-        batch_size=256,
-        learning_rate=3e-4,
-        gamma=0.99,
-        verbose=1,
-        normalize_advantage=True,
-        device=device,
-    )
-
-
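-    # checkpoints live next to this script, one directory per extractor class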
-    save_path = os.path.dirname(__file__) + "/checkpoints/" + ExtractorClass.__name__ + "/"
-    # makedirs also creates the intermediate checkpoints/ directory, which os.mkdir would not
-    os.makedirs(save_path, exist_ok=True)
-
-    print(save_path)
-    print(os.listdir(save_path))
-
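-    # checkpoints are named "<iteration>.zip"; ignore anything else in the directory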
-    # removesuffix (Python 3.9+) strips the literal ".zip"; rstrip(".zip") would
-    # eat any trailing '.', 'z', 'i', 'p' characters instead
-    valid_files = [x for x in os.listdir(save_path) if x.removesuffix(".zip").isnumeric()]
-
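-    # resume from the highest-numbered checkpoint when one exists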
-    if valid_files:
-        model_name = max(valid_files, key=lambda x: int(x.removesuffix(".zip")))
-        print(f"Loading model {save_path + model_name}")
-        model = PPO.load(
-            save_path + model_name,
-            envs,
-            **ppo_args,
-            policy_kwargs=policy_kwargs,
-        )
-        i = int(model_name.removesuffix(".zip")) + 1
-        print(f"----- Model found, loading {model_name} -----")
-
-    else:
-        model = PPO(
-            "MlpPolicy",
-            envs,
-            **ppo_args,
-            policy_kwargs=policy_kwargs,
-        )
-
-        i = 0
-        print("----- Model not found, creating a new one -----")
-
-    print("MODEL HAS HYPER PARAMETERS:")
-    print(f"{model.learning_rate=}")
-    print(f"{model.gamma=}")
-    print(f"{model.verbose=}")
-    print(f"{model.n_steps=}")
-    print(f"{model.n_epochs=}")
-    print(f"{model.batch_size=}")
-    print(f"{model.device=}")
-
-    log("SERVER: finished executing")
-
-    # obs = envs.reset()
-    # while True:
-    #     action, _states = model.predict(obs, deterministic=True)  # Use deterministic=True for evaluation
-    #     obs, reward, done, info = envs.step(action)
-    #     envs.render()  # Optional: visualize the environment
-
-
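-    # endless training loop: export and sanity-check ONNX, learn 500k steps, save "<i>.zip"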
-    while True:
-        export_onnx(model)
-        test_onnx(model)
-
-        if B_DEBUG:
-            model.learn(total_timesteps=500_000, callback=DynamicActionPlotDistributionCallback())
-        else:
-            model.learn(total_timesteps=500_000)
-
-        model.save(save_path + str(i))
-
-        i += 1
+        return obs, reward, done, truncated, info
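
For readers without the extractor modules to hand, here is a minimal sketch of the
contract the deleted policy_kwargs block relies on. Stable-Baselines3 instantiates
features_extractor_class(observation_space, **features_extractor_kwargs), so the
class must accept exactly those keyword arguments and emit a (batch, features_dim)
tensor for the net_arch MLP to consume. The class name, layer sizes, and
features_dim below are invented for illustration; the real TemporalResNetExtractor
is not shown in this commit.

import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class TemporalResNetExtractorSketch(BaseFeaturesExtractor):
    # hypothetical stand-in for the real extractor; the signature mirrors
    # features_extractor_kwargs in the training script above
    def __init__(self, observation_space: gym.spaces.Box, context_size: int,
                 lidar_horizontal_resolution: int, camera_horizontal_resolution: int,
                 device: str = "cpu", features_dim: int = 256):
        # SB3 passes the env's observation space first, then the kwargs
        super().__init__(observation_space, features_dim)
        # context/resolution kwargs are accepted to mirror the config but unused
        # in this stub; the input size is taken from the observation space itself
        n_inputs = int(np.prod(observation_space.shape))
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(n_inputs, features_dim),
            nn.ReLU(),
        ).to(device)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # (batch, features_dim) output feeds the [512, 512, 512] policy MLP
        return self.net(observations)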