Cognitive-AI-Systems
diff --git a/‎.gitignore‎
Lines changed: 132 additions & 0 deletions b/‎.gitignore‎
Lines changed: 132 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 65 additions & 0 deletions b/‎README.md‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎agents/assistant_switcher.py‎
Lines changed: 36 additions & 0 deletions b/‎agents/assistant_switcher.py‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎agents/epom.py‎
Lines changed: 135 additions & 0 deletions b/‎agents/epom.py‎
Lines changed: 135 additions & 0 deletions
@@ -0,0 +1,132 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Ignore MAC service files
+.DS_Store
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 AIRI - Artificial Intelligence Research Institute
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,65 @@
+# When to Switch: Planning and Learning For Partially Observable Multi-Agent Pathfinding
+
+This repository provides the implementation of the "When to Switch" paper, offering various policies and algorithms
+designed to address the challenging problem of finding non-conflicting paths for a set of agents in an environment that
+is only partially observable to each agent (PO-MAPF).
+The repository includes two main policies: one is based on search-based re-planning (**RePlan**), and the other is based
+on reinforcement learning (**EPOM**).
+Additionally, the repository features three implementations of mixed policies, which switch between **RePlan** and **EPOM**.
+
+## Installation
+
+Install all dependencies using:
+
+```bash
+pip install -r docker/requirements.txt
+```
+
+## Inference Example
+
+
+To download the pre-trained weights, use this [link](https://drive.google.com/file/d/1LMu2YOxzQbWDDacQaV7R-Pizkvjpp8R_/view?usp=sharing).
+
+Execute **EPOM**, **RePlan**, **ASwitcher**, **LSwitcher**, and **HSwitcher** to generate animations using pre-trained
+weights with the following command:
+
+```bash
+python example.py
+```
+
+
+The animations will be stored in the ```renders``` folder.
+
+## Training EPOM
+
+To train **EPOM**, execute ```train_epom.py``` with the ```learning/train.yaml``` config file:
+
+```bash
+python train_epom.py --config_path="learning/train.yaml"
+```
+
+## Training LSwitcher
+
+To train the **LSwitcher** estimator for the **RePlan** or **EPOM** algorithm, use the commands below:
+
+```bash
+python train_lswitcher.py --algo="RePlan"
+```
+
+```bash
+python train_lswitcher.py --algo="EPOM"
+```
+
+## Citation
+
+If you use this repository in your research or wish to reference it, please cite our TNNLS paper:
+
+```bibtex
+@article{skrynnik2023switch,
+    title = {When to Switch: Planning and Learning for Partially Observable Multi-Agent Pathfinding},
+    author = {Skrynnik, Alexey and Andreychuk, Anton and Yakovlev, Konstantin and Panov, Aleksandr I},
+    journal = {IEEE Transactions on Neural Networks and Learning Systems},
+    year = {2023},
+    publisher = {IEEE}
+}
+```
@@ -0,0 +1,36 @@
+from pathlib import Path
+
+from agents.utils_agents import run_algorithm
+
+try:
+    from typing import Literal
+except ImportError:
+    from typing_extensions import Literal
+
+from pydantic import Extra
+
+from agents.replan import RePlanConfig
+from agents.utils_switching import SwitcherBaseConfig, SwitcherBase
+
+
+class ASwitcherConfig(SwitcherBaseConfig, extra=Extra.forbid):
+    name: Literal['ASwitcher'] = 'ASwitcher'
+    planning: RePlanConfig = RePlanConfig(name='RePlanCPP', fix_loops=True, add_none_if_loop=True, no_path_random=False,
+                                          use_best_move=False, fix_nones=False)
+
+
+class AssistantSwitcher(SwitcherBase):
+
+    def get_learning_use_mask(self, planning_actions, learning_actions, observations):
+        return [a is None for a in planning_actions]
+
+
+def example_assistant_switcher(map_name='sc1-AcrosstheCape', max_episode_steps=512, seed=None, num_agents=64,
+                               main_dir='./', animate=False):
+    from agents.epom import EpomConfig
+    algo = AssistantSwitcher(ASwitcherConfig(learning=EpomConfig(path_to_weights=str(main_dir / Path('weights/epom')))))
+    return run_algorithm(algo, map_name, max_episode_steps, seed, num_agents, animate)
+
+
+if __name__ == '__main__':
+    # Script entry point: with main_dir='../' the weights are looked up under '../weights/epom'.
+    print(example_assistant_switcher(main_dir='../'))
@@ -0,0 +1,135 @@
+import json
+from copy import deepcopy
+from os.path import join
+from pathlib import Path
+
+try:
+    from typing import Literal
+except ImportError:
+    from typing_extensions import Literal
+
+import torch
+
+from pydantic import Extra
+
+from sample_factory.algorithms.appo.actor_worker import transform_dict_observations
+from sample_factory.algorithms.appo.learner import LearnerWorker
+from sample_factory.algorithms.appo.model import create_actor_critic
+from sample_factory.algorithms.appo.model_utils import get_hidden_size
+from sample_factory.envs.create_env import create_env
+from sample_factory.utils.utils import AttrDict
+
+from agents.utils_agents import AlgoBase, run_algorithm
+from learning.epom_config import Environment
+from learning.grid_memory import MultipleGridMemory
+from pomapf_env.wrappers import MatrixObservationWrapper
+
+from train_epom import validate_config, register_custom_components
+
+
+class EpomConfig(AlgoBase, extra=Extra.forbid):
+    """Configuration of the EPOM policy; unknown fields are rejected (``Extra.forbid``)."""
+    name: Literal['EPOM'] = 'EPOM'
+    # Directory from which ``EPOM`` reads ``cfg.json`` and the ``checkpoint_p<policy_index>`` folder.
+    path_to_weights: str = "weights/epom"
+
+
+class EPOM:
+    def __init__(self, algo_cfg):
+        self.algo_cfg: EpomConfig = algo_cfg
+
+        path = algo_cfg.path_to_weights
+        device = algo_cfg.device
+        register_custom_components()
+
+        self.path = path
+        self.env = None
+        config_path = join(path, 'cfg.json')
+        with open(config_path, "r") as f:
+            config = json.load(f)
+        exp, flat_config = validate_config(config['full_config'])
+        algo_cfg = flat_config
+
+        env = create_env(algo_cfg.env, cfg=algo_cfg, env_config={})
+        actor_critic = create_actor_critic(algo_cfg, env.observation_space, env.action_space)
+        env.close()
+
+        if device == 'cpu' or not torch.cuda.is_available():
+            device = torch.device('cpu')
+        else:
+            device = torch.device('cuda')
+        self.device = device
+
+        actor_critic.model_to_device(device)
+        policy_id = algo_cfg.policy_index
+        checkpoints = join(path, f'checkpoint_p{policy_id}')
+        checkpoints = LearnerWorker.get_checkpoints(checkpoints)
+        checkpoint_dict = LearnerWorker.load_checkpoint(checkpoints, device)
+        actor_critic.load_state_dict(checkpoint_dict['model'])
+
+        self.ppo = actor_critic
+        self.device = device
+        self.cfg = algo_cfg
+
+        self.rnn_states = None
+        self.mgm = MultipleGridMemory()
+        self._step = 0
+
+    def after_reset(self):
+        torch.manual_seed(self.algo_cfg.seed)
+        self.mgm.clear()
+        self._step = 0
+
+    def get_additional_info(self):
+        result = {"rl_used": 1.0, }
+        return result
+
+    def get_name(self):
+        return Path(self.path).name
+
+    def act(self, observations, rewards=None, dones=None, infos=None):
+        observations = deepcopy(observations)
+        if self.rnn_states is None or len(self.rnn_states) != len(observations):
+            self.rnn_states = torch.zeros([len(observations), get_hidden_size(self.cfg)], dtype=torch.float32,
+                                          device=self.device)
+        env_cfg: Environment = Environment(**self.cfg.full_config['environment'])
+        self.mgm.update(observations)
+        gm_radius = env_cfg.grid_memory_obs_radius
+        self.mgm.modify_observation(observations, obs_radius=gm_radius if gm_radius else env_cfg.grid_config.obs_radius)
+        observations = MatrixObservationWrapper.to_matrix(observations)
+
+        with torch.no_grad():
+
+            obs_torch = AttrDict(transform_dict_observations(observations))
+            for key, x in obs_torch.items():
+                obs_torch[key] = torch.from_numpy(x).to(self.device).float()
+            policy_outputs = self.ppo(obs_torch, self.rnn_states, with_action_distribution=True)
+
+            self.rnn_states = policy_outputs.rnn_states
+            actions = policy_outputs.actions
+
+        self._step += 1
+        result = actions.cpu().numpy()
+        return result
+
+    def clear_hidden(self, agent_idx):
+        if self.rnn_states is not None:
+            self.rnn_states[agent_idx] = torch.zeros([get_hidden_size(self.cfg)], dtype=torch.float32,
+                                                     device=self.device)
+
+    def after_step(self, dones):
+        for agent_idx, done_flag in enumerate(dones):
+            if done_flag:
+                self.clear_hidden(agent_idx)
+
+        if all(dones):
+            self.rnn_states = None
+            self.mgm.clear()
+
+
+def example_epom(map_name='sc1-AcrosstheCape', max_episode_steps=512, seed=None, num_agents=64, main_dir='./',
+                 animate=False):
+    algo = EPOM(EpomConfig(path_to_weights=str(main_dir / Path('weights/epom'))))
+    return run_algorithm(algo, map_name, max_episode_steps, seed, num_agents, animate)
+
+
+if __name__ == '__main__':
+    # Script entry point: with main_dir='../' the weights are looked up under '../weights/epom'.
+    print(example_epom(main_dir='../'))