Skip to content
Open

Dev #39

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
295 commits
Select commit Hold shift + click to select a range
3f018ef
Update episode collector
shuvoxcd01 Apr 20, 2025
af6d555
Update multiprocessing
shuvoxcd01 Apr 20, 2025
9dec3e2
Rename directory
shuvoxcd01 Apr 20, 2025
99c6d58
Change directory
shuvoxcd01 Apr 20, 2025
bd4f877
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 Apr 20, 2025
9e93981
Merge dev into research-dev
shuvoxcd01 Apr 20, 2025
13ca329
Fix imports
shuvoxcd01 Apr 20, 2025
ae8aaa6
Change directory
shuvoxcd01 Apr 20, 2025
c59ec2b
Fix imports
shuvoxcd01 Apr 20, 2025
a409fb3
Fix episode return
shuvoxcd01 Apr 20, 2025
4febf6f
Update neuroevolution algorithm
shuvoxcd01 Apr 20, 2025
ff5a5c3
Add neuroevolution example
shuvoxcd01 Apr 20, 2025
5bf9d45
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 Apr 20, 2025
6423b35
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Apr 20, 2025
125667c
ppo_off_policy_WIP
shuvoxcd01 Apr 21, 2025
f3cf64c
Update .gitignore
shuvoxcd01 Apr 21, 2025
a95ecae
Update .gitignore
shuvoxcd01 Apr 21, 2025
13609a4
Format files
shuvoxcd01 Apr 21, 2025
45fa0b4
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 Apr 21, 2025
c013a78
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Apr 21, 2025
58e5ed5
Format files
shuvoxcd01 Apr 21, 2025
95c155b
Updated algorithms
shuvoxcd01 Apr 21, 2025
d2196de
Change value estimator class
shuvoxcd01 Apr 21, 2025
b624132
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 Apr 21, 2025
e03b6b3
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Apr 21, 2025
5865aae
Add parent tracker
shuvoxcd01 May 10, 2025
7ccd61c
Track generation
shuvoxcd01 May 10, 2025
98c8d90
Add QAssistedNeuroEvolution
shuvoxcd01 May 10, 2025
0bebc0f
Add DeepQLearningWithExperienceReplay
shuvoxcd01 May 10, 2025
48bcbae
Add performance evaluator for evolutionary rl
shuvoxcd01 May 10, 2025
e2df655
Add additional properties
shuvoxcd01 May 10, 2025
bb62cd6
Use truncation selection from Selection module
shuvoxcd01 May 10, 2025
7856eb0
Minor refactor
shuvoxcd01 May 10, 2025
b40fb36
Add Selection module
shuvoxcd01 May 10, 2025
60722cd
Add async tensorboard logger
shuvoxcd01 May 10, 2025
8dd30f6
Add deep q learning
shuvoxcd01 May 10, 2025
fb85952
Annotate _train as abstractmethod
shuvoxcd01 May 10, 2025
040e501
Add simple replay buffer
shuvoxcd01 May 10, 2025
30c8d77
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 May 10, 2025
8b9c255
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 May 10, 2025
db4e23d
Update algorithm listing
shuvoxcd01 May 10, 2025
217f8a5
Add save_policy parameter to train
shuvoxcd01 May 10, 2025
a081701
Make num_actions parameter optional
shuvoxcd01 May 10, 2025
c6230f0
Update ActorCriticPolicy policy call with appropriate argument
shuvoxcd01 May 10, 2025
d291011
Delegate env to parent class
shuvoxcd01 May 10, 2025
c84910d
Rename parameter
shuvoxcd01 May 10, 2025
47f25e0
Disable flatenning
shuvoxcd01 May 10, 2025
4eb9ebd
Add tests
shuvoxcd01 May 10, 2025
bef7698
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 May 10, 2025
fe0ce99
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 May 10, 2025
796232d
Remove unused imports
shuvoxcd01 May 11, 2025
e2902dd
Change base class type
shuvoxcd01 May 11, 2025
2c508bf
Add policy for Atari
shuvoxcd01 May 11, 2025
281ec61
Add q-network for Atari
shuvoxcd01 May 11, 2025
8ad7dc2
Update algorithm
shuvoxcd01 May 11, 2025
8369f0b
Change type-hint for q_network parameter
shuvoxcd01 May 11, 2025
0238b7c
Add research algorithm
shuvoxcd01 May 11, 2025
57e8ad6
Add examples for atari
shuvoxcd01 May 11, 2025
4239472
Reformat
shuvoxcd01 May 11, 2025
9e315ab
Update example
shuvoxcd01 May 11, 2025
101b5ff
Update algorithm
shuvoxcd01 May 11, 2025
fbac2f2
Fix channel and batch dim
shuvoxcd01 May 11, 2025
410e4af
Add mechanism to pop elements
shuvoxcd01 May 11, 2025
2567b9c
Reformat
shuvoxcd01 May 11, 2025
af0f7ad
Add cuda support
shuvoxcd01 May 13, 2025
c3c985e
Rename directory
shuvoxcd01 May 13, 2025
084969e
Add save and load mechanisms
shuvoxcd01 May 25, 2025
8518c50
Add autofire wrapper
shuvoxcd01 May 25, 2025
945a9c2
Add flag for q-derived policy evaluation
shuvoxcd01 May 26, 2025
8322dfd
Add idle_truncation_wrapper
shuvoxcd01 May 26, 2025
f390001
save_networks
shuvoxcd01 May 26, 2025
e6cf041
Reformat
shuvoxcd01 May 26, 2025
6539462
Periodically save q and agent network
shuvoxcd01 May 31, 2025
7dcfc99
Add q vs agent network comparison
shuvoxcd01 May 31, 2025
83bb1af
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 May 31, 2025
6be9525
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 May 31, 2025
298ca2f
Add saving and loading mechanism
shuvoxcd01 May 31, 2025
a0bde4f
Reformat files
shuvoxcd01 May 31, 2025
a8024e6
Add mechanism to load and save q network
shuvoxcd01 May 31, 2025
9fde844
Reformat files
shuvoxcd01 May 31, 2025
bd2a9b6
Merge remote-tracking branch 'public/dev' into dev
shuvoxcd01 May 31, 2025
16305fa
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 May 31, 2025
ece4464
Reformat
shuvoxcd01 Jun 1, 2025
ecdc4e3
Add target network
shuvoxcd01 Jun 1, 2025
305addc
Separate target and online network
shuvoxcd01 Jun 1, 2025
ce68ad4
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 Jun 1, 2025
aed79c5
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Jun 1, 2025
0a3981d
Add option for training with num_steps along with num_episodes
shuvoxcd01 Jun 10, 2025
c275689
Add empty _train_steps method
shuvoxcd01 Jun 10, 2025
76b2c00
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into pub…
shuvoxcd01 Jun 10, 2025
fc8d31b
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into res…
shuvoxcd01 Jun 10, 2025
5c82764
Add optional info
shuvoxcd01 Jun 10, 2025
e69ed79
Add base class for evolutionary RL algorithms
shuvoxcd01 Jun 10, 2025
9fa0c0d
Refactor
shuvoxcd01 Jun 10, 2025
a41fe04
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Jun 10, 2025
24899e9
Refactor
shuvoxcd01 Jun 14, 2025
90e24f4
Add extra dim for scalar observations
shuvoxcd01 Jun 14, 2025
af9329d
Refactor
shuvoxcd01 Jun 14, 2025
331910e
Add numpy array as input
shuvoxcd01 Jun 14, 2025
7c6bb13
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into res…
shuvoxcd01 Jun 14, 2025
d512d3a
Rename parameter
shuvoxcd01 Jun 14, 2025
6b52306
Refactor
shuvoxcd01 Jun 14, 2025
211cd45
Reformat
shuvoxcd01 Jun 14, 2025
5f78a9b
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Jun 14, 2025
07c6775
Reformat
shuvoxcd01 Jun 14, 2025
8b23317
Fix device placement
shuvoxcd01 Jun 14, 2025
806ad8a
Add option to add network graph to tensorboard
shuvoxcd01 Jun 16, 2025
c3b7170
Add graph to tensorbaord
shuvoxcd01 Jun 16, 2025
d2620e9
Rename parameter
shuvoxcd01 Jun 16, 2025
cef7b87
Use encoding
shuvoxcd01 Jun 16, 2025
2ed095c
Add embedding based feature extractor
shuvoxcd01 Jun 16, 2025
8e0414d
Add q_network with embedding layer
shuvoxcd01 Jun 16, 2025
7801491
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Jun 16, 2025
834314c
Add examples
shuvoxcd01 Jun 16, 2025
286b992
Add preprocessing
shuvoxcd01 Jun 16, 2025
424d39b
Reformat
shuvoxcd01 Jun 16, 2025
e3d9403
Add dqn taxi example
shuvoxcd01 Jun 18, 2025
f437bc6
Update example
shuvoxcd01 Jun 18, 2025
51c4a2e
Dynamically adjust mutation std
shuvoxcd01 Jun 21, 2025
136552f
Use embedding instead of one hot
shuvoxcd01 Jun 21, 2025
663d7a2
Update in and out features number
shuvoxcd01 Jun 21, 2025
df8d36b
Introduce embedding
shuvoxcd01 Jun 21, 2025
096ef35
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into pub…
shuvoxcd01 Jun 21, 2025
46cda15
Merge branch 'research-dev' of https://github.com/shuvoxcd01/GridMind…
shuvoxcd01 Jun 21, 2025
d2d3925
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Jun 21, 2025
6931b15
Update gitignore
shuvoxcd01 Jun 22, 2025
211e8df
Update algorithm
shuvoxcd01 Jun 22, 2025
4beacec
Refactor
shuvoxcd01 Jun 22, 2025
8df4678
Update .gitignore
shuvoxcd01 Jun 22, 2025
b81f544
Refactor
shuvoxcd01 Jun 22, 2025
7c40959
Add taxi_q_network
shuvoxcd01 Jun 22, 2025
ed9172b
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Jun 22, 2025
459a132
Chage method name
shuvoxcd01 Jun 22, 2025
7ddc982
Change method name
shuvoxcd01 Jun 22, 2025
191f1a8
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Jun 22, 2025
ba6bedd
Update algorithm
shuvoxcd01 Jun 23, 2025
b0d53df
Add method to get action probabilities
shuvoxcd01 Jun 23, 2025
8244d92
Refactor
shuvoxcd01 Jun 23, 2025
399daee
Add comparisons
shuvoxcd01 Jun 23, 2025
b6d7f02
Add metrics
shuvoxcd01 Jun 23, 2025
e9b8730
Add info
shuvoxcd01 Jun 23, 2025
3c75e81
Update examples
shuvoxcd01 Jun 23, 2025
4f1bdfa
Add wrapper env for taxi
shuvoxcd01 Jun 23, 2025
95246c5
Merge branch 'research-dev' of https://github.com/shuvoxcd01/GridMind…
shuvoxcd01 Jun 23, 2025
6fbbc47
Add soft update of target network
shuvoxcd01 Jun 23, 2025
22f46c5
Add default option for soft update
shuvoxcd01 Jun 23, 2025
7e3b3a9
Add max_grad_norm parameter and gradient clipping to prevent explodin…
shuvoxcd01 Jun 23, 2025
7e5f108
Add customizable loss function to DeepQLearningWithExperienceReplay
shuvoxcd01 Jun 23, 2025
ceafacc
Refactor QAssistedNeuroEvolution class to improve code readability an…
shuvoxcd01 Jun 25, 2025
55bf345
Refactor code for improved readability and consistency by removing un…
shuvoxcd01 Jun 25, 2025
2621124
Add example usage for LunarLander with Deep Q-Learning agent
shuvoxcd01 Jun 25, 2025
64d535c
Add configuration files and performance comparison script for LunarLa…
shuvoxcd01 Jun 29, 2025
7b8b819
Refactor QAssistedNeuroEvolution class to enhance mutation parameter …
shuvoxcd01 Jun 29, 2025
e79926f
Enhance NeuroEvolution class with generation tracking and best agent …
shuvoxcd01 Jun 29, 2025
681c1c9
Enhance NeuroEvolution class with generation tracking and best agent …
shuvoxcd01 Jun 29, 2025
a418d67
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 Jun 29, 2025
136c5b9
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Jun 29, 2025
df6f7b2
Fix summary directory handling in configuration loading
shuvoxcd01 Jun 30, 2025
da51aa1
Add configuration files for Lunar Lander environment
shuvoxcd01 Jun 30, 2025
9db0c85
Refactor configuration loading in Q-assisted neuroevolution script to…
shuvoxcd01 Jun 30, 2025
ffdebeb
Remove unused imports in Q-assisted neuroevolution performance compar…
shuvoxcd01 Jun 30, 2025
50399f5
Fix configuration file processing to ensure only .ini files are proce…
shuvoxcd01 Jun 30, 2025
e54f519
Fix configuration loading to use dynamic generation count
shuvoxcd01 Jun 30, 2025
0ebde91
Add configuration files for Lunar Lander environment with updated par…
shuvoxcd01 Jun 30, 2025
1543912
Fix import path for ConfigLoader in Q-assisted neuroevolution perform…
shuvoxcd01 Jun 30, 2025
022367e
Refactor configuration loading and environment setup in Q-assisted ne…
shuvoxcd01 Jun 30, 2025
5f8c32b
Add basic configuration file for Taxi environment setup
shuvoxcd01 Jun 30, 2025
59b76ec
Refactor training method names from _train to train for consistency a…
shuvoxcd01 Jul 14, 2025
56c1f32
Remove unnecessary blank lines in BaseEvoRLAlgorithm class for improv…
shuvoxcd01 Jul 14, 2025
736d957
Add __all__ export for algorithm classes in __init__.py
shuvoxcd01 Jul 14, 2025
5088142
Fix import error handling for SAVE_DATA_DIR in deep_q_learning.py
shuvoxcd01 Jul 14, 2025
0876a29
Refactor constructor calls in MonteCarloEveryVisitPrediction, MonteCa…
shuvoxcd01 Jul 14, 2025
ad75de6
List algorithms in __init__.py
shuvoxcd01 Jul 15, 2025
1cf9f3e
Add feature construction classes to __init__.py for easy access
shuvoxcd01 Jul 15, 2025
22e89b2
Add CITATION.cff and pre-commit hooks for version synchronization
shuvoxcd01 Aug 23, 2025
f5ad74f
Merge branch 'main' of https://github.com/shuvoxcd01/GridMind into pu…
shuvoxcd01 Aug 23, 2025
e37e5a0
Refactor BaseLearningAlgorithm constructor to handle env initializati…
shuvoxcd01 Aug 26, 2025
d3ac74a
Update optional dependencies for rl-worlds to version 0.0.3.post1
shuvoxcd01 Aug 27, 2025
64e6abb
Move episode collector and trajectory imports to the correct utility …
shuvoxcd01 Aug 27, 2025
0a3dd94
Add DeterministicLookupPolicy class for action selection based on a l…
shuvoxcd01 Aug 27, 2025
2f6144c
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 Aug 27, 2025
7da9394
Add CITATION.cff to .gitignore
shuvoxcd01 Aug 27, 2025
17f26a6
Refactor ActorCriticPolicy to accept observation shape and number of …
shuvoxcd01 Oct 31, 2025
e9ae10a
Merge branch 'dev' into research-dev
shuvoxcd01 Oct 31, 2025
b95bb55
Fix training function reference in BaseEvoRLAlgorithm to use the abst…
shuvoxcd01 Nov 2, 2025
2bf2c0d
Implement QLearningExperienceReplay class with experience replay func…
shuvoxcd01 Nov 2, 2025
dd2de7a
Refactor NeuroAgent to use 'policy' instead of 'network' for clarity;…
shuvoxcd01 Nov 2, 2025
a258f21
Refactor update_citation_version.py to use double quotes for string l…
shuvoxcd01 Nov 5, 2025
54a8c54
Reformat
shuvoxcd01 Nov 5, 2025
0551add
Reformat
shuvoxcd01 Nov 5, 2025
ca6998d
Refactor to replace QAssistedNeuroEvolution with DeepQAssistedNeuroEv…
shuvoxcd01 Nov 5, 2025
198ce14
Add BaseQAssistedNeuroEvolution class and related components
shuvoxcd01 Nov 5, 2025
03f3b0e
Refactor code for improved readability and consistency across multipl…
shuvoxcd01 Nov 5, 2025
f20ee65
Refactor code for consistency and readability; update formatting in v…
shuvoxcd01 Nov 5, 2025
89cefed
Add get_all_action_probabilities method to policy classes; rename get…
shuvoxcd01 Nov 17, 2025
f401f6b
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind-Research…
shuvoxcd01 Nov 17, 2025
fa9a6ef
Add @torch.no_grad() decorator to mutation methods and update action …
shuvoxcd01 Nov 17, 2025
8868692
Add behavior_score property to NeuroAgent and update get_metadata method
shuvoxcd01 Dec 9, 2025
2a00eb9
Add KNNNeighborRetriever class for novelty search in evolutionary alg…
shuvoxcd01 Dec 9, 2025
4152f38
Add ParetoSelector class for non-dominated sorting and Pareto-front s…
shuvoxcd01 Dec 9, 2025
12e97e5
Add NoveltyUtils class for probability distribution analysis and dist…
shuvoxcd01 Dec 9, 2025
c22192d
Add assign_novelty_scores method to NeuroEvolutionUtil for novelty sc…
shuvoxcd01 Dec 9, 2025
c7b943c
Add novelty search functionality to BaseQAssistedNeuroEvolution class
shuvoxcd01 Dec 9, 2025
b74513c
Add use_novelty_search parameter to QAssistedNeuroEvolution class
shuvoxcd01 Dec 9, 2025
2f7d54a
Optimize Q-value conversion to use numpy for improved efficiency
shuvoxcd01 Dec 9, 2025
46b7ed9
Refactor action probability calculations to use numpy for improved pe…
shuvoxcd01 Dec 9, 2025
68e7812
Fix initialization of QTableDerivedEpsilonGreedyPolicy to correctly s…
shuvoxcd01 Dec 9, 2025
0a5d83c
Add video recording and evaluation parameters to neuroevolution classes
shuvoxcd01 Dec 12, 2025
a427406
Refactor and enhance policy classes; add new wrappers and utility fun…
shuvoxcd01 Dec 12, 2025
a9a8504
Fix QLearning constructor to include summary_dir and write_summary pa…
shuvoxcd01 Dec 12, 2025
9fd0b94
Add summary_dir and write_summary parameters to various learning algo…
shuvoxcd01 Dec 14, 2025
6f5a8cb
Update default_save_dir logic to handle missing SAVE_DATA_DIR
shuvoxcd01 Dec 14, 2025
a053f5a
Set epsilon value in QLearning constructor and update policy with eps…
shuvoxcd01 Dec 19, 2025
06486b8
Enhance action selection in QLearning and policy classes to support a…
shuvoxcd01 Dec 19, 2025
e21bcfe
chore: apply pre-commit auto-fixes and add additional hooks
shuvoxcd01 Dec 21, 2025
e86876f
chore: apply formatting fixes to example files
shuvoxcd01 Dec 21, 2025
75920e2
Fix CITATION.cff to correctly identify "Das" as family name (#17)
Copilot Dec 21, 2025
2890cd6
chore: update version and dependencies in pyproject.toml; fix citatio…
shuvoxcd01 Dec 21, 2025
0ee2c03
Merge remote-tracking branch 'public/main' into public_dev
shuvoxcd01 Dec 21, 2025
ce71387
Fix QNetworkDerivedEpsilonGreedyPolicy missing num_actions parameter …
Copilot Dec 21, 2025
9f23fc6
feat: add MinMaxNormalizer to feature construction pipeline and updat…
shuvoxcd01 Dec 31, 2025
412d879
feat: implement MinMaxNormalizer for scaling continuous observations
shuvoxcd01 Dec 31, 2025
41faba5
feat: add GridDiscretizationWrapper for discretizing continuous obser…
shuvoxcd01 Dec 31, 2025
d4e3025
fix: correct syntax error in MinMaxNormalizer initialization
shuvoxcd01 Dec 31, 2025
a330369
feat: add .mcp.json to .gitignore
shuvoxcd01 Dec 31, 2025
aa889ef
fix: update urllib3 version in dependencies
shuvoxcd01 Jan 8, 2026
8d73935
feat: update .gitignore to include research files and Obsidian notes …
shuvoxcd01 Jan 8, 2026
c1dd3dd
feat: enhance _get_greedy_action method to support action masking
shuvoxcd01 Jan 8, 2026
589dfc7
feat: implement Prioritized Experience Replay Buffer with SumTree str…
shuvoxcd01 Jan 8, 2026
68339a3
fix: correct training function reference in BaseEvoRLAlgorithm
shuvoxcd01 Jan 18, 2026
0d9e007
refactor: update NeuroEvolution to use policy instead of network for …
shuvoxcd01 Jan 18, 2026
d788764
Fix tabular q update (#37)
shuvoxcd01 Feb 18, 2026
942988a
feat: implement Q-Learning with Eligibility Trace and enhance Q-value…
shuvoxcd01 Feb 19, 2026
a049671
Add OneStep PPO algorithm with training and evaluation setup
shuvoxcd01 May 12, 2026
e1fc6e0
refactor: streamline PPO class parameters and training logic for impr…
shuvoxcd01 May 12, 2026
11a8f6f
feat: implement get_all_action_probabilities method to return action …
shuvoxcd01 May 12, 2026
7cc290b
Merge branch 'dev' of https://github.com/shuvoxcd01/GridMind into pub…
shuvoxcd01 May 12, 2026
938b6d5
Merge branch 'main' of https://github.com/shuvoxcd01/GridMind into dev
shuvoxcd01 May 12, 2026
a776ac2
refactor: simplify method implementations and improve code clarity ac…
shuvoxcd01 May 12, 2026
0d03c13
refactor: optimize SumTree methods and enhance PrioritizedReplayBuffe…
shuvoxcd01 May 12, 2026
3aa75db
refactor: rename get_continuous_observation to discretize_observation…
shuvoxcd01 May 12, 2026
2baf831
refactor: update logging and summary handling across multiple algorithms
shuvoxcd01 May 15, 2026
7853e32
refactor: add write_summary=False to algorithm initializations in tes…
shuvoxcd01 May 15, 2026
e60cf62
refactor: remove main execution blocks from multiple files for cleane…
shuvoxcd01 May 15, 2026
9b47e17
refactor: enhance action probability handling and improve numerical s…
shuvoxcd01 May 15, 2026
64f7a43
feat: add QNetworkToStateValueEstimatorWrapper for state value estima…
shuvoxcd01 May 19, 2026
f74d19b
refactor: add getter and setter for policy in NeuroAgent class
shuvoxcd01 May 21, 2026
e35741a
fix: update NeuroAgent instantiation to use 'network' parameter inste…
shuvoxcd01 May 21, 2026
f7549e1
feat: add get_all_action_probabilities method to StochasticStartGreed…
shuvoxcd01 May 21, 2026
8849d51
fix: set write_summary parameter to False in various algorithms
shuvoxcd01 May 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,12 @@ example_usage/*
*.pth
.gitignore
CITATION.cff
CLAUDE.md
.claude/*
.reports/*
.mcp.json
research/*
GEMINI.md

#Obsidian notes vault
gridmind-notes-vault/*
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@


from gridmind.feature_construction.multi_hot import MultiHotEncoder
from gridmind.feature_construction.normalizer import MinMaxNormalizer
from gridmind.feature_construction.tile_coding import TileCoding
from gridmind.policies.parameterized.discrete_action_mlp_policy import (
DiscreteActionMLPPolicy,
Expand All @@ -13,13 +14,20 @@
)
import gymnasium as gym
import torch
import numpy as np


env = gym.make("MountainCar-v0")

# Feature construction pipeline: normalize -> tile coding -> multi-hot encoding
num_tilings = 7
normalizer = MinMaxNormalizer(
low=np.array([-1.2, -0.07]), # Mountain Car observation bounds
high=np.array([0.6, 0.07]),
)
multi_hot_encoder = MultiHotEncoder(num_categories=num_tilings**4)
tile_encoder = TileCoding(ihtORsize=num_tilings**4, numtilings=num_tilings)
feature_constructor = lambda x: multi_hot_encoder(tile_encoder(x))
feature_constructor = lambda x: multi_hot_encoder(tile_encoder(normalizer(x)))

observation, _ = env.reset()

Expand Down
37 changes: 17 additions & 20 deletions src/gridmind/algorithms/base_learning_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import dill
from gridmind.policies.base_policy import BasePolicy
import logging
from gridmind.config import get_save_dir
from gridmind.utils.divergence.base_divergence_detector import BaseDivergenceDetector
from gridmind.utils.logtools.async_tensorboard_logger import AsyncTensorboardLogger
from gridmind.utils.logtools.null_logger import NullWriter
from gridmind.utils.performance_evaluation.base_performance_evaluator import (
BasePerformanceEvaluator,
)
Expand All @@ -16,12 +17,6 @@
)
from gymnasium import Env
from tqdm import trange
from torch.utils.tensorboard import SummaryWriter

try:
from data import SAVE_DATA_DIR
except ImportError:
SAVE_DATA_DIR = None


class BaseLearningAlgorithm(ABC):
Expand All @@ -30,7 +25,7 @@ def __init__(
name: str,
env: Optional[Env] = None,
summary_dir: Optional[str] = None,
write_summary: bool = True,
write_summary: bool = False,
) -> None:
self.name = name
self.logger = logging.getLogger(self.__class__.__name__)
Expand All @@ -48,12 +43,14 @@ def __init__(
self.monitor_divergence = False
self.stop_on_divergence = False

self.summary_writer = NullWriter()
self.write_summary = write_summary
if self.write_summary:
assert (
summary_dir is not None or SAVE_DATA_DIR is not None
), "Please specify summary_dir"

if summary_dir is None and get_save_dir() is None:
raise ValueError(
"write_summary=True requires either summary_dir or "
"gridmind.config.set_save_dir() to be set."
)
self._initialize_summary_writer(summary_dir, env_name)

def _initialize_summary_writer(
Expand All @@ -63,7 +60,10 @@ def _initialize_summary_writer(
extra_info: str = "",
use_async_writer: bool = False,
):
summary_dir = summary_dir if summary_dir is not None else SAVE_DATA_DIR
from torch.utils.tensorboard import SummaryWriter
from gridmind.utils.logtools.async_tensorboard_logger import AsyncTensorboardLogger

summary_dir = summary_dir if summary_dir is not None else get_save_dir()

log_dir = os.path.join(
summary_dir,
Expand Down Expand Up @@ -291,11 +291,8 @@ def _training_wrapper(
if save_policy:
env_name = self.env.spec.id if self.env.spec is not None else "unknown"

if save_policy:
env_name = self.env.spec.id if self.env.spec is not None else "unknown"

if SAVE_DATA_DIR is not None:
saved_policy_dir = os.path.join(SAVE_DATA_DIR, env_name)
if get_save_dir() is not None:
saved_policy_dir = os.path.join(get_save_dir(), env_name)
self.save_policy(saved_policy_dir)

def _report_all_metrics(self):
Expand All @@ -314,8 +311,8 @@ def _report_all_metrics(self):

env_name = self.env.spec.id if self.env.spec is not None else "unknown"

if SAVE_DATA_DIR is not None:
saved_policy_dir = os.path.join(SAVE_DATA_DIR, env_name)
if get_save_dir() is not None:
saved_policy_dir = os.path.join(get_save_dir(), env_name)
self.save_policy(saved_policy_dir)

def evaluate_policy(self, num_episodes: int):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from abc import abstractmethod
from typing import Optional
from gridmind.algorithms.base_learning_algorithm import BaseLearningAlgorithm
from gridmind.policies.base_policy import BasePolicy
Expand All @@ -10,7 +11,7 @@ def __init__(
name: str,
env: Optional[Env] = None,
summary_dir: Optional[str] = None,
write_summary: bool = True,
write_summary: bool = False,
) -> None:
super().__init__(
name, env, summary_dir=summary_dir, write_summary=write_summary
Expand All @@ -34,10 +35,13 @@ def _train_episodes(self, num_episodes: int, prediction_only: bool):
def _train_steps(self, num_steps: int, prediction_only: bool, *args, **kwargs):
raise NotImplementedError

@abstractmethod
def _train(self, num_generations: int, *args, **kwargs): ...

def train(self, num_generations: int, save_policy: bool = True):
self._training_wrapper(
num_iter=num_generations,
prediction_only=False,
save_policy=save_policy,
training_fn=self.train,
training_fn=self._train,
)
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ def __init__(
def __repr__(self):
return f"NeuroAgent(id={self.id}, fitness={self.fitness}, starting_generation={self.starting_generation})"

@property
def policy(self):
return self.network

@policy.setter
def policy(self, value):
self.network = value

@property
def id(self):
return str(self._id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(
num_processes: Optional[int] = None,
stopping_fitness: Optional[float] = None,
summary_dir: Optional[str] = None,
write_summary: bool = True,
write_summary: bool = False,
):
super().__init__(
name="NeuroEvolution",
Expand All @@ -43,8 +43,6 @@ def __init__(
write_summary=write_summary,
)

self.env = env
self.name = "NeuroEvolution"
self.mu = mu
self._lambda = _lambda
self.mutation_mean = mutation_mean
Expand Down Expand Up @@ -81,7 +79,7 @@ def get_best(self, unwrapped: bool = True):
), "No best agent found. Train the algorithm first."

if unwrapped:
return self.best_agent.network
return self.best_agent.policy

return self.best_agent

Expand Down Expand Up @@ -163,7 +161,7 @@ def evaluate_fitness(

return sum_episode_return / average_over_episodes

def train(self, num_generations: int, *args, **kwargs):
def _train(self, num_generations: int, *args, **kwargs):
for num_gen in trange(num_generations):
agent_to_assess_fitness = []

Expand All @@ -172,8 +170,7 @@ def train(self, num_generations: int, *args, **kwargs):
agent_to_assess_fitness.append(agent)

fitness_scores = [
self.evaluate_fitness(agent.network)
for agent in agent_to_assess_fitness
self.evaluate_fitness(agent.policy) for agent in agent_to_assess_fitness
]

for agent, fitness in zip(agent_to_assess_fitness, fitness_scores):
Expand Down Expand Up @@ -227,55 +224,15 @@ def train(self, num_generations: int, *args, **kwargs):
for parent in parents:
for _ in range(self._lambda // self.mu):
mutated_param_vector = self.mutate(
network=parent.network,
network=parent.policy,
mean=self.mutation_mean,
std=self.mutation_std,
)
child = self.spawn_individual()
NeuroEvolutionUtil.set_parameters_vector(
child.network, mutated_param_vector
child.policy, mutated_param_vector
)
self.population.append(child)

self._generation += 1
return self.best_agent


if __name__ == "__main__":
from itertools import product

env = gym.make("CartPole-v1")

mutation_means = [0, 0.1, 0.2]
mutation_stds = [0.1, 0.2, 0.3]

mutation_rate_combinations = list(product(mutation_means, mutation_stds))

trained_agents = []

for mutation_mean, mutation_std in mutation_rate_combinations:
algorithm = NeuroEvolution(
env=env,
mu=5,
_lambda=20,
stopping_fitness=500,
mutation_mean=mutation_mean,
mutation_std=mutation_std,
)
trained_agents.append(algorithm.train(num_generations=1000))

eval_env = gym.make("CartPole-v1", render_mode="human")

policy = random.choice(trained_agents).network

obs, info = eval_env.reset()
done = False

episode_return = 0.0

while not done:
obs = algorithm._preprocess(obs)
action = policy.get_action(obs)
obs, reward, terminated, truncated, info = eval_env.step(action)
episode_return += reward
done = terminated or truncated
Original file line number Diff line number Diff line change
Expand Up @@ -48,43 +48,3 @@ def evaluate_fitness(
done = terminated or truncated

return sum_episode_return / average_over_episodes


if __name__ == "__main__":
import torch
import torch.nn as nn
import numpy as np

# Define a simple MLP
class SimpleNN(nn.Module):
def __init__(self, input_size=4, hidden_size=10, output_size=2):
super(SimpleNN, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, output_size)

def forward(self, x):
return self.fc2(torch.relu(self.fc1(x)))

# Create an instance of the model
model = SimpleNN()
vector = NeuroEvolutionUtil.get_parameters_vector(
model
) # Extract weights as a flat vector
print(vector)
print(vector.shape)
NeuroEvolutionUtil.set_parameters_vector(model, vector)

def mutate(model, mean, std):
chromosome = NeuroEvolutionUtil.get_parameters_vector(model)
noise = np.random.normal(loc=mean, scale=std, size=chromosome.shape)

mutated_chromosome = chromosome + noise

NeuroEvolutionUtil.set_parameters_vector(
model, mutated_chromosome
) # Set weights from a flat vector

return mutated_chromosome

mutated_vector = mutate(model, 0, 0.01)
print(mutated_vector)
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(
clip_grads: bool = True,
grad_clip_value: float = 1.0,
summary_dir: Optional[str] = None,
write_summary: bool = True,
write_summary: bool = False,
):
super().__init__(
"OneStepActorCritic",
Expand Down Expand Up @@ -129,9 +129,12 @@ def _train_episodes(self, num_episodes: int, prediction_only: bool = False):

next_observation = self._preprocess(next_observation)

next_state_value = (
self.value_estimator(next_observation) if not terminated else 0
)
with torch.no_grad():
next_state_value = (
self.value_estimator(next_observation)
if not terminated
else torch.tensor(0.0)
)

cur_state_value = self.value_estimator(observation)

Expand All @@ -146,7 +149,7 @@ def _train_episodes(self, num_episodes: int, prediction_only: bool = False):
self.logger.debug(f"Value grads: {value_grads}")

policy_grads = torch.autograd.grad(
torch.log(self.policy.get_action_prob(observation, action)),
self.policy.get_log_action_prob(observation, action),
self.policy.parameters(),
)
self.logger.debug(f"Policy grads: {policy_grads}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(
env: Optional[Env] = None,
feature_constructor=None,
summary_dir=None,
write_summary=True,
write_summary=False,
):
super().__init__(name, env, summary_dir, write_summary)
self.feature_constructor = feature_constructor
Expand Down
Loading