1- from __future__ import annotations
2-
31import einops
42import pufferlib .models
53import pufferlib .pytorch
@@ -11,17 +9,6 @@ class Recurrent(pufferlib.models.LSTMWrapper):
def __init__(self, env, policy, input_size=512, hidden_size=512):
    """Build the recurrent wrapper around ``policy``.

    All four arguments are handed unchanged, and in the same order, to the
    parent class constructor; nothing else is set up here.
    """
    super().__init__(
        env,
        policy,
        input_size,
        hidden_size,
    )
1311
def initialize_to_environment(
    self,
    features: dict[str, dict],
    action_names: list[str],
    action_max_params: list[int],
    device,
):
    """Delegate environment initialization to the wrapped policy, if it supports it.

    The four arguments are forwarded positionally and unchanged to
    ``self.policy.initialize_to_environment``. When the wrapped policy does
    not define that attribute this call is a no-op. The wrapped policy's
    return value is not propagated; this method always returns ``None``.
    """
    # Guard clause: wrapped policies without the hook are silently skipped.
    if not hasattr(self.policy, "initialize_to_environment"):
        return

    self.policy.initialize_to_environment(
        features,
        action_names,
        action_max_params,
        device,
    )
24-
2512
2613class Policy (nn .Module ):
2714 def __init__ (self , env , cnn_channels = 128 , hidden_size = 512 , ** kwargs ):
@@ -33,34 +20,6 @@ def __init__(self, env, cnn_channels=128, hidden_size=512, **kwargs):
3320 self .out_height = 11
3421 self .num_layers = 22
3522
36- # Define the standard feature order and their empirically determined normalizations
37- # This acts like original_feature_mapping in MettaAgent
38- self .feature_normalizations = {
39- "type_id" : 9.0 ,
40- "agent:group" : 1.0 ,
41- "hp" : 1.0 ,
42- "agent:frozen" : 10.0 ,
43- "agent:orientation" : 3.0 ,
44- "agent:color" : 254.0 ,
45- "converting" : 1.0 ,
46- "swappable" : 1.0 ,
47- "episode_completion_pct" : 235.0 ,
48- "last_action" : 8.0 ,
49- "last_action_arg" : 9.0 ,
50- "last_reward" : 250.0 ,
51- "agent:glyph" : 29.0 ,
52- "resource_rewards" : 1.0 ,
53- # Inventory features (positions 14-21)
54- "inv:0" : 1.0 ,
55- "inv:1" : 8.0 ,
56- "inv:2" : 1.0 ,
57- "inv:3" : 1.0 ,
58- "inv:4" : 6.0 ,
59- "inv:5" : 3.0 ,
60- "inv:6" : 1.0 ,
61- "inv:7" : 2.0 ,
62- }
63-
6423 self .network = nn .Sequential (
6524 pufferlib .pytorch .layer_init (nn .Conv2d (self .num_layers , cnn_channels , 5 , stride = 3 )),
6625 nn .ReLU (),
@@ -76,9 +35,32 @@ def __init__(self, env, cnn_channels=128, hidden_size=512, **kwargs):
7635 nn .ReLU (),
7736 )
7837
79- # Initialize max_vec with ones - will be properly set during initialize_to_environment
80- # This ensures the model works even if initialize_to_environment isn't called
81- max_vec = torch .ones (self .num_layers , dtype = torch .float32 )[None , :, None , None ]
38+ max_vec = torch .tensor (
39+ [
40+ 9.0 ,
41+ 1.0 ,
42+ 1.0 ,
43+ 10.0 ,
44+ 3.0 ,
45+ 254.0 ,
46+ 1.0 ,
47+ 1.0 ,
48+ 235.0 ,
49+ 8.0 ,
50+ 9.0 ,
51+ 250.0 ,
52+ 29.0 ,
53+ 1.0 ,
54+ 1.0 ,
55+ 8.0 ,
56+ 1.0 ,
57+ 1.0 ,
58+ 6.0 ,
59+ 3.0 ,
60+ 1.0 ,
61+ 2.0 ,
62+ ]
63+ )[None , :, None , None ] # noqa:E231
8264 self .register_buffer ("max_vec" , max_vec )
8365
8466 action_nvec = env .single_action_space .nvec
@@ -147,39 +129,3 @@ def decode_actions(self, hidden):
147129 logits = [dec (hidden ) for dec in self .actor ]
148130 value = self .value (hidden )
149131 return logits , value
150-
def initialize_to_environment(
    self,
    features: dict[str, dict],
    action_names: list[str],
    action_max_params: list[int],
    device,
):
    """Rebuild ``max_vec`` so each normalization sits at the environment's feature ID.

    Every one of the ``self.num_layers`` slots starts at 1.0. For each entry
    of ``features`` whose ``"id"`` lies in ``[0, self.num_layers)``, the slot
    at that ID is set by the first rule that applies:

    1. the value stored in ``self.feature_normalizations`` for that feature
       name;
    2. 100.0 for any other feature whose name starts with ``"inv:"`` (only
       while ``"inv:0"`` is itself a key of ``self.feature_normalizations``);
    3. the feature's own ``"normalization"`` property, when present.

    Features with a missing, ``None`` or out-of-range ``"id"``, and features
    matching none of the rules, leave their slot at 1.0.

    Args:
        features: Maps feature name to a properties dict. Only the ``"id"``
            and ``"normalization"`` keys are read here.
        action_names: Not used by this method; accepted so the signature
            matches the caller's.
        action_max_params: Not used by this method; accepted so the
            signature matches the caller's.
        device: Device on which the replacement tensor is created.
    """
    # Fallback for inventory features that have no entry of their own.
    default_inventory_normalization = 100.0

    known_normalizations = self.feature_normalizations
    # Loop-invariant, so evaluate it once rather than per feature.
    has_inventory_defaults = "inv:0" in known_normalizations

    max_values = [1.0] * self.num_layers  # Default normalization

    for feature_name, feature_props in features.items():
        feature_id = feature_props.get("id")
        # Skip features with no usable slot (absent, None, or out of range).
        if feature_id is None or not 0 <= feature_id < self.num_layers:
            continue

        if feature_name in known_normalizations:
            # Use our empirically determined normalization.
            max_values[feature_id] = known_normalizations[feature_name]
        elif has_inventory_defaults and feature_name.startswith("inv:"):
            # Unknown inventory item: fall back to the inventory default.
            max_values[feature_id] = default_inventory_normalization
        else:
            # Unknown feature: defer to the environment's own value, if any.
            # NOTE(review): max_vec is presumably used as a divisor elsewhere,
            # so a zero here would be hazardous - confirm against the caller.
            env_normalization = feature_props.get("normalization")
            if env_normalization is not None:
                max_values[feature_id] = env_normalization

    new_max_vec = torch.tensor(max_values, dtype=torch.float32, device=device)[None, :, None, None]
    # Swap the storage in place so the existing max_vec object stays the same
    # tensor while taking on the new values (and the requested device).
    self.max_vec.data = new_max_vec
0 commit comments