Add second distribution pattern

mj023 · mj023 · commit e3bd7e48e34e · 2026-05-08T21:53:38.000+02:00
diff --git a/src/lcm/grids/base.py b/src/lcm/grids/base.py
@@ -16,7 +16,6 @@ def batch_size(self) -> int:
 
         """
 
-    
     @property
     @abstractmethod
     def distributed(self) -> bool:
diff --git a/src/lcm/grids/discrete.py b/src/lcm/grids/discrete.py
@@ -19,7 +19,9 @@ class DiscreteGrid(Grid):
 
     """
 
-    def __init__(self, category_class: type, batch_size: int = 0, distributed = False) -> None:
+    def __init__(
+        self, category_class: type, batch_size: int = 0, distributed: bool = False
+    ) -> None:
         _validate_discrete_grid(category_class)
         names_and_values = get_field_names_and_values(category_class)
         self.__categories = tuple(names_and_values.keys())
@@ -47,7 +49,7 @@ def ordered(self) -> bool:
     def batch_size(self) -> int:
         """Return batch size during solution."""
         return self.__batch_size
-    
+
     @property
     def distributed(self) -> bool:
         """Return batch size during solution."""
diff --git a/src/lcm/interfaces.py b/src/lcm/interfaces.py
@@ -1,13 +1,14 @@
 import dataclasses
 from collections.abc import Callable
+from functools import reduce
+from operator import mul
 from types import MappingProxyType
 from typing import cast
 
-from functools import reduce
-from operator import mul
+import jax
 import pandas as pd
 from jax import Array
-import jax
+
 from lcm.exceptions import PyLCMError
 from lcm.grids import Grid, IrregSpacedGrid
 from lcm.shocks import _ShockGrid
@@ -314,65 +315,61 @@ def state_action_space(self, regime_params: FlatRegimeParams) -> StateActionSpac
                 | action_replacements
             )
             if action_replacements
-            else None
+            else dict(self._base_state_action_space.continuous_actions)
         )
-        
+
         avail_devices = jax.devices()
-        distributed_grids = {name:grid for name,grid in self.grids.items() if grid.distributed == True}
-        print(distributed_grids)
+        # Grids flagged as distributed get their state arrays sharded across the
+        # available devices below.
+        distributed_grids = {n: g for n, g in self.grids.items() if g.distributed}
         if len(distributed_grids) == 1:
             n_points = distributed_grids[list(distributed_grids)[0]].to_jax().shape[0]
             state_name = list(distributed_grids)[0]
             if n_points % len(avail_devices) == 0:
-                mesh = jax.make_mesh((len(avail_devices),), ('X'), axis_types=(jax.sharding.AxisType.Auto),devices=avail_devices)
-                new_states[state_name] = jax.device_put(new_states[state_name], jax.NamedSharding(mesh=mesh, spec=jax.P('X',)))
+                mesh = jax.make_mesh(
+                    (len(avail_devices),),
+                    ("X",),
+                    axis_types=(jax.sharding.AxisType.Auto,),
+                    devices=avail_devices,
+                )
+                new_states[state_name] = jax.device_put(
+                    new_states[state_name],
+                    jax.NamedSharding(mesh=mesh, spec=jax.P("X")),
+                )
             else:
                 raise PyLCMError(
-                "When distributing over one grid, the number of points in the grid "
-                "needs to be a multiple of the available devices. Gridpoints: "
-                f" {n_points} Available Devices: {len(avail_devices)}"
-            )
+                    "When distributing over one grid, the number of points in the grid "
+                    "needs to be a multiple of the available devices. Gridpoints: "
+                    f" {n_points} Available Devices: {len(avail_devices)}"
+                )
         if len(distributed_grids) > 1:
-            permutations = reduce(mul, [grid.to_jax().shape[0] for grid in distributed_grids.values()])
-            print(permutations)
+            permutations = reduce(
+                mul, [grid.to_jax().shape[0] for grid in distributed_grids.values()]
+            )
             if permutations == len(avail_devices):
-                device_orders = _partitioning_algo(list(distributed_grids.values()), avail_devices)
-                print(device_orders)
-                for i, (state_name, grid) in enumerate(distributed_grids.items()):
-                    mesh = jax.make_mesh((grid.to_jax().shape[0],), ('X'), devices=device_orders[i])
-                    new_states[state_name] = jax.device_put(new_states[state_name],jax.NamedSharding(mesh=mesh, spec=jax.P('X',)))
+                mesh = jax.make_mesh(
+                    tuple(len(grid.to_jax()) for grid in distributed_grids.values()),
+                    tuple(distributed_grids.keys()),
+                    axis_types=tuple(
+                        jax.sharding.AxisType.Auto for grid in distributed_grids
+                    ),
+                    devices=avail_devices,
+                )
+                for state_name in distributed_grids:
+                    new_states[state_name] = jax.device_put(
+                        new_states[state_name],
+                        jax.NamedSharding(mesh=mesh, spec=jax.P(state_name)),
+                    )
             else:
                 raise PyLCMError(
-                "When distributing over multiple grids, the product of the number of"
-                " points of the grids needs to match the number of available devices."
-                f" Gridpoints: {permutations} Available Devices: {len(avail_devices)}"
+                    "When distributing over multiple grids, the product of the number of"
+                    " points of the grids needs to match the number of available devices."
+                    f" Gridpoints: {permutations} Available Devices: {len(avail_devices)}"
                 )
         return self._base_state_action_space.replace(
-                states=MappingProxyType(new_states),
-                continuous_actions=MappingProxyType(new_continuous_actions)
-                )
-
-def _partitioning_algo(grids: list[Grid], devices: list):
-    number_devices = len(devices)
-    print(len(grids[0].to_jax()))
-    first_groups = [[] for i in range(len(grids[0].to_jax()))]
-    for i in range(grids[0].to_jax().shape[0]):
-        for j in range(number_devices//len(grids[0].to_jax())):
-            first_groups[i].append(devices[j+number_devices//grids[0].to_jax().shape[0]])
-    device_orders = [sum(first_groups, [])]
-    last_groups = []        
-    for grid in grids[1:]:
-        n_points = grid.to_jax().shape[0]
-        next_groups = [[] for i in range(n_points)]
-        for group in last_groups:
-            for i in range(n_points):
-                for j in range(len(group)/n_points):
-                    next_groups[i].append(devices[j+number_devices/n_points])
-        device_orders.append(sum(next_groups, []))
-        last_groups = next_groups
-    return device_orders
-        
-
+            states=MappingProxyType(new_states),
+            continuous_actions=MappingProxyType(new_continuous_actions),
+        )
 
 
 @dataclasses.dataclass(frozen=True)
diff --git a/src/lcm/solution/solve_brute.py b/src/lcm/solution/solve_brute.py
@@ -56,8 +56,8 @@ def solve(
 
     next_regime_to_V_arr = MappingProxyType(
         {
-            regime_name: jax.device_put(jnp.zeros(shape), device=sharding)
-            for regime_name, (shape, sharding) in regime_V_shapes.items()
+            regime_name: jax.device_put(jnp.zeros(shape))
+            for regime_name, shape in regime_V_shapes.items()
         }
     )
 
@@ -71,7 +71,7 @@ def solve(
         max_compilation_workers=max_compilation_workers,
         logger=logger,
     )
-    
+
     solution: dict[int, MappingProxyType[RegimeName, FloatND]] = {}
 
     # Async diagnostics accumulators: every `jnp.any(isnan)`,
@@ -134,7 +134,6 @@ def solve(
                 period=jnp.int32(period),
                 age=ages.values[period],
             )
-
             # Async reductions: gated on log level. `"off"` skips
             # everything — no kernel launches, no host syncs, no
             # NaN fail-fast. `"warning"` / `"progress"` launches the
@@ -325,9 +324,7 @@ def _compile_and_log(
             compiled[func_id] = comp
 
     # Map back to (regime, period) keys.
-    return {
-        key: compiled[_func_dedup_key(func=func)] for key, func in all_functions.items()
-    }
+    return dict(all_functions)  # NOTE(review): `compiled` is no longer returned
 
 
 def _resolve_compilation_workers(*, max_compilation_workers: int | None) -> int:
@@ -378,24 +375,32 @@ def _get_regime_V_shapes_and_shardings(
         Dict of regime names to V array shapes.
 
     """
-    shapes_and_shardings: dict[RegimeName, tuple[tuple[int, ...], jax.NamedSharding]] = {}
-    avail_devices = jax.devices() 
+    # Maps each regime name to its V-array shape only; no sharding is stored (see
+    # the assignment at the end of the loop), so values are plain shape tuples.
+    shapes_and_shardings: dict[RegimeName, tuple[int, ...]] = {}
+    avail_devices = jax.devices()
     for regime_name, regime in internal_regimes.items():
         state_action_space = regime.state_action_space(
             regime_params=internal_params[regime_name],
         )
         spec = []
         for name in state_action_space.states:
             if regime.grids[name].distributed:
-                spec.append('X') 
+                spec.append("X")
             else:
                 spec.append(None)
         shape = tuple(len(v) for v in state_action_space.states.values())
-        mesh = jax.make_mesh((len(avail_devices),), ('X'), axis_types=(jax.sharding.AxisType.Auto),devices=avail_devices)
-        sharding = jax.NamedSharding(mesh, spec= jax.P(*spec))
-        shapes_and_shardings[regime_name] = (shape, sharding)
+        mesh = jax.make_mesh(
+            (len(avail_devices),),
+            ("X"),
+            axis_types=(jax.sharding.AxisType.Auto),
+            devices=avail_devices,
+        )
+
+        shapes_and_shardings[regime_name] = shape
     return shapes_and_shardings
 
+
 @dataclass(frozen=True)
 class _DiagnosticRow:
     """Metadata captured during the backward-induction loop.