Accelergy-Project
diff --git a/accelforge/frontend/arch/components.py b/accelforge/frontend/arch/components.py
Lines changed: 81 additions & 12 deletions
diff --git a/accelforge/frontend/arch/spatialable.py b/accelforge/frontend/arch/spatialable.py
Lines changed: 0 additions & 14 deletions
diff --git a/accelforge/frontend/mapper/ffm.py b/accelforge/frontend/mapper/ffm.py
Lines changed: 32 additions & 0 deletions
diff --git a/accelforge/frontend/mapping/mapping.py b/accelforge/frontend/mapping/mapping.py
Lines changed: 4 additions & 0 deletions
diff --git a/accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py b/accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py
Lines changed: 3 additions & 3 deletions
@@ -1,6 +1,7 @@
 import copy
 import itertools
 import logging
+from numbers import Number
 from accelforge.util._frozenset import oset
 from typing import (
     Any,
@@ -90,8 +91,35 @@ class TensorHolderAction(Action):
     bits_per_action: EvalsTo[int | float] = (
         "1 if bits_per_action is None else bits_per_action"
     )
-    """ The number of bits accessed in this action. For example, setting bits_per_action
-    to 16 means that each call to this action yields 16 bits. """
+    """ 
+    The number of bits accessed in this action. For example, setting bits_per_action to
+    16 means that each call to this action yields 16 bits. Overridden by
+    values_per_action in this action or by the parent component's values_per_action.
+    """
+
+    values_per_action: EvalsTo[dict] = {}
+    """
+    Sets the number of tensor values that are accessed by each call of this action. Keys
+    are evaluated as expressions and may reference one or more tensors. Overrides
+    bits_per_action, and sets bits_per_action to values_per_action[tensor] *
+    bits_per_value[tensor].
+    """
+
+    def _eval_expressions(self, *args, **kwargs):
+        if getattr(self, "_evaluated", False):
+            return super()._eval_expressions(*args, **kwargs)
+
+        class MyPostCall(_PostCall):
+            def __call__(self, field, value, evaluated, symbol_table):
+                if field == "values_per_action":
+                    evaluated = _eval_tensor2number(
+                        evaluated,
+                        location="values_per_action",
+                        symbol_table=symbol_table,
+                    )
+                return evaluated
+
+        return super()._eval_expressions(*args, **kwargs, post_calls=(MyPostCall(),))
 
 
 _COMPONENT_MODEL_CACHE: dict[tuple, "Component"] = {}
@@ -198,6 +226,13 @@ class Component(Spatialable):
     this action's energy. Multiplies the calculated energy of each action.
     """
 
+    actions_scale: EvalsTo[int | float] = 1
+    """
+    Scales the number of actions performed by this component. Multiplies the action
+    count for each action of this component, which proportionally increases this
+    component's energy and latency.
+    """
+
     total_latency: str | int | float = "sum(*action2latency.values())"
     """
     An expression representing the total latency of this component in seconds. This is
@@ -232,8 +267,8 @@ class Component(Spatialable):
     n_parallel_instances: EvalsTo[int | float] = 1
     """
     The number of parallel instances of this component. Increasing parallel instances
-    will proportionally increase area and leakage, while reducing latency (unless
-    latency calculation is overridden).
+    will proportionally increase area and leakage while reducing latency (unless latency
+    calculation is overridden).
     """
 
     extra_attributes_for_component_model: _ExtraAttrs = _ExtraAttrs()
@@ -727,7 +762,7 @@ def _copy_for_component_modeling(self) -> Self:
 )
 
 
-def _eval_tensor2bits(
+def _eval_tensor2number(
     toeval: dict[str, Any],
     location: str,
     symbol_table: dict[str, Any],
@@ -936,7 +971,17 @@ class TensorHolder(Component, Leaf):
     """
     The number of bits accessed in each of this component's actions. Overridden by
     bits_per_action in any action of this component. If set here, acts as a default
-    value for the bits_per_action of all actions of this component.
+    value for the bits_per_action of all actions of this component. Overridden by
+    values_per_action, whether set on this component or in any of its actions.
+    """
+
+    values_per_action: EvalsTo[dict] = {}
+    """
+    Sets the number of tensor values that are accessed by each action of this
+    `TensorHolder`. Keys are evaluated as expressions and may reference one or more
+    tensors. Overrides bits_per_action, and sets bits_per_action to
+    values_per_action[tensor] * bits_per_value[tensor]. Overridden by values_per_action
+    in any action of this component.
     """
 
     def model_post_init(self, __context__=None) -> None:
@@ -951,15 +996,36 @@ def _eval_expressions(self, *args, **kwargs):
         class MyPostCall(_PostCall):
             def __call__(self, field, value, evaluated, symbol_table):
                 if field == "bits_per_value":
-                    evaluated = _eval_tensor2bits(
+                    evaluated = _eval_tensor2number(
                         evaluated,
                         location="bits_per_value",
                         symbol_table=symbol_table,
                     )
+                if field == "values_per_action":
+                    evaluated = _eval_tensor2number(
+                        evaluated,
+                        location="values_per_action",
+                        symbol_table=symbol_table,
+                    )
                 return evaluated
 
         return super()._eval_expressions(*args, **kwargs, post_calls=(MyPostCall(),))
 
+    def _get_values_per_action(
+        self, action_name: str, tensor_name: TensorName, bits_per_value_default: Number
+    ):
+        action = self.actions[action_name]
+
+        if tensor_name in action.values_per_action:
+            return action.values_per_action[tensor_name]
+        if tensor_name in self.values_per_action:
+            return self.values_per_action[tensor_name]
+
+        tensor_bpv = self.bits_per_value.get(tensor_name, bits_per_value_default)
+        action_bpa = action.bits_per_action
+
+        return action_bpa / tensor_bpv
+
 
 class Container(Leaf, Spatialable):
     """
@@ -1049,24 +1115,27 @@ def _eval_expressions(self, *args, **kwargs):
         if getattr(self, "_evaluated", False):
             return super()._eval_expressions(*args, **kwargs)
 
-        # Override TensorHolder's _PostCall to also handle direction
         class MyPostCall(_PostCall):
             def __call__(self_pc, field, value, evaluated, symbol_table):
                 if field == "bits_per_value":
-                    evaluated = _eval_tensor2bits(
+                    evaluated = _eval_tensor2number(
                         evaluated,
                         location="bits_per_value",
                         symbol_table=symbol_table,
                     )
+                if field == "values_per_action":
+                    evaluated = _eval_tensor2number(
+                        evaluated,
+                        location="values_per_action",
+                        symbol_table=symbol_table,
+                    )
                 if field == "direction":
                     evaluated = _eval_direction(
                         evaluated,
                         symbol_table=symbol_table,
                     )
                 return evaluated
 
-        # Skip TensorHolder's _eval_expressions (which adds its own post_calls
-        # for bits_per_value) since we handle it here too
         return Component._eval_expressions(
             self, *args, **kwargs, post_calls=(MyPostCall(),)
         )
@@ -1135,7 +1204,7 @@ def _eval_expressions(self, *args, **kwargs):
         class MyPostCall(_PostCall):
             def __call__(self, field, value, evaluated, symbol_table):
                 if field == "bits_per_value":
-                    evaluated = _eval_tensor2bits(
+                    evaluated = _eval_tensor2number(
                         evaluated,
                         location="bits_per_value",
                         symbol_table=symbol_table,
 
@@ -64,20 +64,6 @@ class Spatial(EvalableModel):
     will be power gated if not used by a particular Einsum.
     """
 
-    allow_imperfect_spatial_loops: EvalsTo[bool] = False
-    """
-    If True, spatial loops over this fanout are allowed to not-perfectly divide the full
-    rank shape, which may let us find mappings with better utilization. For example, if
-    the full rank shape is 7, then allow_imperfect_spatial_loops=False would only permit
-    a spatial loop of size 7, while allow_imperfect_spatial_loops=True would allow
-    spatial loops of size 1, 2, 3, 4, and 7. If our spatial fanout is of size 4, then we
-    could do one tile of size 4 and another tile of size 3, with one unit of padding
-    that is skipped.
-
-    Only "simple" rank variables-- those that appear alone and not as part of an
-    expression-- may have imperfect loops.
-    """
-
 
 class Spatialable(EvalableModel):
     """Something that can be duplicated to create an array of."""
 
@@ -124,6 +124,38 @@ class FFM(EvalableModel):
     are so many templates being generated?).
     """
 
+    explore_imperfect_spatial_loops: bool = False
+    """
+    If True, spatial loop bounds may not perfectly divide the full rank shape. This
+    takes longer to explore and requires more RAM, but mappings found may have better
+    spatial utilization. This is especially helpful when the rank shapes have few prime
+    factors.
+
+    For example, if the rank shape is 7, then explore_imperfect_spatial_loops=False
+    would explore loop bounds of 1, 7 and explore_imperfect_spatial_loops=True would
+    explore loop bounds of 1, 2, 3, 4, 7. This would be helpful for a size-4 PE array,
+    where we could get full utilization using 4 PEs in one timestep and 3 PEs in another
+    timestep.
+
+    Only "simple" rank variables (those appearing alone and not inside an expression in
+    any tensor access) may have imperfect loop bounds.
+    """
+
+    explore_imperfect_temporal_loops: bool = False
+    """
+    If True, temporal loop bounds may not perfectly divide the full rank shape. This
+    takes longer to explore and requires more RAM, but mappings found may have lower
+    memory usage. This is especially helpful when the rank shapes have few prime
+    factors.
+
+    For example, if the rank shape is 7, then explore_imperfect_temporal_loops=False
+    would explore loop bounds of 1, 7 and explore_imperfect_temporal_loops=True would
+    explore loop bounds of 1, 2, 3, 4, 7.
+
+    Only "simple" rank variables (those appearing alone and not inside an expression in
+    any tensor access) may have imperfect loop bounds.
+    """
+
     prioritize_reuse_of_unfused_tensors: bool = False
     """
     If set to True, then for all memory levels, the mapper will place the storage nodes
 
@@ -382,6 +382,10 @@ class Loop(MappingNode):
     """ Whether this Loop is shared with another Einsum. """
 
     _may_cause_imperfect: bool = False
+    """
+    If True, the tile shape of this loop may not perfectly divide the rank
+    shape.
+    """
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
 
@@ -334,7 +334,7 @@ def _loop_bound_constraint_from_no_refetch_and_resend(
             if (
                 isinstance(mapping[end_index], TensorHolder)
                 and n in mapping[end_index].tensors
-                and mapping[end_index].component == m.name
+                and mapping[end_index].component == arch_node.name
             ):
                 break
             end_index += 1
@@ -363,7 +363,7 @@ def _loop_bound_constraint_from_no_refetch_and_resend(
             if (
                 isinstance(mapping[start_index], TensorHolder)
                 and n in mapping[start_index].tensors
-                and mapping[start_index].component == m.name
+                and mapping[start_index].component == arch_node.name
             ):
                 break
             start_index += 1
@@ -375,7 +375,7 @@ def _loop_bound_constraint_from_no_refetch_and_resend(
                 and n in mapping[end_index].tensors
             ):
                 # Can't have two tensor holders for the same tensor + component
-                assert mapping[end_index].component != m.name
+                assert mapping[end_index].component != arch_node.name
                 break
             end_index += 1