Skip to content

Commit dd47699

Browse files
committed
[network] Clean up Claude output
1 parent b9c7d4a commit dd47699

3 files changed

Lines changed: 18 additions & 48 deletions

File tree

accelforge/model/_looptree/reuse/symbolic/_network.py

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@ def per_loop_transfer_cost(
9494
class MeshTopologyModel(TopologyModel):
9595
"""Cost model for a mesh network.
9696
97-
Data travels link-by-link along one axis of the mesh. Multicast delivers a
98-
value to every point along the dimension; unicast delivers a distinct value
99-
to each point. When the source is physically distributed, data is bound as
100-
locally as possible across the physical buffers.
97+
Data travels along one axis of the mesh. Multicast delivers a value to every
98+
point along the dimension; unicast delivers a distinct value to each point.
99+
When the source is physically distributed, data is bound as locally as
100+
possible across the physical buffers.
101101
"""
102102

103103
def per_loop_transfer_cost(
@@ -153,23 +153,13 @@ def per_loop_transfer_cost(
153153

154154

155155
class AllToAllTopologyModel(TopologyModel):
156-
"""Cost model for an all-to-all network built around a switch (e.g. NVLink /
157-
NVSwitch).
158-
159-
Every node connects to every other node through a central switch, so any
160-
source reaches any destination in a constant number of hops regardless of
161-
how far apart they are in the logical fanout. This differs from a mesh in
162-
two ways:
163-
164-
- **Uniform latency.** The longest route is a single switch traversal, so
165-
``max_hops`` is constant rather than growing with the distance
166-
(``shape_repeats * stride``) between source and destination.
167-
- **No store-and-forward accumulation.** Each destination is reached
168-
directly, so the total (energy) cost is linear in the number of
169-
destinations rather than quadratic as in a mesh unicast.
170-
171-
The physical stride is irrelevant here (all nodes are equidistant from the
172-
switch), so ``last_fanout`` and physical distribution are not consulted.
156+
"""Cost model for an all-to-all network using a switch (e.g. NVLink).
157+
158+
Every node connects to every other node through a switch, so any
159+
source reaches any destination in one hop regardless of distance.
160+
161+
Physical stride is irrelevant, so ``last_fanout`` and physical distribution
162+
are not used.
173163
"""
174164

175165
HOPS_PER_TRANSFER = 1
@@ -219,9 +209,7 @@ def per_loop_transfer_cost(
219209
)
220210

221211

222-
# Registry mapping each topology to the model class that costs its data
223-
# movement. Classes (not instances) are stored because models are stateful and
224-
# each NetworkAnalyzer needs its own.
212+
# Registry of topology models
225213
TOPOLOGY_MODELS: dict[TopologySpec, type[TopologyModel]] = {
226214
TopologySpec.MESH: MeshTopologyModel,
227215
TopologySpec.ALL_TO_ALL: AllToAllTopologyModel,

tests/network/input_files/networked/hierarchical_1d_all_to_all.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ arch:
2626
leak_power: 0
2727
total_latency: "max_hops"
2828
actions:
29-
- {name: hops, energy: 1, latency: 0, throughput: 1}
29+
- {name: hops, energy: 1, latency: 1, throughput: inf}
3030

3131
- !Memory
3232
name: Scratchpad
@@ -40,9 +40,7 @@ arch:
4040
spatial:
4141
- {name: X, fanout: 4}
4242

43-
# All-to-all switch (NVLink-like): every node is one switch hop from every
44-
# other, so unicast and multicast cost the same total hops and max_hops is
45-
# constant. Fanout is 4 so this differs observably from a mesh.
43+
# All-to-all switch (NVLink-like): every node is one hop from every other
4644
- !Network
4745
name: MacArray
4846
topology: all_to_all

tests/network/test_network.py

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -334,9 +334,7 @@ def test_flat(self):
334334

335335

336336
class TestModelAllToAll(TestCase):
337-
"""Full-model evaluation of the 1D hierarchy where MacArray is an all-to-all
338-
switch (NVLink-like) instead of a mesh. PeArray remains a mesh, so the two
339-
networks can be contrasted within a single run."""
337+
"""MacArray is an all-to-all switch (NVLink-like). PeArray is a mesh."""
340338

341339
def test_hierarchical_1d_all_to_all(self):
342340
M = 8
@@ -360,16 +358,13 @@ def test_hierarchical_1d_all_to_all(self):
360358
result = spec.evaluate_mapping()
361359

362360
# --- MacArray: all-to-all switch ---------------------------------
363-
# On a switch every node is one hop away, so unicast (T0, W0) collapses
364-
# to the same (MAC_TILE - 1) linear cost as multicast (T1): all equal.
365-
# Contrast test_hierarchical_1d, where the mesh makes T0/W0 quadratic
366-
# (sum(range(MAC_TILE))).
361+
# Every node is one hop away
367362
all_to_all = (
368363
(M / M_TILE)
369364
* (KN / MAC_TILE) # number of used Scratchpad
370365
* M_TILE
371366
* KN # temporal for n1 in mapping
372-
* (MAC_TILE - 1) # one switch hop per destination, for every tensor
367+
* (MAC_TILE - 1) # one hop per destination, for every tensor
373368
* BITS_PER_VALUE
374369
)
375370
for tensor in ("T0", "T1", "W0"):
@@ -381,17 +376,6 @@ def test_hierarchical_1d_all_to_all(self):
381376
msg=f"unexpected MacArray hops for {tensor}",
382377
)
383378

384-
# Guard: a mesh would make the unicast tensors strictly more expensive.
385-
mesh_unicast = (
386-
(M / M_TILE)
387-
* (KN / MAC_TILE)
388-
* M_TILE
389-
* KN
390-
* sum(range(MAC_TILE)) # quadratic on a mesh
391-
* BITS_PER_VALUE
392-
)
393-
self.assertGreater(mesh_unicast, all_to_all)
394-
395379
# --- PeArray: still a mesh ---------------------------------------
396380
# Unchanged from test_hierarchical_1d, so the mesh formulas hold (now
397381
# with MAC_TILE = 4, i.e. KN // MAC_TILE = 2).
@@ -422,7 +406,7 @@ def test_hierarchical_1d_all_to_all(self):
422406

423407
# --- Latency ------------------------------------------------------
424408
# The switch's uniform single-hop routing gives MacArray a constant
425-
# latency of 1, versus the mesh PeArray's distance-dependent 2.
409+
# latency of 1, versus the mesh PeArray's 2.
426410
self.assertEqual(
427411
result.data["Matmul0<SEP>latency<SEP>MacArray"].iloc[0], 1
428412
)

0 commit comments

Comments
 (0)