Skip to content

Commit 0ce18ea

Browse files
committed
Ch8 updates, add sandia method scenes and notebook updates.
1 parent 861247c commit 0ce18ea

6 files changed

Lines changed: 230 additions & 32 deletions

File tree

Chapter8/Scene2.py

Lines changed: 137 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
CHAPTER8_MATRIX_DATA,
1414
CHAPTER8_A2_DATA,
1515
CHAPTER8_TRIANGLE_DATA,
16+
CHAPTER8_L_DATA,
17+
CHAPTER8_L_MASKED_DATA,
1618
highlight_triangle,
1719
animate_vertex_fill,
1820
)
@@ -149,7 +151,7 @@ def construct(self):
149151
self.play(Write(div_text))
150152
self.wait(1)
151153

152-
# Fade out
154+
# Fade out classical method
153155
self.play(
154156
FadeOut(title),
155157
FadeOut(code),
@@ -159,3 +161,137 @@ def construct(self):
159161
FadeOut(result_group),
160162
)
161163
self.wait(0.5)
164+
165+
# ===== SANDIA METHOD SECTION =====
166+
167+
# Title for Sandia method
168+
sandia_title = Text("The Sandia Method", font_size=42, color=GREEN)
169+
sandia_title.to_edge(UP, buff=0.3)
170+
171+
with self.voiceover(
172+
"""This approach works, but we can make it more efficient.
173+
The classical method has two issues. First, our symmetric
174+
matrix counts every edge twice, forward and backward.
175+
Second, we perform two separate operations: matrix multiply,
176+
then element-wise multiply."""
177+
):
178+
self.play(Write(sandia_title))
179+
180+
# Show issues with classical method
181+
issues = VGroup(
182+
Text("Issues with A² ⊙ A:", font_size=28, color=YELLOW),
183+
Text("• Symmetric matrix = counting edges twice", font_size=24),
184+
Text("• Two operations: mxm then ewise_mult", font_size=24),
185+
).arrange(DOWN, buff=0.2, aligned_edge=LEFT)
186+
issues.move_to(ORIGIN)
187+
self.play(Write(issues))
188+
self.wait(1)
189+
190+
# Introduce solution
191+
with self.voiceover(
192+
"""The Sandia method addresses both issues. We start by
193+
selecting only the lower triangular portion of A. This
194+
gives us matrix L with half the edges."""
195+
):
196+
self.play(FadeOut(issues))
197+
198+
# Show A and L side by side
199+
A_label2 = Text("A", font_size=28, color=BLUE)
200+
A_mat2 = create_sparse_matrix(CHAPTER8_MATRIX_DATA, scale=0.35, v_buff=0.45, h_buff=0.45)
201+
A_group2 = VGroup(A_label2, A_mat2).arrange(DOWN, buff=0.2)
202+
203+
arrow = MathTex(r"\xrightarrow{\text{tril}}", font_size=36)
204+
205+
L_label = Text("L", font_size=28, color=GREEN)
206+
L_mat = create_sparse_matrix(CHAPTER8_L_DATA, scale=0.35, v_buff=0.45, h_buff=0.45)
207+
L_group = VGroup(L_label, L_mat).arrange(DOWN, buff=0.2)
208+
209+
tril_group = VGroup(A_group2, arrow, L_group).arrange(RIGHT, buff=0.5)
210+
tril_group.move_to(ORIGIN).shift(UP * 0.5)
211+
212+
self.play(Write(A_label2), Create(A_mat2))
213+
self.play(Write(arrow))
214+
self.play(Write(L_label), Create(L_mat))
215+
self.wait(1)
216+
217+
# Show the masked multiply
218+
with self.voiceover(
219+
"""Then we use L as its own mask during the matrix multiply.
220+
The notation L, open paren, L dot S, close paren, means:
221+
only compute entries where L already has values. This
222+
combines the multiply and mask into one operation."""
223+
):
224+
self.play(FadeOut(A_group2), FadeOut(arrow))
225+
226+
# Move L to the left
227+
self.play(L_group.animate.to_edge(LEFT, buff=0.8))
228+
229+
# Show the masked operation code
230+
sandia_code = Code(
231+
code_string="L(L.S) << L.mxm(L)",
232+
language="python",
233+
background="window",
234+
).scale(0.8)
235+
sandia_code.move_to(ORIGIN).shift(UP * 0.3)
236+
self.play(Create(sandia_code))
237+
self.wait(1)
238+
239+
# Show result matrix
240+
with self.voiceover(
241+
"""The result shows exactly one entry per triangle.
242+
Position 2,0 captures the triangle between nodes 0, 1, and 2.
243+
Position 3,0 captures nodes 0, 2, and 3. And so on.
244+
Each triangle appears exactly once in the lower triangular
245+
portion."""
246+
):
247+
# Show the result matrix
248+
result_label = Text("Result", font_size=28, color=ORANGE)
249+
result_mat = create_sparse_matrix(CHAPTER8_L_MASKED_DATA, scale=0.35, v_buff=0.45, h_buff=0.45)
250+
result_group2 = VGroup(result_label, result_mat).arrange(DOWN, buff=0.2)
251+
result_group2.to_edge(RIGHT, buff=0.8)
252+
253+
self.play(Write(result_label), Create(result_mat))
254+
self.wait(1)
255+
256+
# Show final count
257+
with self.voiceover(
258+
"""The sum is simply 4. No division by 6 needed because
259+
each triangle is counted exactly once. Same answer,
260+
computed more efficiently."""
261+
):
262+
self.play(FadeOut(sandia_code))
263+
264+
# Show sum
265+
sum_text2 = MathTex(r"\text{sum} = 4 \text{ triangles}", font_size=32, color=GREEN)
266+
sum_text2.move_to(ORIGIN)
267+
self.play(Write(sum_text2))
268+
self.wait(1)
269+
270+
# Code comparison
271+
with self.voiceover(
272+
"""Here's the complete Sandia method. Select the lower
273+
triangular portion, perform the masked matrix multiply,
274+
and sum the result. Three lines of code, half the edges
275+
to process, and no post-processing division."""
276+
):
277+
self.play(FadeOut(L_group), FadeOut(result_group2), FadeOut(sum_text2))
278+
279+
# Show full Sandia code
280+
full_code = Code(
281+
code_string="""# Sandia method
282+
L = A.select('tril')
283+
L(L.S) << L.mxm(L)
284+
triangles = L.reduce_scalar()""",
285+
language="python",
286+
background="window",
287+
).scale(0.8)
288+
full_code.move_to(ORIGIN)
289+
self.play(Create(full_code))
290+
self.wait(1)
291+
292+
# Final fade out
293+
self.play(
294+
FadeOut(sandia_title),
295+
FadeOut(full_code),
296+
)
297+
self.wait(0.5)

Chapter8/Scene6.py

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,19 @@ def construct(self):
6060
A_group = VGroup(A_label, A_mat, A_desc).arrange(DOWN, buff=0.15)
6161
A_group.move_to(ORIGIN).shift(DOWN * 0.5)
6262

63-
# Right: T indicator matrix (binary version)
63+
# Right: T matrix (binary version)
6464
T_indicator = [[1 if v > 0 else 0 for v in row] for row in CHAPTER8_TRIANGLE_DATA]
6565
T_mat = create_sparse_matrix(T_indicator, scale=0.35, v_buff=0.4, h_buff=0.4)
66-
T_label = MathTex("T_{ind}", font_size=28, color=ORANGE)
67-
T_desc = Text("T > 0 (binary)", font_size=16, color=GRAY)
66+
T_label = MathTex("T", font_size=28, color=ORANGE)
67+
T_desc = Text("T > 0 (bool)", font_size=16, color=GRAY)
6868
T_group = VGroup(T_label, T_mat, T_desc).arrange(DOWN, buff=0.15)
6969
T_group.to_edge(RIGHT, buff=0.8).shift(DOWN * 0.5)
7070

7171
with self.voiceover(
7272
"""We need three components. First, y: the row sums of our
7373
triangle matrix T, giving per-node triangle counts before
7474
dividing by 2. Second, A: our adjacency matrix. Third,
75-
T indicator: a binary matrix showing where triangles exist."""
75+
the triangle matrix T: a boolean matrix showing where triangles exist."""
7676
):
7777
self.play(Write(y_label), Create(y_mat), Write(y_desc))
7878
self.play(Write(A_label), Create(A_mat), Write(A_desc))
@@ -94,20 +94,20 @@ def construct(self):
9494
step1.next_to(formula, DOWN, buff=0.6)
9595

9696
step2 = VGroup(
97-
MathTex(r"T_{ind} \cdot y", font_size=32, color=ORANGE),
97+
MathTex(r"T \cdot y", font_size=32, color=ORANGE),
9898
Text("= sum of TRIANGLE neighbors' counts", font_size=20),
9999
).arrange(RIGHT, buff=0.3)
100100
step2.next_to(step1, DOWN, buff=0.4)
101101

102102
step3 = VGroup(
103-
MathTex(r"3(A \cdot y) - 2(T_{ind} \cdot y)", font_size=32),
103+
MathTex(r"3(A \cdot y) - 2(T \cdot y)", font_size=32),
104104
Text("= weighted combination", font_size=20),
105105
).arrange(RIGHT, buff=0.3)
106106
step3.next_to(step2, DOWN, buff=0.4)
107107

108108
with self.voiceover(
109109
"""A times y computes, for each node, the sum of all its
110-
neighbors' triangle counts. T indicator times y computes
110+
neighbors' triangle counts. T times y computes
111111
the sum of only triangle neighbors' counts. The difference
112112
with coefficients 3 and 2 means non-triangle neighbors
113113
contribute more to your centrality score."""
@@ -142,20 +142,23 @@ def construct(self):
142142
FadeOut(weight_explanation),
143143
)
144144

145-
code_lines = [
146-
"# y = per-node triangle counts",
147-
"y = T.sum(axis=1)",
148-
"",
149-
"# k = normalization factor",
150-
"k = y.sum()",
151-
"",
152-
"# Triangle centrality",
153-
"Ay = A @ y",
154-
"Ty = (T > 0) @ y",
155-
"tc = (3*Ay - 2*Ty + y) / k",
156-
]
145+
code_lines = """def triangle_centrality(A):
146+
# y = per-node triangle counts
147+
T = A.dup(clear=True)
148+
T(A.S) << A.mxm(A.T)
149+
y = T.reduce_rowwise(binary.plus).new()
150+
151+
# k = normalization factor (sum of all triangle counts)
152+
k = y.reduce(binary.plus).new().value
153+
154+
# T1 = where triangles exist (binary mask)
155+
T1 = T.dup(bool)
156+
157+
tc = (3 * (A @ y) + -2 * (T1 @ y) + y) / k
158+
159+
return tc.new()"""
157160
code = Code(
158-
code_string="\n".join(code_lines),
161+
code_string=code_lines,
159162
language="python",
160163
background="window",
161164
).scale(0.7)
@@ -164,7 +167,7 @@ def construct(self):
164167
with self.voiceover(
165168
"""In code, the computation is straightforward. We compute y
166169
as row sums of T. k is the total for normalization. Then
167-
A times y gives all-neighbor sums, T indicator times y gives
170+
A times y gives all-neighbor sums, T times y gives
168171
triangle-neighbor sums, and we combine them with the weights
169172
3, negative 2, and 1."""
170173
):

docs/Chapter8_480p15.mp4

10.5 MB
Binary file not shown.

notebooks/08_triangle_counting.ipynb

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@
9393
},
9494
{
9595
"cell_type": "code",
96-
"execution_count": 30,
96+
"execution_count": 4,
9797
"metadata": {},
9898
"outputs": [
9999
{
@@ -133,7 +133,9 @@
133133
{
134134
"cell_type": "code",
135135
"execution_count": 6,
136-
"metadata": {},
136+
"metadata": {
137+
"scrolled": true
138+
},
137139
"outputs": [
138140
{
139141
"name": "stdout",
@@ -154,13 +156,47 @@
154156
"cell_type": "markdown",
155157
"metadata": {},
156158
"source": [
157-
"## Per-Node Triangle Count"
159+
"## More efficient Sandia Method"
160+
]
161+
},
162+
{
163+
"cell_type": "markdown",
164+
"metadata": {},
165+
"source": [
166+
"The above triangle counting scheme works, but is not maximally efficient. First, it considers the entire symmetric graph, which involves counting both forward and backward edges. Second, it does an `ewise_mult` after the matrix multiply, requiring two operations instead of one. The \"Sandia\" method considers only the lower triangular half of the matrix, counting half as many edges, and combines the `ewise_mult` operation into the matrix multiplication by using the lower triangular matrix L as its own mask:"
158167
]
159168
},
160169
{
161170
"cell_type": "code",
162171
"execution_count": 7,
163172
"metadata": {},
173+
"outputs": [
174+
{
175+
"name": "stdout",
176+
"output_type": "stream",
177+
"text": [
178+
"Total triangles: 45\n"
179+
]
180+
}
181+
],
182+
"source": [
183+
"L = A.select('tril').new()\n",
184+
"L(L.S) << L.mxm(L)\n",
185+
"total = L.reduce_scalar(binary.plus).get()\n",
186+
"print(f\"Total triangles: {total}\")"
187+
]
188+
},
189+
{
190+
"cell_type": "markdown",
191+
"metadata": {},
192+
"source": [
193+
"## Per-Node Triangle Count"
194+
]
195+
},
196+
{
197+
"cell_type": "code",
198+
"execution_count": 8,
199+
"metadata": {},
164200
"outputs": [
165201
{
166202
"name": "stdout",
@@ -179,7 +215,7 @@
179215
},
180216
{
181217
"cell_type": "code",
182-
"execution_count": 8,
218+
"execution_count": 9,
183219
"metadata": {},
184220
"outputs": [
185221
{
@@ -221,7 +257,7 @@
221257
},
222258
{
223259
"cell_type": "code",
224-
"execution_count": 9,
260+
"execution_count": 10,
225261
"metadata": {},
226262
"outputs": [
227263
{
@@ -271,7 +307,7 @@
271307
},
272308
{
273309
"cell_type": "code",
274-
"execution_count": 50,
310+
"execution_count": 11,
275311
"metadata": {},
276312
"outputs": [
277313
{
@@ -297,7 +333,7 @@
297333
" - A @ y = sum of ALL neighbors' triangle counts\n",
298334
" - T1 @ y = sum of TRIANGLE neighbors' triangle counts\n",
299335
" \n",
300-
" The 3 vs 2 weighting means non-triangle neighbors count MORE than triangle neighbors.\n",
336+
" The 3 vs 2 weighting means non-triangle neighbors count more than triangle neighbors.\n",
301337
" \"\"\"\n",
302338
" # y = per-node triangle counts\n",
303339
" T = A.dup(clear=True)\n",
@@ -320,7 +356,7 @@
320356
},
321357
{
322358
"cell_type": "code",
323-
"execution_count": 47,
359+
"execution_count": 12,
324360
"metadata": {},
325361
"outputs": [
326362
{
@@ -352,7 +388,7 @@
352388
},
353389
{
354390
"cell_type": "code",
355-
"execution_count": 48,
391+
"execution_count": 13,
356392
"metadata": {},
357393
"outputs": [
358394
{
@@ -409,7 +445,7 @@
409445
},
410446
{
411447
"cell_type": "code",
412-
"execution_count": 49,
448+
"execution_count": 14,
413449
"metadata": {},
414450
"outputs": [
415451
{

scene_utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
CHAPTER8_TRIANGLE_DATA,
99
CHAPTER8_TRIANGLES,
1010
CHAPTER8_PER_NODE_TRIANGLES,
11+
CHAPTER8_L_DATA,
12+
CHAPTER8_L_MASKED_DATA,
1113
create_labeled_matrix,
1214
create_sparse_matrix,
1315
create_incidence_matrices,

0 commit comments

Comments
 (0)