Skip to content

Commit b9af35a

Browse files
Jamie Nucho and claude committed
Fix judge projections: MDS instead of PCA on similarity matrix, add caching
Similarity matrices from judge models are now cached as .npy files. An MDS projection preserves distance relationships better than PCA on the raw similarity matrix. The sparse similarity matrix still limits 2D projection quality for judge spaces (metrics remain valid; the visualization is approximate).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 0799056 commit b9af35a

2 files changed

Lines changed: 1961 additions & 1935 deletions

File tree

scripts/path_invariance.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,18 @@ def parse_similarity_score(text):
109109
continue
110110
return 0.5 # default if parsing fails
111111

112-
def judge_similarity_matrix(model_key, responses, texts, n_cross_sample=200):
112+
def judge_similarity_matrix(model_key, responses, texts, n_cross_sample=200, cache_dir=None):
113113
"""
114114
Build a similarity matrix by having a model judge pairwise similarity.
115115
Scores all within-question pairs + a random sample of cross-question pairs.
116116
"""
117+
# Check cache
118+
if cache_dir:
119+
cache_path = Path(cache_dir) / f"sim_matrix_{model_key}.npy"
120+
if cache_path.exists():
121+
print(f" Loading cached similarity matrix from {cache_path}")
122+
return np.load(cache_path)
123+
117124
n = len(responses)
118125
# Start with neutral similarity
119126
sim_matrix = np.full((n, n), 0.5)
@@ -178,6 +185,13 @@ def judge_similarity_matrix(model_key, responses, texts, n_cross_sample=200):
178185
sim_matrix[i][j] = cross_mean
179186
sim_matrix[j][i] = cross_mean
180187

188+
# Save cache
189+
if cache_dir:
190+
cache_path = Path(cache_dir) / f"sim_matrix_{model_key}.npy"
191+
cache_path.parent.mkdir(parents=True, exist_ok=True)
192+
np.save(cache_path, sim_matrix)
193+
print(f" Cached similarity matrix to {cache_path}")
194+
181195
return sim_matrix
182196

183197

@@ -273,6 +287,18 @@ def pca_2d(embeddings):
273287
projected = centered @ components
274288
return projected
275289

290+
def mds_2d(sim_matrix):
    """Classical MDS from similarity matrix to 2D.

    Similarities are mapped to squared distances as ``max(0, 1 - sim)``,
    double-centered into a Gram matrix, and the two leading eigenvectors
    (scaled by the square roots of their eigenvalues) are used as coordinates.

    Args:
        sim_matrix: Square ``(n, n)`` array-like of pairwise similarities.
            The input is not modified.

    Returns:
        An ``(n, 2)`` float array of 2D coordinates. For ``n < 2`` there is
        nothing to embed, so an all-zero ``(n, 2)`` array is returned.
    """
    sim = np.asarray(sim_matrix, dtype=float)
    n = sim.shape[0]
    if n < 2:
        # Keep the output shape stable for callers that index two columns.
        return np.zeros((n, 2))

    # 1 - sim is used directly as the squared distance (the original took a
    # square root and immediately squared it again).
    sq_dist = np.clip(1.0 - sim, 0.0, None)
    # A point is at distance zero from itself regardless of what the diagonal
    # of the similarity matrix holds; a non-zero diagonal would shift every
    # eigenvalue of the centered matrix and distort the embedding.
    np.fill_diagonal(sq_dist, 0.0)

    # Double centering: equivalent to -0.5 * H @ sq_dist @ H with
    # H = I - 11^T / n, but without building H or doing O(n^3) products.
    row_mean = sq_dist.mean(axis=1, keepdims=True)
    col_mean = sq_dist.mean(axis=0, keepdims=True)
    B = -0.5 * (sq_dist - row_mean - col_mean + sq_dist.mean())

    eigenvalues, eigenvectors = np.linalg.eigh(B)
    # eigh returns eigenvalues in ascending order; take the two largest.
    idx = np.argsort(eigenvalues)[::-1][:2]
    # Negative eigenvalues (non-Euclidean input) are clamped to zero.
    coords = eigenvectors[:, idx] * np.sqrt(np.maximum(0, eigenvalues[idx]))
    return coords
301+
276302

277303
# --- Main ---
278304

@@ -445,7 +471,8 @@ def main():
445471
print(f"{'='*50}")
446472

447473
try:
448-
sim_matrix = judge_similarity_matrix(model_key, responses, texts)
474+
cache_dir = Path(__file__).resolve().parent.parent / "web" / "public" / "data" / ".cache"
475+
sim_matrix = judge_similarity_matrix(model_key, responses, texts, cache_dir=cache_dir)
449476

450477
question_purity = cluster_purity(sim_matrix, question_labels)
451478
model_purity = cluster_purity(sim_matrix, model_labels)
@@ -459,9 +486,8 @@ def main():
459486
model_sim = inter_vs_intra_similarity(sim_matrix, model_labels)
460487
phase_sim = inter_vs_intra_similarity(sim_matrix, phase_labels)
461488

462-
# MDS-like projection from similarity matrix
463-
# Use PCA on the similarity matrix itself as a proxy
464-
coords = pca_2d(sim_matrix)
489+
# MDS projection from similarity matrix
490+
coords = mds_2d(sim_matrix)
465491

466492
# Per-question similarity
467493
per_question = {}

0 commit comments

Comments
 (0)