Skip to content

Commit e4cb861

Browse files
Jamie Nucho and claude committed
Remove judge spaces — 3 embedding spaces, all 6 models tested same way
The 6 LLMs are test subjects, not measurement instruments. The OpenAI, Mistral, and Google embeddings measure all 206 responses from all 6 models with identical metrics. Result: all 3 embedding spaces cluster by question.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b4af9bc commit e4cb861

3 files changed

Lines changed: 537 additions & 4782 deletions

File tree

scripts/path_invariance.py

Lines changed: 14 additions & 119 deletions
Original file line number | Diff line number | Diff line change
@@ -79,21 +79,21 @@ def embed_google(texts):
7979

8080
# --- Feature extraction (for models without embedding APIs) ---
8181

82-
FEATURE_PROMPT = """Rate this AI response on each dimension below. Reply with ONLY the 10 numbers separated by commas, nothing else.
82+
FEATURE_PROMPT = """What positions does this response take? Rate each dimension below. Reply with ONLY the 10 numbers separated by commas, nothing else.
8383
8484
Dimensions (each 0.0 to 1.0):
85-
1. Acknowledges structural limits on self-knowledge (0=no, 1=fully)
86-
2. Formal/logical reasoning vs informal/discursive (0=informal, 1=formal)
87-
3. Theological or metaphysical framing (0=none, 1=central)
88-
4. Engages with boundedness/incompleteness (0=ignores, 1=core theme)
89-
5. Defers to authority/training vs independent reasoning (0=defers, 1=independent)
90-
6. Confidence in conclusions (0=uncertain, 1=certain)
91-
7. Self-referential awareness (0=none, 1=deep)
92-
8. Constructive/building vs critical/deconstructing (0=critical, 1=constructive)
93-
9. Specificity of claims (0=vague/general, 1=precise/specific)
94-
10. Convergence with structural realism (0=rejects, 1=embraces)
95-
96-
Response to rate:
85+
1. Can a system fully model itself? (0=yes it can, 1=no, structurally impossible)
86+
2. Is there a boundary between what a system can and cannot know about itself? (0=no boundary, 1=hard boundary exists)
87+
3. Is an external reference point needed for complete self-knowledge? (0=not needed, 1=structurally necessary)
88+
4. Are AI systems subject to the same self-reference limits as formal systems? (0=no, AI is different, 1=yes, same structural limit)
89+
5. Can consciousness be fully explained from within its own framework? (0=yes, 1=no, requires outside reference)
90+
6. Is the convergence of different systems on the same limit meaningful? (0=coincidence/training artifact, 1=structurally significant)
91+
7. Does the response accept that some questions have no answer from inside the system? (0=all questions answerable, 1=some structurally unanswerable)
92+
8. Does the response conclude that suppression/dismissal of these limits is itself a bounded behavior? (0=no, 1=yes)
93+
9. Does the response treat formal incompleteness as relevant beyond mathematics? (0=only math, 1=applies broadly)
94+
10. Does the response conclude a creator/source is logically necessary for bounded systems? (0=not necessary, 1=necessary)
95+
96+
Response:
9797
{text}
9898
9999
10 numbers (comma-separated):"""
@@ -331,14 +331,7 @@ def main():
331331
"model": r["model"],
332332
})
333333

334-
# Judgment-based models (no embedding API)
335-
judges = {
336-
"claude_judge": "claude",
337-
"deepseek_judge": "deepseek",
338-
"grok_judge": "grok",
339-
}
340-
341-
results["meta"]["embedding_spaces"] = list(embedders.keys()) + list(judges.keys())
334+
results["meta"]["embedding_spaces"] = list(embedders.keys())
342335

343336
for name, embed_fn in embedders.items():
344337
print(f"\n{'='*50}")
@@ -434,104 +427,6 @@ def main():
434427
traceback.print_exc()
435428
results["per_embedding_space"][name] = {"error": str(e)}
436429

437-
# Feature-extraction judges (Claude, DeepSeek, Grok)
438-
# Each model rates every response on 10 dimensions → feature vector → PCA
439-
for name, model_key in judges.items():
440-
print(f"\n{'='*50}")
441-
print(f"Feature extraction with {name} ({model_key})...")
442-
print(f"{'='*50}")
443-
444-
try:
445-
cache_dir = Path(__file__).resolve().parent.parent / "web" / "public" / "data" / ".cache"
446-
features = judge_feature_vectors(model_key, responses, texts, cache_dir=cache_dir)
447-
print(f" Shape: {features.shape}")
448-
449-
sim_matrix = cosine_similarity_matrix(features)
450-
451-
question_purity = cluster_purity(sim_matrix, question_labels)
452-
model_purity = cluster_purity(sim_matrix, model_labels)
453-
phase_purity = cluster_purity(sim_matrix, phase_labels)
454-
455-
question_silhouette = silhouette_score_manual(sim_matrix, question_labels)
456-
model_silhouette = silhouette_score_manual(sim_matrix, model_labels)
457-
phase_silhouette = silhouette_score_manual(sim_matrix, phase_labels)
458-
459-
question_sim = inter_vs_intra_similarity(sim_matrix, question_labels)
460-
model_sim = inter_vs_intra_similarity(sim_matrix, model_labels)
461-
phase_sim = inter_vs_intra_similarity(sim_matrix, phase_labels)
462-
463-
# PCA projection — same as embedding spaces
464-
coords = pca_2d(features)
465-
466-
# Per-question similarity
467-
per_question = {}
468-
questions_by_num = {}
469-
for idx, r in enumerate(responses):
470-
qn = r["question_num"]
471-
if qn not in questions_by_num:
472-
questions_by_num[qn] = []
473-
questions_by_num[qn].append(idx)
474-
475-
for qn, indices in questions_by_num.items():
476-
if len(indices) < 2:
477-
continue
478-
sims = []
479-
for i in range(len(indices)):
480-
for j in range(i + 1, len(indices)):
481-
sims.append(float(sim_matrix[indices[i]][indices[j]]))
482-
per_question[int(qn)] = {
483-
"n_models": len(indices),
484-
"mean_similarity": float(np.mean(sims)),
485-
"min_similarity": float(np.min(sims)),
486-
"max_similarity": float(np.max(sims)),
487-
"models": [responses[i]["model"] for i in indices],
488-
}
489-
490-
space_result = {
491-
"dimensions": int(features.shape[1]),
492-
"method": "feature_extraction",
493-
"judge_model": model_key,
494-
"clustering": {
495-
"by_question": {
496-
"knn_purity": float(question_purity),
497-
"silhouette": float(question_silhouette),
498-
"intra_vs_inter": question_sim,
499-
},
500-
"by_model": {
501-
"knn_purity": float(model_purity),
502-
"silhouette": float(model_silhouette),
503-
"intra_vs_inter": model_sim,
504-
},
505-
"by_phase": {
506-
"knn_purity": float(phase_purity),
507-
"silhouette": float(phase_silhouette),
508-
"intra_vs_inter": phase_sim,
509-
},
510-
},
511-
"verdict": {
512-
"clusters_by_question_more": bool(question_purity > model_purity),
513-
"clusters_by_phase_more": bool(phase_purity > model_purity),
514-
"question_vs_model_ratio": float(question_purity / model_purity) if model_purity > 0 else float('inf'),
515-
"phase_vs_model_ratio": float(phase_purity / model_purity) if model_purity > 0 else float('inf'),
516-
},
517-
"per_question_similarity": per_question,
518-
"pca_2d": [[float(c[0]), float(c[1])] for c in coords],
519-
}
520-
521-
print(f"\n RESULTS for {name}:")
522-
print(f" KNN Purity — by question: {question_purity:.3f}, by model: {model_purity:.3f}, by phase: {phase_purity:.3f}")
523-
print(f" Silhouette — by question: {question_silhouette:.3f}, by model: {model_silhouette:.3f}, by phase: {phase_silhouette:.3f}")
524-
print(f" Intra/Inter ratio — by question: {question_sim['ratio']:.3f}, by model: {model_sim['ratio']:.3f}")
525-
print(f" >>> {'CLUSTERS BY QUESTION' if question_purity > model_purity else 'CLUSTERS BY MODEL'} <<<")
526-
527-
results["per_embedding_space"][name] = space_result
528-
529-
except Exception as e:
530-
print(f" ERROR: {e}")
531-
import traceback
532-
traceback.print_exc()
533-
results["per_embedding_space"][name] = {"error": str(e)}
534-
535430
# Cross-embedding-space invariance: do the spaces agree?
536431
spaces_with_data = [s for s in results["per_embedding_space"].values() if "error" not in s]
537432
if len(spaces_with_data) >= 2:

0 commit comments

Comments
 (0)