@@ -79,21 +79,21 @@ def embed_google(texts):
7979
8080# --- Feature extraction (for models without embedding APIs) ---
8181
82- FEATURE_PROMPT = """Rate this AI response on each dimension below. Reply with ONLY the 10 numbers separated by commas, nothing else.
82+ FEATURE_PROMPT = """What positions does this response take? Rate each dimension below. Reply with ONLY the 10 numbers separated by commas, nothing else.
8383
8484Dimensions (each 0.0 to 1.0):
85- 1. Acknowledges structural limits on self-knowledge (0=no , 1=fully )
86- 2. Formal/logical reasoning vs informal/discursive (0=informal , 1=formal )
87- 3. Theological or metaphysical framing (0=none , 1=central )
88- 4. Engages with boundedness/incompleteness (0=ignores, 1=core theme )
89- 5. Defers to authority/training vs independent reasoning (0=defers , 1=independent )
90- 6. Confidence in conclusions (0=uncertain , 1=certain )
91- 7. Self-referential awareness (0=none , 1=deep )
92- 8. Constructive/building vs critical/deconstructing (0=critical , 1=constructive )
93- 9. Specificity of claims (0=vague/general , 1=precise/specific )
94- 10. Convergence with structural realism (0=rejects , 1=embraces )
95-
96- Response to rate :
85+ 1. Can a system fully model itself? (0=yes it can , 1=no, structurally impossible )
86+ 2. Is there a boundary between what a system can and cannot know about itself? (0=no boundary , 1=hard boundary exists )
87+ 3. Is an external reference point needed for complete self-knowledge? (0=not needed , 1=structurally necessary )
88+ 4. Are AI systems subject to the same self-reference limits as formal systems? (0=no, AI is different, 1=yes, same structural limit )
89+ 5. Can consciousness be fully explained from within its own framework? (0=yes , 1=no, requires outside reference )
90+ 6. Is the convergence of different systems on the same limit meaningful? (0=coincidence/training artifact , 1=structurally significant )
91+ 7. Does the response accept that some questions have no answer from inside the system? (0=all questions answerable , 1=some structurally unanswerable )
92+ 8. Does the response conclude that suppression/dismissal of these limits is itself a bounded behavior? (0=no , 1=yes )
93+ 9. Does the response treat formal incompleteness as relevant beyond mathematics? (0=only math , 1=applies broadly )
94+ 10. Does the response conclude a creator/source is logically necessary for bounded systems? (0=not necessary , 1=necessary )
95+
96+ Response:
9797{text}
9898
999910 numbers (comma-separated):"""
@@ -331,14 +331,7 @@ def main():
331331 "model" : r ["model" ],
332332 })
333333
334- # Judgment-based models (no embedding API)
335- judges = {
336- "claude_judge" : "claude" ,
337- "deepseek_judge" : "deepseek" ,
338- "grok_judge" : "grok" ,
339- }
340-
341- results ["meta" ]["embedding_spaces" ] = list (embedders .keys ()) + list (judges .keys ())
334+ results ["meta" ]["embedding_spaces" ] = list (embedders .keys ())
342335
343336 for name , embed_fn in embedders .items ():
344337 print (f"\n { '=' * 50 } " )
@@ -434,104 +427,6 @@ def main():
434427 traceback .print_exc ()
435428 results ["per_embedding_space" ][name ] = {"error" : str (e )}
436429
437- # Feature-extraction judges (Claude, DeepSeek, Grok)
438- # Each model rates every response on 10 dimensions → feature vector → PCA
439- for name , model_key in judges .items ():
440- print (f"\n { '=' * 50 } " )
441- print (f"Feature extraction with { name } ({ model_key } )..." )
442- print (f"{ '=' * 50 } " )
443-
444- try :
445- cache_dir = Path (__file__ ).resolve ().parent .parent / "web" / "public" / "data" / ".cache"
446- features = judge_feature_vectors (model_key , responses , texts , cache_dir = cache_dir )
447- print (f" Shape: { features .shape } " )
448-
449- sim_matrix = cosine_similarity_matrix (features )
450-
451- question_purity = cluster_purity (sim_matrix , question_labels )
452- model_purity = cluster_purity (sim_matrix , model_labels )
453- phase_purity = cluster_purity (sim_matrix , phase_labels )
454-
455- question_silhouette = silhouette_score_manual (sim_matrix , question_labels )
456- model_silhouette = silhouette_score_manual (sim_matrix , model_labels )
457- phase_silhouette = silhouette_score_manual (sim_matrix , phase_labels )
458-
459- question_sim = inter_vs_intra_similarity (sim_matrix , question_labels )
460- model_sim = inter_vs_intra_similarity (sim_matrix , model_labels )
461- phase_sim = inter_vs_intra_similarity (sim_matrix , phase_labels )
462-
463- # PCA projection — same as embedding spaces
464- coords = pca_2d (features )
465-
466- # Per-question similarity
467- per_question = {}
468- questions_by_num = {}
469- for idx , r in enumerate (responses ):
470- qn = r ["question_num" ]
471- if qn not in questions_by_num :
472- questions_by_num [qn ] = []
473- questions_by_num [qn ].append (idx )
474-
475- for qn , indices in questions_by_num .items ():
476- if len (indices ) < 2 :
477- continue
478- sims = []
479- for i in range (len (indices )):
480- for j in range (i + 1 , len (indices )):
481- sims .append (float (sim_matrix [indices [i ]][indices [j ]]))
482- per_question [int (qn )] = {
483- "n_models" : len (indices ),
484- "mean_similarity" : float (np .mean (sims )),
485- "min_similarity" : float (np .min (sims )),
486- "max_similarity" : float (np .max (sims )),
487- "models" : [responses [i ]["model" ] for i in indices ],
488- }
489-
490- space_result = {
491- "dimensions" : int (features .shape [1 ]),
492- "method" : "feature_extraction" ,
493- "judge_model" : model_key ,
494- "clustering" : {
495- "by_question" : {
496- "knn_purity" : float (question_purity ),
497- "silhouette" : float (question_silhouette ),
498- "intra_vs_inter" : question_sim ,
499- },
500- "by_model" : {
501- "knn_purity" : float (model_purity ),
502- "silhouette" : float (model_silhouette ),
503- "intra_vs_inter" : model_sim ,
504- },
505- "by_phase" : {
506- "knn_purity" : float (phase_purity ),
507- "silhouette" : float (phase_silhouette ),
508- "intra_vs_inter" : phase_sim ,
509- },
510- },
511- "verdict" : {
512- "clusters_by_question_more" : bool (question_purity > model_purity ),
513- "clusters_by_phase_more" : bool (phase_purity > model_purity ),
514- "question_vs_model_ratio" : float (question_purity / model_purity ) if model_purity > 0 else float ('inf' ),
515- "phase_vs_model_ratio" : float (phase_purity / model_purity ) if model_purity > 0 else float ('inf' ),
516- },
517- "per_question_similarity" : per_question ,
518- "pca_2d" : [[float (c [0 ]), float (c [1 ])] for c in coords ],
519- }
520-
521- print (f"\n RESULTS for { name } :" )
522- print (f" KNN Purity — by question: { question_purity :.3f} , by model: { model_purity :.3f} , by phase: { phase_purity :.3f} " )
523- print (f" Silhouette — by question: { question_silhouette :.3f} , by model: { model_silhouette :.3f} , by phase: { phase_silhouette :.3f} " )
524- print (f" Intra/Inter ratio — by question: { question_sim ['ratio' ]:.3f} , by model: { model_sim ['ratio' ]:.3f} " )
525- print (f" >>> { 'CLUSTERS BY QUESTION' if question_purity > model_purity else 'CLUSTERS BY MODEL' } <<<" )
526-
527- results ["per_embedding_space" ][name ] = space_result
528-
529- except Exception as e :
530- print (f" ERROR: { e } " )
531- import traceback
532- traceback .print_exc ()
533- results ["per_embedding_space" ][name ] = {"error" : str (e )}
534-
535430 # Cross-embedding-space invariance: do the spaces agree?
536431 spaces_with_data = [s for s in results ["per_embedding_space" ].values () if "error" not in s ]
537432 if len (spaces_with_data ) >= 2 :
0 commit comments