-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasic_analysis.py
More file actions
31 lines (27 loc) · 1.07 KB
/
Copy pathbasic_analysis.py
File metadata and controls
31 lines (27 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
"""Basic analysis example — demonstrates headless library usage without Streamlit."""
from vector_observatory import EmbeddingPipeline, HDBSCANClusterer, UMAPReducer
# Configure the pipeline one stage per statement. Each builder call's return
# value is what the following stage is invoked on, exactly as in a single
# fluent chain; the name is simply rebound at every step.
pipeline = EmbeddingPipeline()

# Source: a parquet file plus the column names to read from it.
pipeline = pipeline.from_parquet(
    "path/to/your_embeddings.parquet",
    id_col="id",
    embedding_col="vector",
    metadata_cols=["title", "category"],
)

# Reduction and clustering stages, each configured with its own helper object.
pipeline = pipeline.reduce(
    UMAPReducer(n_components=2, n_neighbors=15, metric="cosine")
)
pipeline = pipeline.cluster(HDBSCANClusterer(min_cluster_size=10))

# Request metrics and register the target project name.
pipeline = pipeline.compute_metrics()
pipeline = pipeline.store(project="my-project")

# run() yields the object whose attributes are reported further down.
result = pipeline.run()
# Dataset-level summary, read from result.dataset and emitted one item per line.
ds = result.dataset
print(
    f"Loaded: {ds.n_samples:,} points, {ds.dim} dims",
    f"Clusters found: {ds.n_clusters}",
    f"Noise fraction: {ds.noise_fraction:.1%}",
    sep="\n",
)

# Geometry diagnostics, read from result.geometry_metrics; the header string
# carries its own leading newline, which produces the blank separator line.
geo = result.geometry_metrics
print(
    "\nEmbedding health:",
    f" Anisotropy: {geo.anisotropy:.3f} (>0.5 = collapse risk)",
    f" Isotropy score: {geo.isotropy_score:.3f}",
    f" Intrinsic dim: {geo.intrinsic_dim:.1f} (of {ds.dim} total)",
    sep="\n",
)

# Closing lines: the run identifier, then a pointer to where it was saved.
print(
    f"\nRun ID: {result.run_id}",
    "Saved to project 'my-project'. Open the app to explore.",
    sep="\n",
)