Release v0.1.0

doubleinfinity · web-flow · commit 4c656aaa0974 · 2025-07-13T21:28:17.000+10:00
* Bump version to 0.1.0 for development

* feat(api): introduce unified `create()` method for extensible vector index creation

* Refactor internal tests to use new generic `.create()` API

* Refactor internal tests to use new generic `.create()` API

* test: Update test suite for new .create() API

* 📄 docs(readme): updated the content information

* Add: latest uv.lock file for reproducible Python dependency management

* 📄 docs(readme): updated the content information

* 📄 docs(readme): updated the content information

* 📄 docs(changelog): update for v0.1.0 release
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ---
 
+## [0.1.0] - 2025-07-13
+
+### Added
+- **Generic `create()` method** for extensible vector index creation
+  - Registry-based architecture supporting multiple index types
+  - Case-insensitive index type matching: `create("HNSW")` or `create("hnsw")`
+  - Comprehensive parameter defaults with Rust backend validation
+  - Self-updating error messages showing all available index types
+  - Supports case-insensitive index types (e.g. "HNSW" and "hnsw")
+- **`available_index_types()`** class method for programmatic type discovery
+- Future-ready architecture for IVF, LSH, Annoy, and Flat index types
+
+### Changed
+- ⚠️ **Breaking Change**: Replaced index-specific factory methods with generic `create()`
+  - Migration: `VectorDatabase().create_index_hnsw(dim=768)` → `VectorDatabase().create("hnsw", dim=768)`
+  - All HNSW parameters now have best-practice defaults. Settings such as `m`, `ef_construction`, `expected_size`, and `space` rarely need changing; `dim` is the one users typically set themselves, since it must match the dimensionality of their vectors.
+  - Improved error messages with dynamic type listing
+
+### Fixed
+- Updated all internal testing files to use the new `.create()` API
+
+### Removed
+- Index-specific factory methods (replaced by unified `create()` interface)
+
+---
+
 ## [0.0.9] - 2025-07-10
 
 ### Added
diff --git a/README.md b/README.md
@@ -105,7 +105,7 @@ from zeusdb_vector_database import VectorDatabase
 vdb = VectorDatabase()
 
 # Initialize and set up the database resources
-index = vdb.create_index_hnsw(dim = 8)
+index = vdb.create(index_type="hnsw", dim=8)
 
 # Vector embeddings with accompanying ID's and Metadata
 records = [
@@ -156,11 +156,11 @@ ZeusDB Vector Database makes it easy to work with high-dimensional vector data u
 
 **Three simple steps**
 
-1. **Create an index**  
-2. **Add data to the index**  
-3. **Conduct a similarity search**
+1. **Create an index** using `.create()`
+2. **Add data** using `.add(...)`
+3. **Conduct a similarity search** using `.search(...)`
 
-Each step is covered below.
+Each step is covered below. 
 
 <br/>
 
@@ -176,22 +176,24 @@ from zeusdb_vector_database import VectorDatabase
 vdb = VectorDatabase()
 
 # Initialize and set up the database resources
-index = vdb.create_index_hnsw(
+index = vdb.create(
+  index_type = "hnsw",
   dim = 8, 
   space = "cosine", 
-  M = 16, 
+  m = 16, 
   ef_construction = 200, 
-  expected_size=5
+  expected_size = 5
   )
 ```
 
-#### 📘 `create_index_hnsw()` Parameters
+#### 📘 `create()` Parameters
 
 | Parameter        | Type   | Default   | Description                                                                 |
 |------------------|--------|-----------|-----------------------------------------------------------------------------|
+| `index_type`     | `str`  | `"hnsw"`  | The type of vector index to create. Currently supports `"hnsw"`. Future options include `"ivf"`, `"flat"`, etc. Case-insensitive. |
 | `dim`            | `int`  | `1536`    | Dimensionality of the vectors to be indexed. Each vector must have this length. The default dim=1536 is chosen to match the output dimensionality of OpenAI’s text-embedding-ada-002 model. |
 | `space`          | `str`  | `"cosine"`| Distance metric used for similarity search. Options include `"cosine"`. Additional metrics such as `"l2"`, and `"dot"` will be added in future versions. |
-| `M`              | `int`  | `16`      | Number of bi-directional connections created for each new node. Higher `M` improves recall but increases index size and build time. |
+| `m`              | `int`  | `16`      | Number of bi-directional connections created for each new node. Higher `m` improves recall but increases index size and build time. |
 | `ef_construction`| `int`  | `200`     | Size of the dynamic list used during index construction. Larger values increase indexing time and memory, but improve quality. |
 | `expected_size`  | `int`  | `10000`   | Estimated number of elements to be inserted. Used for preallocating internal data structures. Not a hard limit. |
 
@@ -374,7 +376,7 @@ print(index.info())
 ```
 *Output*
 ```
-HNSWIndex(dim=8, space=cosine, M=16, ef_construction=200, expected_size=5, vectors=5)
+HNSWIndex(dim=8, space=cosine, m=16, ef_construction=200, expected_size=5, vectors=5)
 ```
 
 <br/>
diff --git a/benchmarks/1.create_index_using_hnsw.py b/benchmarks/1.create_index_using_hnsw.py
@@ -1,11 +1,33 @@
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
-# Initialize and set up the database resources
-index = vdb.create_index_hnsw(dim = 1536, space = "cosine", M = 16, ef_construction = 200, expected_size=5)
+# Absolute minimal - uses all defaults
+index1 = vdb.create()  # index_type="hnsw", dim=1536, space="cosine", m=16, etc.
+print("\n✅ Absolute minimal index creation (all defaults):")
+print(index1.info())
 
-# Outputs the details of the HNSW index
-print(index.info()) 
- 
+# Specify just the index type
+# This will use default values for all other parameters
+index2 = vdb.create("hnsw")  # dim=1536, space="cosine", m=16, etc.
+print("\n✅ Specify only the index type (defaults for everything else):")
+print(index2.info())
+
+# Specify just the arguments you need
+index3 = vdb.create("hnsw", dim=768, m=32)
+print("\n✅ Specify 'dim' and 'm' in index creation:")
+print(index3.info())
+
+# Specify all arguments explicitly
+index4 = vdb.create(
+    "hnsw",
+    dim=128,
+    space="cosine",
+    m=32,
+    ef_construction=100,
+    expected_size=5
+)
+print("\n✅ Specify all arguments explicitly:")
+print(index4.info())
diff --git a/benchmarks/10.new_query_data_with_metadata.py b/benchmarks/10.new_query_data_with_metadata.py
@@ -1,11 +1,13 @@
 import numpy as np
+
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
 # Create index
-index = vdb.create_index_hnsw(dim=384, expected_size=10000)
+index = vdb.create(index_type="hnsw", dim=384, expected_size=10000)
 
 # Outputs the details of the HNSW index
 print("\n--- Shows Initial Index Information ---")
diff --git a/benchmarks/11.new_query_data_return_vector.py b/benchmarks/11.new_query_data_return_vector.py
@@ -1,11 +1,13 @@
 import numpy as np
+
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
 # Create index with dim=4
-index = vdb.create_index_hnsw(dim=4, expected_size=100)
+index = vdb.create(index_type="hnsw", dim=4, expected_size=100)
 
 # Show index info
 print("\n--- Shows Initial Index Information ---")
diff --git a/benchmarks/12.readme_code.py b/benchmarks/12.readme_code.py
@@ -6,7 +6,7 @@
 
 # Initialize and set up the database resources
 #index = vdb.create_index_hnsw(dim = 8, space = "cosine", M = 16, ef_construction = 200, expected_size=5)
-index = vdb.create_index_hnsw(dim = 8)
+index = vdb.create(index_type="hnsw", dim=8)
 
 # Upload vector records using the unified `add()` method
 records = [
diff --git a/benchmarks/13.new_add_data_with_errors.py b/benchmarks/13.new_add_data_with_errors.py
@@ -5,7 +5,7 @@
 vdb = VectorDatabase()
 
 # Step 1: Set up index with dim=8
-index = vdb.create_index_hnsw(dim=8, space="cosine", M=16, ef_construction=200, expected_size=5)
+index = vdb.create(index_type="hnsw", dim=8, space="cosine", m=16, ef_construction=200, expected_size=5)
 
 # Step 2: Add initial valid records
 records = [
diff --git a/benchmarks/14.new_benchmark_overwrite.py b/benchmarks/14.new_benchmark_overwrite.py
@@ -9,7 +9,7 @@
 vdb = VectorDatabase()
 
 # Create index with large capacity
-index = vdb.create_index_hnsw(dim=8, expected_size=100000)
+index = vdb.create(index_type="hnsw", dim=8, expected_size=100000)
 
 # Insert 10,000 random records
 data = [{"id": f"doc_{i}", "values": [random.random() for _ in range(8)]} for i in range(10000)]
diff --git a/benchmarks/15.get_records.py b/benchmarks/15.get_records.py
@@ -5,7 +5,7 @@
 vdb = VectorDatabase()
 
 # Initialize and set up the database resources
-index = vdb.create_index_hnsw(dim = 2, space = "cosine", M = 16, ef_construction = 200, expected_size=5)
+index = vdb.create(index_type="hnsw", dim=2, space="cosine", m=16, ef_construction=200, expected_size=5)
 
 # Upload vector records using the unified `add()` method
 index.add([
diff --git a/benchmarks/16.new_test_cosine_L1_L2.py b/benchmarks/16.new_test_cosine_L1_L2.py
@@ -14,7 +14,7 @@
 print("\n==================== Testing space = 'cosine' ====================\n")
 
 vdb_cos = VectorDatabase()
-index_cos = vdb_cos.create_index_hnsw(dim=8, space="cosine", M=16, ef_construction=200, expected_size=5)
+index_cos = vdb_cos.create(index_type="hnsw", dim=8, space="cosine", m=16, ef_construction=200, expected_size=5)
 
 add_result_cos = index_cos.add(records)
 print("--- Add Results Summary ---")
@@ -34,7 +34,8 @@
 print("\n==================== Testing space = 'L2' ====================\n")
 
 vdb_l2 = VectorDatabase()
-index_l2 = vdb_l2.create_index_hnsw(dim=8, space="L2", M=16, ef_construction=200, expected_size=5)
+index_l2 = vdb_l2.create(index_type="hnsw", dim=8, space="L2", m=16, ef_construction=200, expected_size=5)
+
 
 add_result_l2 = index_l2.add(records)
 print("--- Add Results Summary ---")
@@ -54,7 +55,7 @@
 print("\n==================== Testing space = 'L1' ====================\n")
 
 vdb_l1 = VectorDatabase()
-index_l1 = vdb_l1.create_index_hnsw(dim=8, space="L1", M=16, ef_construction=200, expected_size=5)
+index_l1 = vdb_l1.create(index_type="hnsw", dim=8, space="L1", m=16, ef_construction=200, expected_size=5)
 
 add_result_l1 = index_l1.add(records)
 print("--- Add Results Summary ---")
diff --git a/benchmarks/17.new_case_insensitive_test.py b/benchmarks/17.new_case_insensitive_test.py
@@ -8,7 +8,9 @@
     print(f"\n--- Creating index with space = '{metric}' ---")
     try:
         vdb = VectorDatabase()
-        index = vdb.create_index_hnsw(dim=4, space=metric, M=8, ef_construction=100, expected_size=10)
+        index = vdb.create(index_type="hnsw", dim=4, space=metric, m=8, ef_construction=100, expected_size=10)
         print(f"✔️  Successfully created index with space = '{metric}'")
     except Exception as e:
         print(f"❌ Failed to create index with space = '{metric}': {e}")
+
+
diff --git a/benchmarks/18.new_test_metadata_filters_basic.py b/benchmarks/18.new_test_metadata_filters_basic.py
@@ -4,7 +4,7 @@
 
 # Create the index
 vdb = VectorDatabase()
-index = vdb.create_index_hnsw(dim=4, space="cosine", expected_size=10)
+index = vdb.create(index_type="hnsw", dim=4, space="cosine", expected_size=10)
 
 # Add records with metadata of varying structure
 records = [
diff --git a/benchmarks/19.new_test_metadata_filters_advanced.py b/benchmarks/19.new_test_metadata_filters_advanced.py
@@ -7,7 +7,7 @@
 vdb = VectorDatabase()
 
 # Initialize and set up the database resources
-index = vdb.create_index_hnsw(dim=8)
+index = vdb.create(index_type="hnsw", dim=8)
 
 print("=" * 60)
 print("ZEUSDB RICH METADATA FILTER TESTING")
diff --git a/benchmarks/2.add_index_metadata.py b/benchmarks/2.add_index_metadata.py
@@ -1,7 +1,11 @@
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
+# Instantiate the VectorDatabase class
 vdb = VectorDatabase()
-index = vdb.create_index_hnsw(dim=128, space="cosine", M=32, ef_construction=100)
+
+# Create an HNSW index with specific parameters
+index = vdb.create(index_type="hnsw", dim=128, space="cosine", m=32, ef_construction=100)
 
 # Outputs the details of the HNSW index
 print(index.info())  
diff --git a/benchmarks/3.list_records_in_index.py b/benchmarks/3.list_records_in_index.py
@@ -1,8 +1,11 @@
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
-# Step 1: Create the index
+# Instantiate the VectorDatabase class
 vdb = VectorDatabase()
-index = vdb.create_index_hnsw(dim=8, space="cosine", M=16, ef_construction=200, expected_size=5)
+
+# Step 1: Create the index
+index = vdb.create(index_type="hnsw", dim=8, space="cosine", m=16, ef_construction=200, expected_size=5)
 
 # Step 2: Add data points
 records = [
diff --git a/benchmarks/4.check_point_exists.py b/benchmarks/4.check_point_exists.py
@@ -1,10 +1,11 @@
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
 # Create an HNSW index with specified dimension and expected size
-index = vdb.create_index_hnsw(dim=2, expected_size=5)
+index = vdb.create(index_type="hnsw", dim=2, expected_size=5)
 
 # Add a point to the index - Blank metadata
 result = index.add({
diff --git a/benchmarks/5.remove_points.py b/benchmarks/5.remove_points.py
@@ -1,10 +1,11 @@
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
 # Create an HNSW index with specified dimension and expected size
-index = vdb.create_index_hnsw(dim=2, expected_size=5)
+index = vdb.create(index_type="HNSW", dim=2, expected_size=5)
 
 # Add a point to the index - Blank metadata
 result = index.add({
diff --git a/benchmarks/6.new_add_single_data.py b/benchmarks/6.new_add_single_data.py
@@ -1,12 +1,12 @@
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
 # Create index
 vdb = VectorDatabase()
-index = vdb.create_index_hnsw(dim=384, expected_size=10000)
-#index = vdb.create_index_hnsw(dim = 1536, space = "cosine", M = 16, ef_construction = 200, expected_size=5)
+index = vdb.create(index_type="hnsw", dim=384, expected_size=10000)
 
 # Outputs the details of the HNSW index
 print("\n--- Shows Initial Index Information ---")
diff --git a/benchmarks/7.new_add_list_of_objects.py b/benchmarks/7.new_add_list_of_objects.py
@@ -1,12 +1,14 @@
 import numpy as np
+
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
 # Create index
 vdb = VectorDatabase()
-index = vdb.create_index_hnsw(dim=384, expected_size=10000)
+index = vdb.create(index_type="hnsw", dim=384, expected_size=10000)
 
 # Outputs the details of the HNSW index
 print("\n--- Shows Initial Index Information ---")
diff --git a/benchmarks/8.new_add_seperate_arrays.py b/benchmarks/8.new_add_seperate_arrays.py
@@ -1,12 +1,14 @@
 import numpy as np
+
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
 # Create index
 vdb = VectorDatabase()
-index = vdb.create_index_hnsw(dim=384, expected_size=10000)
+index = vdb.create(index_type="hnsw", dim=384, expected_size=10000)
 
 # Outputs the details of the HNSW index
 print("\n--- Shows Initial Index Information ---")
diff --git a/benchmarks/9.new_query_data.py b/benchmarks/9.new_query_data.py
@@ -1,12 +1,14 @@
 import numpy as np
+
+# Import the vector database module
 from zeusdb_vector_database import VectorDatabase
 
 # Instantiate the VectorDatabase class
 vdb = VectorDatabase()
 
 # Create index
 vdb = VectorDatabase()
-index = vdb.create_index_hnsw(dim=384, expected_size=10000)
+index = vdb.create(index_type="hnsw", dim=384, expected_size=10000)
 
 # Outputs the details of the HNSW index
 print("\n--- Shows Initial Index Information ---")
@@ -26,7 +28,7 @@
 
 print("\n--- Shows Data Insertion Results ---")
 print(f"Batch result: {result}")
-# Output: BatchResult(inserted=1000, errors=0, shape=(1000, 384))
+# Output: AddResult(inserted=1000, errors=0, shape=(1000, 384))
 
 
 # Outputs the details of the HNSW index
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "zeusdb-vector-database"
-version = "0.0.9"
+version = "0.1.0"
 description = "Blazing-fast vector DB with real-time similarity search and metadata filtering."
 readme = "README.md"
 authors = [
diff --git a/src/zeusdb_vector_database/__init__.py b/src/zeusdb_vector_database/__init__.py
diff --git a/src/zeusdb_vector_database/vector_database.py b/src/zeusdb_vector_database/vector_database.py
diff --git a/tests/test_hnsw.py b/tests/test_hnsw.py
diff --git a/uv.lock b/uv.lock
diff --git a/vdb-core/Cargo.toml b/vdb-core/Cargo.toml
diff --git a/vdb-core/src/hnsw_index.rs b/vdb-core/src/hnsw_index.rs