|
| 1 | +from pydantic import BaseModel, SecretStr |
| 2 | + |
| 3 | +from ..api import DBCaseConfig, DBConfig, IndexType, MetricType |
| 4 | + |
| 5 | + |
class LanceDBConfig(DBConfig):
    """LanceDB connection configuration."""

    db_label: str
    uri: str
    token: SecretStr | None = None

    def to_dict(self) -> dict:
        """Return the connection settings as a plain dict.

        Returns:
            A dict with keys ``"uri"`` and ``"token"``. ``"token"`` holds the
            unwrapped secret value, or ``None`` when ``self.token`` is falsy.
        """
        # Unwrap the secret only here, at the point where a raw value is needed.
        raw_token = self.token.get_secret_value() if self.token else None
        return {"uri": self.uri, "token": raw_token}
| 18 | + |
| 19 | + |
class LanceDBIndexConfig(BaseModel, DBCaseConfig):
    """Index configuration for LanceDB; the default index type is ``IndexType.IVFPQ``.

    Subclasses in this module override ``index`` and ``index_param`` for the
    other supported index types.
    """

    index: IndexType = IndexType.IVFPQ
    metric_type: MetricType = MetricType.L2
    # Values <= 0 mean "not set": the key is left out of index_param().
    num_partitions: int = 0
    num_sub_vectors: int = 0
    nbits: int = 8  # Must be 4 or 8
    sample_rate: int = 256
    max_iterations: int = 50

    def index_param(self) -> dict:
        """Build the keyword arguments used to create the index.

        Returns:
            A dict with ``metric``, ``num_bits``, ``sample_rate`` and
            ``max_iterations``, plus ``num_partitions`` / ``num_sub_vectors``
            when those fields are greater than zero.

        Raises:
            ValueError: If ``self.index`` is not one of the index types this
                client handles, or if the metric type is unsupported.
        """
        if self.index not in [
            IndexType.IVFPQ,
            IndexType.HNSW,
            IndexType.AUTOINDEX,
            IndexType.NONE,
        ]:
            msg = f"Index type {self.index} is not supported for LanceDB!"
            raise ValueError(msg)

        # See https://lancedb.github.io/lancedb/python/python/#lancedb.table.Table.create_index
        params = {
            "metric": self.parse_metric(),
            "num_bits": self.nbits,
            "sample_rate": self.sample_rate,
            "max_iterations": self.max_iterations,
        }

        # Only forward the optional sizing parameters when explicitly set.
        if self.num_partitions > 0:
            params["num_partitions"] = self.num_partitions
        if self.num_sub_vectors > 0:
            params["num_sub_vectors"] = self.num_sub_vectors

        return params

    def search_param(self) -> dict:
        """Return search-time parameters.

        No search parameters are configured here, so this returns an empty
        dict. It previously fell through and returned ``None``, contradicting
        the ``-> dict`` annotation.
        """
        return {}

    def parse_metric(self) -> str:
        """Translate ``self.metric_type`` into the metric string placed in ``index_param()``.

        Returns:
            The lower-cased enum value for L2 and COSINE, or ``"dot"`` for
            IP and DP.

        Raises:
            ValueError: If the metric type is none of the above.
        """
        if self.metric_type in [MetricType.L2, MetricType.COSINE]:
            return self.metric_type.value.lower()
        if self.metric_type in [MetricType.IP, MetricType.DP]:
            return "dot"
        msg = f"Metric type {self.metric_type} is not supported for LanceDB!"
        raise ValueError(msg)
| 64 | + |
| 65 | + |
class LanceDBNoIndexConfig(LanceDBIndexConfig):
    # Case config for IndexType.NONE: no index parameters are produced.
    index: IndexType = IndexType.NONE

    def index_param(self) -> dict:
        """Return an empty parameter dict."""
        return dict()
| 71 | + |
| 72 | + |
class LanceDBAutoIndexConfig(LanceDBIndexConfig):
    # Case config for IndexType.AUTOINDEX: no explicit index parameters are supplied.
    index: IndexType = IndexType.AUTOINDEX

    def index_param(self) -> dict:
        """Return an empty parameter dict."""
        return dict()
| 78 | + |
| 79 | + |
class LanceDBHNSWIndexConfig(LanceDBIndexConfig):
    # Case config for IndexType.HNSW; m / ef_construction <= 0 mean "not set".
    index: IndexType = IndexType.HNSW
    m: int = 0
    ef_construction: int = 0

    def index_param(self) -> dict:
        """Extend the base index parameters with the HNSW-specific settings.

        Returns:
            The base class's parameter dict plus ``index_type`` set to
            ``"IVF_HNSW_SQ"``, and ``m`` / ``ef_construction`` when those
            fields are greater than zero.
        """
        # See https://lancedb.github.io/lancedb/python/python/#lancedb.index.HnswSq
        hnsw_params = super().index_param()
        hnsw_params["index_type"] = "IVF_HNSW_SQ"

        # Forward each optional knob only when it was explicitly set.
        optional_knobs = (("m", self.m), ("ef_construction", self.ef_construction))
        for key, value in optional_knobs:
            if value > 0:
                hnsw_params[key] = value

        return hnsw_params
| 96 | + |
| 97 | + |
# Maps each supported index type to the case-config class that handles it.
_lancedb_case_config: dict[IndexType, type[LanceDBIndexConfig]] = {
    IndexType.IVFPQ: LanceDBIndexConfig,
    IndexType.AUTOINDEX: LanceDBAutoIndexConfig,
    IndexType.HNSW: LanceDBHNSWIndexConfig,
    IndexType.NONE: LanceDBNoIndexConfig,
}
0 commit comments