Skip to content

Commit 4f1fb07

Browse files
committed
Refactor scripts for vector index and search matrix summarization
- Updated dataset and resource configurations in `run_11_vector_index_build_matrix.sh` and `run_12_vector_search_matrix.sh` to use larger datasets and increased resource limits.
- Introduced normalization functions for run labels in multiple scripts to ensure consistent labeling across runs.
- Enhanced summary scripts (`summarize_07_tables_oltp_matrix.sh`, `summarize_08_tables_olap_matrix.sh`, `summarize_09_graph_oltp_matrix.sh`, `summarize_10_graph_olap_matrix.sh`, `summarize_11_vector_index_build_matrix.sh`, `summarize_12_vector_search_matrix.sh`) to include additional metrics and improve data handling.
- Implemented new functions for handling hash groups and row counts in OLAP summary scripts to facilitate better reporting of cross-DB comparisons.
- Improved error handling and directory management in disk usage calculations.
1 parent 9cf3b2f commit 4f1fb07

36 files changed

Lines changed: 2485 additions & 739 deletions

bindings/python/examples/02_social_network_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22
"""
3-
ArcadeDB Python Bindings - Social Network Graph Example
3+
Example 02: Social Network Graph
44
55
This example demonstrates how to use ArcadeDB as a graph database to model
66
a social network with people and friendships. It showcases:
@@ -1166,6 +1166,6 @@ def print_section_header(title, emoji="🔹"):
11661166

11671167

11681168
if __name__ == "__main__":
1169-
print("🌐 ArcadeDB Python - Social Network Graph Example")
1169+
print("🌐 ArcadeDB Python - Example 02: Social Network Graph")
11701170
print("=" * 55)
11711171
main()

bindings/python/examples/03_vector_search.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@
4848
import numpy as np
4949

5050
# Parse command line arguments
51-
parser = argparse.ArgumentParser(description="Vector Search Example")
51+
parser = argparse.ArgumentParser(
52+
description="Example 03: Vector Search - Semantic Similarity"
53+
)
5254
args = parser.parse_args()
5355

5456
print("=" * 70)

bindings/python/examples/04_csv_import_documents.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -896,7 +896,7 @@ def check_dataset_exists(data_dir):
896896

897897
# Parse command-line arguments
898898
parser = argparse.ArgumentParser(
899-
description="Import MovieLens dataset into ArcadeDB",
899+
description="Example 04: Import MovieLens dataset into ArcadeDB",
900900
formatter_class=argparse.RawDescriptionHelpFormatter,
901901
epilog="""
902902
Examples:

bindings/python/examples/05_csv_import_graph.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22
"""
3-
Graph Creation Benchmark - Clean Architecture
3+
Example 05: Graph Creation Benchmark - Clean Architecture
44
55
This benchmark compares graph creation strategies with multiple options:
66
- Method: Java API vs SQL
@@ -2152,7 +2152,7 @@ def run_and_validate_queries(db: Any, size: str, check_baseline: bool = True):
21522152

21532153

21542154
def main():
2155-
parser = argparse.ArgumentParser(description="Graph Creation Benchmark")
2155+
parser = argparse.ArgumentParser(description="Example 05: Graph Creation Benchmark")
21562156
parser.add_argument(
21572157
"--dataset",
21582158
choices=["movielens-small", "movielens-large"],
@@ -2231,7 +2231,7 @@ def main():
22312231
db_name = args.db_name
22322232

22332233
print("=" * 70)
2234-
print("🚀 Graph Creation Benchmark")
2234+
print("🚀 Example 05: Graph Creation Benchmark")
22352235
print("=" * 70)
22362236
print(f"Dataset: {args.dataset}")
22372237
print(f"Batch size: {args.batch_size:,}")

bindings/python/examples/06_vector_search_recommendations.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,9 @@ def vector_based_recommendations(
425425

426426

427427
def main():
428-
parser = argparse.ArgumentParser(description="Vector Search Movie Recommendations")
428+
parser = argparse.ArgumentParser(
429+
description="Example 06: Vector Search Movie Recommendations"
430+
)
429431

430432
parser.add_argument(
431433
"--db-path",

bindings/python/examples/07_stackoverflow_tables_oltp.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -577,10 +577,8 @@ def create_schema_duckdb(conn):
577577

578578

579579
def create_duckdb_id_indexes(conn):
580-
for table in TABLE_DEFS:
581-
conn.execute(
582-
f'CREATE INDEX IF NOT EXISTS idx_{table["name"].lower()}_id ON "{table["name"]}"("Id")'
583-
)
580+
print("Skipping manual DuckDB secondary indexes for this benchmark.")
581+
return 0.0
584582

585583

586584
def create_schema_postgresql(conn):
@@ -1433,9 +1431,7 @@ def run_oltp_duckdb(
14331431
f"Ingest end (duckdb, UTC): {ingest_ended_at} "
14341432
f"(elapsed={preload_time:.2f}s)"
14351433
)
1436-
index_start = time.time()
1437-
create_duckdb_id_indexes(conn)
1438-
index_time = time.time() - index_start
1434+
index_time = create_duckdb_id_indexes(conn)
14391435

14401436
load_counts_start = time.time()
14411437
preload_counts = count_table_rows_sql(conn)
@@ -2155,7 +2151,9 @@ def run_in_docker(args):
21552151

21562152

21572153
def main():
2158-
parser = argparse.ArgumentParser(description="Stack Overflow Tables (OLTP)")
2154+
parser = argparse.ArgumentParser(
2155+
description="Example 07: Stack Overflow Tables (OLTP)"
2156+
)
21592157
parser.add_argument(
21602158
"--dataset",
21612159
choices=sorted(EXPECTED_DATASETS),

bindings/python/examples/08_stackoverflow_tables_olap.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -998,17 +998,8 @@ def create_indexes_sqlite(conn: sqlite3.Connection) -> float:
998998

999999

10001000
def create_indexes_duckdb(conn) -> float:
1001-
start = time.time()
1002-
for table, props, unique in INDEX_DEFS:
1003-
suffix = "_".join(props)
1004-
index_name = f"idx_{table}_{suffix}"
1005-
unique_sql = "UNIQUE " if unique else ""
1006-
ddl = (
1007-
f"CREATE {unique_sql}INDEX IF NOT EXISTS {index_name} "
1008-
f"ON {table} ({', '.join(props)})"
1009-
)
1010-
conn.execute(ddl)
1011-
return time.time() - start
1001+
print("Skipping manual DuckDB secondary indexes for this benchmark.", flush=True)
1002+
return 0.0
10121003

10131004

10141005
def create_indexes_postgresql(conn) -> float:
@@ -1561,10 +1552,7 @@ def run_olap_arcadedb(
15611552
},
15621553
"index": {
15631554
"total_s": index_elapsed,
1564-
"indexes": [
1565-
{"table": t, "properties": props, "unique": unique}
1566-
for t, props, unique in INDEX_DEFS
1567-
],
1555+
"indexes": [],
15681556
},
15691557
"queries": {
15701558
"total_s": query_elapsed,
@@ -2002,7 +1990,7 @@ def query_runner(sql: str) -> List[Dict[str, Any]]:
20021990

20031991
def main():
20041992
parser = argparse.ArgumentParser(
2005-
description="Stack Overflow Tables (OLAP)",
1993+
description="Example 08: Stack Overflow Tables (OLAP)",
20061994
)
20071995
parser.add_argument(
20081996
"--dataset",

0 commit comments

Comments
 (0)