Skip to content

Commit ac6a84b

Browse files
committed
Fix Python examples CI: Update workflow triggers, use native SQL IMPORT DATABASE in Example 06, remove redundant JsonlImporter
1 parent b3f7fe3 commit ac6a84b

3 files changed

Lines changed: 26 additions & 114 deletions

File tree

.github/workflows/test-python-examples.yml

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,32 @@
11
name: Test Python Examples
22

33
on:
4-
# Run on pull requests to main
4+
# Run on pushes that touch the bindings/python/ directory
5+
push:
6+
paths:
7+
- 'bindings/python/**'
8+
- '.github/workflows/test-python-examples.yml'
9+
10+
# Run on pull requests affecting bindings/python/
511
pull_request:
6-
branches: [main]
12+
paths:
13+
- 'bindings/python/**'
14+
- '.github/workflows/test-python-examples.yml'
15+
16+
# Run after release workflow completes
17+
workflow_run:
18+
workflows: ["Release"]
19+
types: [completed]
720

821
# Allow being called by other workflows (e.g., release workflow)
922
workflow_call:
1023

1124
# Allow manual trigger
1225
workflow_dispatch:
1326

27+
permissions:
28+
contents: read
29+
1430
jobs:
1531
# First job: Download ArcadeDB JARs (platform-agnostic)
1632
download-jars:
@@ -169,15 +185,11 @@ jobs:
169185
run: |
170186
# Install dependencies needed by examples
171187
# Note: macOS Intel needs NumPy 1.x for PyTorch compatibility
172-
# Note: Windows ARM64 doesn't have PyTorch wheels, skip sentence-transformers
173-
if [[ "${{ matrix.platform }}" == "windows/arm64" ]]; then
174-
echo "⚠️ Skipping sentence-transformers on Windows ARM64 (PyTorch not available)"
175-
pip install numpy requests
176-
elif [[ "$OSTYPE" == "darwin"* ]]; then
188+
if [[ "$OSTYPE" == "darwin"* ]]; then
177189
# macOS: Default wheels are CPU/MPS (no CUDA)
178190
pip install "numpy<2.0" requests sentence-transformers
179191
else
180-
# Linux & Windows x64: Install CPU-only PyTorch to save space (avoid CUDA)
192+
# Linux & Windows: Install CPU-only PyTorch to save space (avoid CUDA)
181193
pip install torch --index-url https://download.pytorch.org/whl/cpu
182194
pip install numpy requests sentence-transformers
183195
fi
@@ -247,17 +259,6 @@ jobs:
247259
for example in $examples; do
248260
total=$((total + 1))
249261
250-
# Skip example 06 on Windows ARM64 (PyTorch/sentence-transformers not available)
251-
if [[ "$example" == "06_vector_search_recommendations.py" ]] && [[ "${{ matrix.platform }}" == "windows/arm64" ]]; then
252-
echo "----------------------------------------"
253-
echo "📝 Skipping: $example (PyTorch not available on Windows ARM64)"
254-
echo "----------------------------------------"
255-
echo "⏭️ SKIPPED: $example (PyTorch not available on Windows ARM64)" | tee -a $results_file
256-
skipped=$((skipped + 1))
257-
echo ""
258-
continue
259-
fi
260-
261262
# Set example-specific parameters and timeout
262263
case "$example" in
263264
"04_csv_import_documents.py")
@@ -393,6 +394,8 @@ jobs:
393394
echo " - Tested with \`--dataset movielens-small --parallel 1 --no-async --export --import-jsonl\`" >> $GITHUB_STEP_SUMMARY
394395
echo "- **06_vector_search_recommendations.py** - JVector vector indexing for movie recommendations" >> $GITHUB_STEP_SUMMARY
395396
echo " - Tested with \`--import-jsonl exports/movielens_graph_small_db.jsonl.tgz\`" >> $GITHUB_STEP_SUMMARY
397+
echo "- **07_stackoverflow_multimodel.py** - Multi-model graph/document/vector example with StackOverflow data" >> $GITHUB_STEP_SUMMARY
398+
echo " - Tested with \`--dataset stackoverflow-small\`" >> $GITHUB_STEP_SUMMARY
396399
echo "" >> $GITHUB_STEP_SUMMARY
397400
398401
- name: Upload example logs

bindings/python/examples/06_vector_search_recommendations.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,10 @@ def import_from_jsonl(jsonl_path, db_path):
109109

110110
# Create new database
111111
with arcadedb.create_database(str(db_path)) as db:
112-
# Import using the importer tool
113-
# Note: We use the importer module directly
114-
from arcadedb_embedded.importer import JsonlImporter
115-
116-
importer = JsonlImporter(db)
117-
importer.import_file(str(jsonl_path))
112+
# Import using SQL IMPORT DATABASE command
113+
abs_path = Path(jsonl_path).resolve()
114+
print(f"Importing from {abs_path}...")
115+
db.command("sql", f"IMPORT DATABASE file://{abs_path}")
118116

119117
return time.time() - start_time
120118

bindings/python/src/arcadedb_embedded/importer.py

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -803,95 +803,6 @@ def import_csv(database, file_path: str, type_name: str, **options) -> Dict[str,
803803
)
804804

805805

806-
class JsonlImporter:
807-
"""
808-
Importer for JSONL (JSON Lines) files.
809-
"""
810-
811-
def __init__(self, database):
812-
self.database = database
813-
814-
def import_file(
815-
self, file_path: str, batch_size: int = 5000, **options
816-
) -> Dict[str, Any]:
817-
"""
818-
Import data from a JSONL file.
819-
820-
Args:
821-
file_path: Path to the JSONL file
822-
batch_size: Number of records to commit in a batch
823-
**options: Additional options
824-
825-
Returns:
826-
Dict with import statistics
827-
"""
828-
import json
829-
import time
830-
831-
if not os.path.exists(file_path):
832-
raise ArcadeDBError(f"File not found: {file_path}")
833-
834-
start_time = time.time()
835-
stats = {"documents": 0, "vertices": 0, "edges": 0, "errors": 0}
836-
837-
self.database.begin()
838-
try:
839-
with open(file_path, "r", encoding="utf-8") as f:
840-
count = 0
841-
for line in f:
842-
line = line.strip()
843-
if not line:
844-
continue
845-
846-
try:
847-
doc_data = json.loads(line)
848-
849-
# Determine record type
850-
cat = doc_data.get("@cat")
851-
type_name = doc_data.get("@type")
852-
853-
record = None
854-
if cat == "v":
855-
if type_name:
856-
record = self.database.new_vertex(type_name)
857-
stats["vertices"] += 1
858-
elif cat == "d":
859-
if type_name:
860-
record = self.database.new_document(type_name)
861-
stats["documents"] += 1
862-
elif type_name:
863-
# Default to document if category not specified
864-
record = self.database.new_document(type_name)
865-
stats["documents"] += 1
866-
867-
if record:
868-
# Set properties
869-
for k, v in doc_data.items():
870-
if k.startswith("@"):
871-
continue
872-
record.set(k, v)
873-
874-
record.save()
875-
count += 1
876-
877-
if count % batch_size == 0:
878-
self.database.commit()
879-
self.database.begin()
880-
881-
except json.JSONDecodeError:
882-
stats["errors"] += 1
883-
except Exception:
884-
stats["errors"] += 1
885-
886-
self.database.commit()
887-
except Exception:
888-
self.database.rollback()
889-
raise
890-
891-
stats["duration_ms"] = (time.time() - start_time) * 1000
892-
return stats
893-
894-
895806
def import_neo4j(database, file_path: str, **options) -> Dict[str, Any]:
896807
"""
897808
Import Neo4j JSONL export into database using Java Neo4jImporter.

0 commit comments

Comments
 (0)