Fix Python examples CI: Update workflow triggers, use native SQL IMPORT DATABASE in Example 06, remove redundant JsonlImporter

tae898 · tae898 · commit ac6a84bd6d20 · 2025-12-28T18:04:40.000+01:00
diff --git a/.github/workflows/test-python-examples.yml b/.github/workflows/test-python-examples.yml
@@ -1,16 +1,32 @@
 name: Test Python Examples
 
 on:
-  # Run on pull requests to main
+  # Run on pushes that touch bindings/python/ or this workflow file
+  push:
+    paths:
+      - 'bindings/python/**'
+      - '.github/workflows/test-python-examples.yml'
+
+  # Run on pull requests that touch bindings/python/ or this workflow file
   pull_request:
-    branches: [main]
+    paths:
+      - 'bindings/python/**'
+      - '.github/workflows/test-python-examples.yml'
+
+  # Run whenever the "Release" workflow completes (any conclusion, success or failure)
+  workflow_run:
+    workflows: ["Release"]
+    types: [completed]
 
   # Allow being called by other workflows (e.g., release workflow)
   workflow_call:
 
   # Allow manual trigger
   workflow_dispatch:
 
+permissions:
+  contents: read
+
 jobs:
   # First job: Download ArcadeDB JARs (platform-agnostic)
   download-jars:
@@ -169,15 +185,11 @@ jobs:
         run: |
           # Install dependencies needed by examples
           # Note: macOS Intel needs NumPy 1.x for PyTorch compatibility
-          # Note: Windows ARM64 doesn't have PyTorch wheels, skip sentence-transformers
-          if [[ "${{ matrix.platform }}" == "windows/arm64" ]]; then
-            echo "⚠️  Skipping sentence-transformers on Windows ARM64 (PyTorch not available)"
-            pip install numpy requests
-          elif [[ "$OSTYPE" == "darwin"* ]]; then
+          if [[ "$OSTYPE" == "darwin"* ]]; then
             # macOS: Default wheels are CPU/MPS (no CUDA)
             pip install "numpy<2.0" requests sentence-transformers
           else
-            # Linux & Windows x64: Install CPU-only PyTorch to save space (avoid CUDA)
+            # Linux & Windows: Install CPU-only PyTorch to save space (avoid CUDA)
             pip install torch --index-url https://download.pytorch.org/whl/cpu
             pip install numpy requests sentence-transformers
           fi
@@ -247,17 +259,6 @@ jobs:
           for example in $examples; do
             total=$((total + 1))
 
-            # Skip example 06 on Windows ARM64 (PyTorch/sentence-transformers not available)
-            if [[ "$example" == "06_vector_search_recommendations.py" ]] && [[ "${{ matrix.platform }}" == "windows/arm64" ]]; then
-              echo "----------------------------------------"
-              echo "📝 Skipping: $example (PyTorch not available on Windows ARM64)"
-              echo "----------------------------------------"
-              echo "⏭️  SKIPPED: $example (PyTorch not available on Windows ARM64)" | tee -a $results_file
-              skipped=$((skipped + 1))
-              echo ""
-              continue
-            fi
-
             # Set example-specific parameters and timeout
             case "$example" in
               "04_csv_import_documents.py")
@@ -393,6 +394,8 @@ jobs:
           echo "  - Tested with \`--dataset movielens-small --parallel 1 --no-async --export --import-jsonl\`" >> $GITHUB_STEP_SUMMARY
           echo "- **06_vector_search_recommendations.py** - JVector vector indexing for movie recommendations" >> $GITHUB_STEP_SUMMARY
           echo "  - Tested with \`--import-jsonl exports/movielens_graph_small_db.jsonl.tgz\`" >> $GITHUB_STEP_SUMMARY
+          echo "- **07_stackoverflow_multimodel.py** - Multi-model graph/document/vector example with StackOverflow data" >> $GITHUB_STEP_SUMMARY
+          echo "  - Tested with \`--dataset stackoverflow-small\`" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
 
       - name: Upload example logs
diff --git a/bindings/python/examples/06_vector_search_recommendations.py b/bindings/python/examples/06_vector_search_recommendations.py
@@ -109,12 +109,10 @@ def import_from_jsonl(jsonl_path, db_path):
 
     # Create new database
     with arcadedb.create_database(str(db_path)) as db:
-        # Import using the importer tool
-        # Note: We use the importer module directly
-        from arcadedb_embedded.importer import JsonlImporter
-
-        importer = JsonlImporter(db)
-        importer.import_file(str(jsonl_path))
+        # Import using SQL IMPORT DATABASE command
+        abs_path = Path(jsonl_path).resolve()
+        print(f"Importing from {abs_path}...")
+        db.command("sql", f"IMPORT DATABASE file://{abs_path}")
 
     return time.time() - start_time
 
diff --git a/bindings/python/src/arcadedb_embedded/importer.py b/bindings/python/src/arcadedb_embedded/importer.py
@@ -803,95 +803,6 @@ def import_csv(database, file_path: str, type_name: str, **options) -> Dict[str,
     )
 
 
-class JsonlImporter:
-    """
-    Importer for JSONL (JSON Lines) files.
-    """
-
-    def __init__(self, database):
-        self.database = database
-
-    def import_file(
-        self, file_path: str, batch_size: int = 5000, **options
-    ) -> Dict[str, Any]:
-        """
-        Import data from a JSONL file.
-
-        Args:
-            file_path: Path to the JSONL file
-            batch_size: Number of records to commit in a batch
-            **options: Additional options
-
-        Returns:
-            Dict with import statistics
-        """
-        import json
-        import time
-
-        if not os.path.exists(file_path):
-            raise ArcadeDBError(f"File not found: {file_path}")
-
-        start_time = time.time()
-        stats = {"documents": 0, "vertices": 0, "edges": 0, "errors": 0}
-
-        self.database.begin()
-        try:
-            with open(file_path, "r", encoding="utf-8") as f:
-                count = 0
-                for line in f:
-                    line = line.strip()
-                    if not line:
-                        continue
-
-                    try:
-                        doc_data = json.loads(line)
-
-                        # Determine record type
-                        cat = doc_data.get("@cat")
-                        type_name = doc_data.get("@type")
-
-                        record = None
-                        if cat == "v":
-                            if type_name:
-                                record = self.database.new_vertex(type_name)
-                                stats["vertices"] += 1
-                        elif cat == "d":
-                            if type_name:
-                                record = self.database.new_document(type_name)
-                                stats["documents"] += 1
-                        elif type_name:
-                            # Default to document if category not specified
-                            record = self.database.new_document(type_name)
-                            stats["documents"] += 1
-
-                        if record:
-                            # Set properties
-                            for k, v in doc_data.items():
-                                if k.startswith("@"):
-                                    continue
-                                record.set(k, v)
-
-                            record.save()
-                            count += 1
-
-                            if count % batch_size == 0:
-                                self.database.commit()
-                                self.database.begin()
-
-                    except json.JSONDecodeError:
-                        stats["errors"] += 1
-                    except Exception:
-                        stats["errors"] += 1
-
-            self.database.commit()
-        except Exception:
-            self.database.rollback()
-            raise
-
-        stats["duration_ms"] = (time.time() - start_time) * 1000
-        return stats
-
-
 def import_neo4j(database, file_path: str, **options) -> Dict[str, Any]:
     """
     Import Neo4j JSONL export into database using Java Neo4jImporter.