Skip to content

Commit e06b595

Browse files
committed
Increase JVM heap size for large CSV imports and enhance error messaging for memory issues
1 parent d40ff56 commit e06b595

4 files changed

Lines changed: 52 additions & 4 deletions

File tree

.github/workflows/test-python-examples.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ jobs:
6161
6262
- name: Run all examples
6363
id: run_examples
64+
env:
65+
# Increase JVM heap for large CSV imports (example 04)
66+
ARCADEDB_JVM_MAX_HEAP: "8g"
6467
run: |
6568
cd bindings/python/examples
6669

bindings/python/examples/04_csv_import_documents.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,19 @@
4949
- arcadedb-embedded (any distribution: headless, minimal, or full)
5050
- MovieLens dataset (downloaded via download_sample_data.py)
5151
- JRE 21+
52+
- Sufficient JVM heap memory (8GB recommended for the large dataset)
5253
5354
Usage:
5455
1. First download the dataset:
5556
python download_sample_data.py
56-
2. Run this example:
57-
python 04_csv_import_documents.py
57+
2. Run this example with sufficient memory:
58+
ARCADEDB_JVM_MAX_HEAP="8g" python 04_csv_import_documents.py
59+
60+
Memory Requirements:
61+
- Small dataset (~100K ratings): 4GB heap (default) is sufficient
62+
- Large dataset (~33M ratings): 4GB heap (default) should work; 8GB is recommended for safety
63+
- Very large datasets (100M+ records): Set ARCADEDB_JVM_MAX_HEAP="8g" or higher
64+
- ARCADEDB_JVM_MAX_HEAP must be set BEFORE running the script (before the JVM starts)
5865
5966
Note: This example creates a database at ./my_test_databases/movielens_db/
6067
The database files are preserved so you can inspect them after running.
@@ -74,6 +81,18 @@
7481
print("=" * 70)
7582
print()
7683

84+
# Check JVM heap configuration for large imports
85+
jvm_heap = os.environ.get("ARCADEDB_JVM_MAX_HEAP")
86+
if jvm_heap:
87+
print(f"💡 JVM Max Heap: {jvm_heap}")
88+
else:
89+
print("💡 JVM Max Heap: 4g (default)")
90+
print(" ℹ️ Using default JVM heap (4g)")
91+
print(" 💡 For very large datasets, you can increase it:")
92+
print(' export ARCADEDB_JVM_MAX_HEAP="8g" # or run with:')
93+
print(' ARCADEDB_JVM_MAX_HEAP="8g" python 04_csv_import_documents.py')
94+
print()
95+
7796
# -----------------------------------------------------------------------------
7897
# Step 0: Check Dataset Availability
7998
# -----------------------------------------------------------------------------

bindings/python/src/arcadedb_embedded/importer.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,32 @@ def _import_using_java(
409409
return final_stats
410410

411411
except Exception as e:
412+
error_msg = str(e)
413+
414+
# Check for common memory-related errors
415+
if any(
416+
mem_indicator in error_msg.lower()
417+
for mem_indicator in [
418+
"java heap space",
419+
"out of memory",
420+
"outofmemoryerror",
421+
]
422+
):
423+
current_heap = os.environ.get("ARCADEDB_JVM_MAX_HEAP")
424+
if current_heap:
425+
heap_msg = f"Current JVM heap: {current_heap}\n"
426+
else:
427+
heap_msg = "Current JVM heap: 4g (default)\n"
428+
429+
raise ArcadeDBError(
430+
f"Import failed ({format_type} -> {import_type}): Out of memory.\n"
431+
f"{heap_msg}"
432+
f"💡 Try increasing heap size with environment variable:\n"
433+
f' export ARCADEDB_JVM_MAX_HEAP="8g"\n'
434+
f" Note: Must be set BEFORE running Python (before JVM starts)\n"
435+
f"Original error: {e}"
436+
) from e
437+
412438
raise ArcadeDBError(
413439
f"Import failed ({format_type} -> {import_type}): {e}"
414440
) from e

bindings/python/src/arcadedb_embedded/jvm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,11 @@ def start_jvm():
3838
classpath = os.pathsep.join(jar_files)
3939

4040
# Allow customization via environment variables
41-
max_heap = os.environ.get("ARCADEDB_JVM_MAX_HEAP", "2g")
41+
max_heap = os.environ.get("ARCADEDB_JVM_MAX_HEAP", "4g")
4242

4343
# Prepare JVM arguments
4444
jvm_args = [
45-
f"-Xmx{max_heap}", # Max heap (default 2g, override with env var)
45+
f"-Xmx{max_heap}", # Max heap (default 4g, override with env var)
4646
"-Djava.awt.headless=true", # Headless mode for server use
4747
]
4848

0 commit comments

Comments
 (0)