|
49 | 49 | - arcadedb-embedded (any distribution: headless, minimal, or full) |
50 | 50 | - MovieLens dataset (downloaded via download_sample_data.py) |
51 | 51 | - JRE 21+ |
| 52 | +- Sufficient JVM heap memory (8GB recommended for the large dataset) |
52 | 53 |
|
53 | 54 | Usage: |
54 | 55 | 1. First download the dataset: |
55 | 56 | python download_sample_data.py |
56 | | -2. Run this example: |
57 | | - python 04_csv_import_documents.py |
| 57 | +2. Run this example with sufficient memory: |
| 58 | + ARCADEDB_JVM_MAX_HEAP="8g" python 04_csv_import_documents.py |
| 59 | +
|
| 60 | +Memory Requirements: |
| 61 | +- Small dataset (~100K ratings): 4GB heap (default) is sufficient |
| 62 | +- Large dataset (~33M ratings): the default 4GB heap should work; use 8GB for safety |
| 63 | +- Very large datasets (100M+ records): Set ARCADEDB_JVM_MAX_HEAP="8g" or higher |
| 64 | +- ARCADEDB_JVM_MAX_HEAP must be set BEFORE running the script (i.e., before the JVM starts) |
58 | 65 |
|
59 | 66 | Note: This example creates a database at ./my_test_databases/movielens_db/ |
60 | 67 | The database files are preserved so you can inspect them after running. |
|
74 | 81 | print("=" * 70) |
75 | 82 | print() |
76 | 83 |
|
| 84 | +# Check JVM heap configuration for large imports |
| 85 | +jvm_heap = os.environ.get("ARCADEDB_JVM_MAX_HEAP") |
| 86 | +if jvm_heap: |
| 87 | + print(f"💡 JVM Max Heap: {jvm_heap}") |
| 88 | +else: |
| 89 | + print("💡 JVM Max Heap: 4g (default)") |
| 90 | + print(" ℹ️ Using default JVM heap (4g)") |
| 91 | + print(" 💡 For very large datasets, you can increase it:") |
| 92 | + print(' export ARCADEDB_JVM_MAX_HEAP="8g" # or run with:') |
| 93 | + print(' ARCADEDB_JVM_MAX_HEAP="8g" python 04_csv_import_documents.py') |
| 94 | +print() |
| 95 | + |
77 | 96 | # ----------------------------------------------------------------------------- |
78 | 97 | # Step 0: Check Dataset Availability |
79 | 98 | # ----------------------------------------------------------------------------- |
|
0 commit comments