|
1 | 1 | # SQL to ARC Converter Configuration Example |
2 | | -# Copy this file to config.yaml and adjust the values |
| 2 | +# This file contains ALL available configuration options with their default or example values. |
3 | 3 |
|
4 | | -# Database Connection Settings |
5 | | -# --------------------------- |
6 | | -# Name of the source database (e.g., PostgreSQL database name) |
7 | | -db_name: "edaphobase" |
| 4 | +# ------------------------------------------------------------------------------ |
| 5 | +# 1. CORE SETTINGS |
| 6 | +# ------------------------------------------------------------------------------ |
8 | 7 |
|
9 | | -# Database user with read access |
10 | | -db_user: "reader" |
| 8 | +# Full SQLAlchemy connection string for the source database. |
| 9 | +# Supported dialect+driver: postgresql+psycopg |
| 10 | +connection_string: "postgresql+psycopg://postgres:postgres@localhost:5432/rdi" |
11 | 11 |
|
12 | | -# Database password (will be handled securely) |
13 | | -db_password: "secure_password_here" |
| 12 | +# Unique identifier for the Research Data Infrastructure (RDI). |
| 13 | +rdi: "edaphobase" |
14 | 14 |
|
15 | | -# Database host address (hostname or IP) |
16 | | -db_host: "localhost" |
| 15 | +# Public URL of the RDI portal (used for provenance metadata). |
| 16 | +rdi_url: "https://edaphobase.org" |
17 | 17 |
|
18 | | -# Database port (default: 5432 for PostgreSQL) |
19 | | -db_port: 5432 |
| 18 | +# Console logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL). |
| 19 | +log_level: "INFO" |
20 | 20 |
|
21 | | -# RDI Identifier |
22 | | -# ------------- |
23 | | -# Unique identifier for the Research Data Infrastructure (RDI) |
24 | | -# This is used to tag or namespace the converted ARCs |
25 | | -rdi: "edaphobase" |
26 | 21 |
|
| 22 | +# ------------------------------------------------------------------------------ |
| 23 | +# 2. PROCESSING & PERFORMANCE |
| 24 | +# ------------------------------------------------------------------------------ |
| 25 | + |
| 26 | +# Number of parallel worker processes for ARC generation (CPU-bound). |
| 27 | +# Recommended: Number of CPU cores available. |
| 28 | +max_concurrent_arc_builds: 4 |
| 29 | + |
| 30 | +# Maximum concurrent tasks (IO + CPU). Defaults to 4 * max_concurrent_arc_builds. |
| 31 | +max_concurrent_tasks: ~ |
| 32 | + |
| 33 | +# Number of investigations to fetch from the database per batch. |
| 34 | +db_batch_size: 100 |
| 35 | + |
| 36 | +# Safety limit: Maximum number of studies per investigation. |
| 37 | +max_studies: 5000 |
| 38 | + |
| 39 | +# Safety limit: Maximum number of assays per investigation. |
| 40 | +max_assays: 10000 |
| 41 | + |
| 42 | +# Timeout in minutes for generating a single ARC. |
| 43 | +arc_generation_timeout_minutes: 30 |
| 44 | + |
| 45 | +# (Optional) Limit processing to the first N investigations for debugging. |
| 46 | +debug_limit: ~ |
27 | 47 |
|
28 | | -# API Client Configuration |
29 | | -# ----------------------- |
30 | | -# Settings for connecting to the Middleware API to upload ARCs |
31 | | -api_client: |
32 | | - # Base URL of the Middleware API |
33 | | - api_url: "http://localhost:8000" |
34 | 48 |
|
35 | | - # Path to the client certificate file (PEM format) for mTLS authentication |
36 | | - client_cert_path: "/path/to/client.crt" |
| 49 | +# ------------------------------------------------------------------------------ |
| 50 | +# 3. MIDDLEWARE API CLIENT (mTLS) |
| 51 | +# ------------------------------------------------------------------------------ |
37 | 52 |
|
38 | | - # Path to the client private key file (PEM format) |
39 | | - client_key_path: "/path/to/client.key" |
| 53 | +api_client: |
| 54 | + # Base URL of the FAIRagro Middleware API. |
| 55 | + api_url: "http://localhost:8000" |
40 | 56 |
|
41 | | - # Path to the CA certificate file (optional, for self-signed server certs) |
42 | | - # ca_cert_path: "/path/to/ca.crt" |
| 57 | + # Mutual TLS (mTLS) credentials: paths to the client certificate and private key files (PEM format). |
| 58 | + client_cert_path: "dev_environment/client.crt" |
| 59 | + client_key_path: "dev_environment/client.key" |
| 60 | + |
| 61 | + # (Optional) Path to a custom CA certificate to verify the API server. |
| 62 | + ca_cert_path: ~ |
43 | 63 |
|
44 | | - # Request timeout in seconds (default: 30.0) |
| 64 | + # Request timeout in seconds. |
45 | 65 | timeout: 30.0 |
46 | 66 |
|
47 | | - # Verify SSL certificates (default: true) |
48 | | - # Set to false only for testing with self-signed certs without CA |
| 67 | + # Whether to verify the API server's SSL certificate (set to false only for testing with self-signed certs without a CA). |
49 | 68 | verify_ssl: true |
| 69 | + |
| 70 | + # Whether to follow HTTP redirects. |
| 71 | + follow_redirects: true |
| 72 | + |
| 73 | + # Maximum concurrent HTTP requests to the API. |
| 74 | + max_concurrency: 10 |
| 75 | + |
| 76 | + # Maximum retries for transient HTTP errors (5xx, timeouts). |
| 77 | + max_retries: 3 |
| 78 | + |
| 79 | + # Exponential backoff factor for retries. |
| 80 | + retry_backoff_factor: 2.0 |
| 81 | + |
| 82 | + |
| 83 | +# ------------------------------------------------------------------------------ |
| 84 | +# 4. OPENTELEMETRY TRACING |
| 85 | +# ------------------------------------------------------------------------------ |
| 86 | + |
| 87 | +otel: |
| 88 | + # OTel collector endpoint (e.g., http://localhost:4318). |
| 89 | + # If null (~), no endpoint is set here: tracing is then either disabled or configured through the default env vars. |
| 90 | + endpoint: ~ |
| 91 | + |
| 92 | + # Whether to print OpenTelemetry spans to the console in a readable format. |
| 93 | + log_console_spans: false |
| 94 | + |
| 95 | + # Logging level for OTLP log export. |
| 96 | + log_level: "INFO" |
0 commit comments