# Source: config.yaml.example from leefowlercu/agentic-memorizer (master branch) on GitHub.

# Agentic Memorizer Configuration
# Copy this file to ~/.config/memorizer/config.yaml and customize as needed.
# All values shown are defaults unless otherwise noted.
# Environment variables can override any setting using the MEMORIZER_ prefix.
# Example: MEMORIZER_DAEMON_HTTP_PORT=9000 overrides daemon.http_port

# ------------------------------------------------------------------------------
# Logging Configuration
# ------------------------------------------------------------------------------

# Verbosity of log output.
# One of: debug, info, warn, error
log_level: 'info'

# Log file location. ~ is expanded to the home directory.
log_file: '~/.config/memorizer/memorizer.log'

# ------------------------------------------------------------------------------
# Daemon Configuration
# ------------------------------------------------------------------------------
# Settings for the background daemon process that monitors directories,
# processes files, and maintains the knowledge graph.

daemon:
  # HTTP health check server port.
  # The /health and /status endpoints are served by the daemon on this port.
  http_port: 7600

  # Address the HTTP server binds to.
  # 127.0.0.1 restricts access to localhost; 0.0.0.0 listens on all interfaces.
  http_bind: '127.0.0.1'

  # Seconds allowed for a graceful shutdown.
  # In-flight operations get this long to finish before the daemon
  # terminates forcefully.
  shutdown_timeout: 30

  # PID file location. It is used to detect an already-running daemon
  # and by the service management commands (stop, status).
  pid_file: '~/.config/memorizer/daemon.pid'

  # Seconds between periodic knowledge graph rebuilds.
  # A value of 0 turns periodic rebuilds off.
  rebuild_interval: 3600

  # Metrics collection
  metrics:
    # Seconds between metrics collection cycles.
    # Collected metrics cover component health, queue depths, and
    # processing stats.
    collection_interval: 15

  # Event bus
  event_bus:
    # Size of the event channel buffer for each subscriber.
    # Raising it reduces dropped events at the cost of more memory per
    # subscriber.
    buffer_size: 100

    # Upper bound on the number of critical events kept in the queue.
    # Critical events are persisted to the consolidated storage database.
    critical_queue_capacity: 1000

# ------------------------------------------------------------------------------
# Storage Configuration
# ------------------------------------------------------------------------------
# Settings for the consolidated SQLite database that stores remembered paths,
# file state, critical events, and the persistence queue.

storage:
  # Location of the consolidated SQLite database file.
  # ~ is expanded to the home directory.
  database_path: '~/.config/memorizer/memorizer.db'

# ------------------------------------------------------------------------------
# Persistence Queue Configuration
# ------------------------------------------------------------------------------
# Settings for the durable persistence queue that stores analysis results
# when the graph database is unavailable. Results are automatically persisted
# to the graph when connectivity is restored.

persistence_queue:
  # Number of persistence attempts allowed before an item is marked failed.
  # Failed items stay available for inspection and are not retried
  # automatically.
  max_retries: 3

  # Base delay between retry attempts, in milliseconds.
  # Exponential backoff may lengthen the actual delay.
  retry_backoff_ms: 1000

  # Items handled per drain batch.
  # Higher values raise throughput at the cost of more memory.
  drain_batch_size: 10

  # Minutes to retain completed items before they are purged.
  # Purging of completed items happens after each drain cycle.
  completed_retention_min: 60

  # Days to retain failed items before they are purged.
  # Failed items are kept longer to aid debugging.
  failed_retention_days: 7

# ------------------------------------------------------------------------------
# Graph Database Configuration
# ------------------------------------------------------------------------------
# Connection settings for FalkorDB (a Redis-based graph database), which stores the
# knowledge graph containing files, chunks, metadata, and relationships.

graph:
  # Hostname or IP address of the FalkorDB server.
  host: 'localhost'

  # Port of the FalkorDB server.
  port: 6379

  # Graph name inside FalkorDB.
  # Separate memorizer instances may share one server by using distinct
  # graph names.
  name: 'memorizer'

  # Name of the environment variable that holds the FalkorDB password.
  # The password is read from that variable at runtime and is not stored
  # in config. For passwordless connections, leave the variable unset.
  password_env: 'MEMORIZER_GRAPH_PASSWORD'

  # Retry limit for failed graph operations.
  max_retries: 3

  # Milliseconds to wait between retry attempts.
  retry_delay_ms: 1000

  # Capacity of the write queue used to batch graph updates.
  # Higher values raise throughput at the cost of more memory.
  write_queue_size: 1000

# ------------------------------------------------------------------------------
# Semantic Analysis Provider Configuration
# ------------------------------------------------------------------------------
# Settings for the AI provider used to analyze file content, extract topics,
# identify entities, and generate summaries.

semantic:
  # Enable or disable semantic analysis.
  # When disabled, only metadata and embeddings (if enabled) are generated.
  # Env override: MEMORIZER_SEMANTIC_ENABLED
  enabled: true

  # Semantic analysis provider name.
  # Valid values: anthropic, openai, google
  provider: anthropic

  # Model identifier for the semantic provider.
  # Default varies by provider:
  #   anthropic: claude-sonnet-4-5-20250929, claude-opus-4-5-20251101, claude-haiku-4-5-20251001
  #   openai: gpt-5.2, gpt-5.2-pro, gpt-5-mini
  #   google: gemini-3-pro-preview, gemini-3-flash-preview, gemini-2.5-flash
  model: claude-sonnet-4-5-20250929

  # Rate limit in requests per minute.
  # Prevents exceeding provider API limits. The daemon uses token bucket
  # rate limiting with burst capacity equal to this value.
  rate_limit: 10

  # API key for the semantic provider.
  # Prefer using api_key_env instead of storing keys in config files.
  # If both are set, api_key takes precedence.
  # api_key: sk-...

  # Environment variable name containing the API key.
  # Default varies by provider:
  #   anthropic: ANTHROPIC_API_KEY
  #   openai: OPENAI_API_KEY
  #   google: GOOGLE_API_KEY
  api_key_env: ANTHROPIC_API_KEY

# ------------------------------------------------------------------------------
# Embeddings Provider Configuration
# ------------------------------------------------------------------------------
# Settings for the AI provider used to generate vector embeddings for
# semantic similarity search across chunks.

embeddings:
  # Turns embeddings generation on or off.
  # Semantic similarity search is unavailable while this is disabled.
  enabled: true

  # Name of the embeddings provider.
  # One of: openai, voyage, google
  provider: 'openai'

  # Embeddings model identifier.
  # Default varies by provider:
  #   openai: text-embedding-3-large, text-embedding-3-small
  #   voyage: voyage-3-large, voyage-3.5, voyage-code-3
  #   google: gemini-embedding-001, text-embedding-004
  model: 'text-embedding-3-large'

  # Number of vector dimensions for the embeddings model.
  # This must equal the output dimensions of the selected model:
  #   openai text-embedding-3-large: 3072, text-embedding-3-small: 1536
  #   voyage voyage-3-large/voyage-3.5/voyage-code-3: 1024
  #   google gemini-embedding-001: 3072, text-embedding-004: 768
  dimensions: 3072

  # Embeddings provider API key.
  # Storing keys in config files is discouraged; prefer api_key_env.
  # When both are set, api_key takes precedence.
  # api_key: sk-...

  # Name of the environment variable that holds the API key.
  # Default varies by provider:
  #   openai: OPENAI_API_KEY
  #   voyage: VOYAGE_API_KEY
  #   google: GOOGLE_API_KEY
  api_key_env: 'OPENAI_API_KEY'

# ------------------------------------------------------------------------------
# Default Skip/Include Patterns
# ------------------------------------------------------------------------------
# These patterns are applied by default when remembering new directories.
# They can be overridden per-directory using flags on the remember command.
# Include patterns override corresponding skip patterns for the same items.

defaults:
  skip:
    # File extensions to skip (without content analysis).
    # These files are not chunked, analyzed, or added to the knowledge graph.
    extensions:
      - ".exe"
      - ".dll"
      - ".so"
      - ".dylib"
      - ".bin"
      - ".o"
      - ".a"
      - ".lib"
      - ".obj"
      - ".pyc"
      - ".pyo"
      - ".class"
      - ".zip"
      - ".tar"
      - ".gz"
      - ".tgz"
      - ".rar"
      - ".7z"
      - ".jar"
      - ".war"
      - ".map"
      - ".tmp"
      - ".temp"
      - ".bak"
      - ".swp"
      - ".swo"
      - ".log"

    # Directory names to skip entirely (not traversed).
    # The daemon will not descend into directories with these names.
    directories:
      - ".git"
      - ".svn"
      - ".hg"
      - "node_modules"
      - "bower_components"
      - "__pycache__"
      - ".pytest_cache"
      - ".mypy_cache"
      - ".tox"
      - "venv"
      - ".venv"
      - "dist"
      - "build"
      - "target"
      - ".idea"
      - ".vscode"
      - "coverage"
      - ".nyc_output"
      - "htmlcov"

    # Specific file names or glob patterns to skip.
    # Supports glob patterns like *.min.js for pattern matching.
    # Entries stay quoted: "4913" would otherwise parse as an integer,
    # "#*" as a comment, and a leading * as a YAML alias.
    files:
      - ".DS_Store"
      - "Thumbs.db"
      - "desktop.ini"
      - "package-lock.json"
      - "yarn.lock"
      - "pnpm-lock.yaml"
      - "Cargo.lock"
      - "go.sum"
      - "Gemfile.lock"
      - "poetry.lock"
      - "composer.lock"
      - "*.min.js"
      - "*.min.css"
      - "*.bundle.js"
      - "4913"
      - "#*"
      - "*~"

    # Skip hidden files and directories (names starting with .).
    # When true, dotfiles and dotfolders are ignored unless explicitly included.
    hidden: true

  include:
    # File extensions to explicitly include (overrides skip.extensions).
    # Use this to include specific extensions that would otherwise be skipped.
    extensions: []

    # Directory names to explicitly include (overrides skip.directories).
    # Use this to include specific directories that would otherwise be skipped.
    directories: []

    # Specific file names to explicitly include (overrides skip.files).
    # Use this to include specific files that would otherwise be skipped.
    files: []