basic-memory/src/basic_memory/config.py at 7c37e49427dd0d7a64cb0bc9eb28a20ec57a4970 · basicmachines-co/basic-memory · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Configuration management for basic-memory."""

import importlib.util
import json
import os
import shutil
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, Literal, Optional, List, Tuple

from loguru import logger
from pydantic import AliasChoices, BaseModel, Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

from basic_memory import __version__
from basic_memory.telemetry import configure_telemetry
from basic_memory.utils import setup_logging, generate_permalink


DATABASE_NAME = "memory.db"
APP_DATABASE_NAME = "memory.db"  # Using the same name but in the app directory
DATA_DIR_NAME = ".basic-memory"
CONFIG_FILE_NAME = "config.json"
WATCH_STATUS_JSON = "watch-status.json"

Environment = Literal["test", "dev", "user"]


class ProjectMode(str, Enum):
    """Per-project routing mode."""

    LOCAL = "local"
    CLOUD = "cloud"


class DatabaseBackend(str, Enum):
    """Supported database backends."""

    SQLITE = "sqlite"
    POSTGRES = "postgres"


def _default_semantic_search_enabled() -> bool:
    """Enable semantic search by default when required local semantic dependencies exist."""
    required_modules = ("fastembed", "sqlite_vec")
    return all(
        importlib.util.find_spec(module_name) is not None for module_name in required_modules
    )


@dataclass
class ProjectConfig:
    """Configuration for a specific basic-memory project."""

    name: str
    home: Path
    mode: ProjectMode = ProjectMode.LOCAL

    @property
    def project(self):
        return self.name  # pragma: no cover

    @property
    def project_url(self) -> str:  # pragma: no cover
        return f"/{generate_permalink(self.name)}"


class CloudProjectConfig(BaseModel):
    """Sync configuration for a cloud project.

    This tracks the local working directory and sync state for a project
    that is synced with Basic Memory Cloud.

    DEPRECATED: Kept for backward-compatible migration only. New code should
    use ProjectEntry fields (cloud_sync_path, bisync_initialized, last_sync).
    """

    local_path: str = Field(description="Local working directory path for this cloud project")
    last_sync: Optional[datetime] = Field(
        default=None, description="Timestamp of last successful sync operation"
    )
    bisync_initialized: bool = Field(
        default=False, description="Whether rclone bisync baseline has been established"
    )


class ProjectEntry(BaseModel):
    """Unified project configuration entry.

    Replaces the old triple of projects (Dict[str, str]), project_modes
    (Dict[str, ProjectMode]), and cloud_projects (Dict[str, CloudProjectConfig])
    with a single structure per project.
    """

    path: str = Field(description="Local filesystem path for the project")
    mode: ProjectMode = Field(
        default=ProjectMode.LOCAL,
        description="Routing mode: local (in-process ASGI) or cloud (remote API)",
    )
    workspace_id: Optional[str] = Field(
        default=None,
        description="Cloud workspace tenant_id. Set by 'bm project set-cloud --workspace'.",
    )
    # Cloud sync state (replaces CloudProjectConfig)
    local_sync_path: Optional[str] = Field(
        default=None,
        description="Local working directory for bisync",
        validation_alias=AliasChoices("local_sync_path", "cloud_sync_path"),
    )
    bisync_initialized: bool = Field(
        default=False,
        description="Whether rclone bisync baseline has been established",
    )
    last_sync: Optional[datetime] = Field(
        default=None,
        description="Timestamp of last successful sync operation",
    )


class BasicMemoryConfig(BaseSettings):
    """Pydantic model for Basic Memory global configuration."""

    env: Environment = Field(default="dev", description="Environment name")

    projects: Dict[str, ProjectEntry] = Field(
        default_factory=lambda: {
            "main": ProjectEntry(
                path=str(Path(os.getenv("BASIC_MEMORY_HOME", Path.home() / "basic-memory")))
            )
        }
        if os.getenv("BASIC_MEMORY_HOME")
        else {},
        description="Mapping of project names to their ProjectEntry configuration",
    )
    default_project: Optional[str] = Field(
        default=None,
        description="Name of the default project to use. When set, acts as fallback when no project parameter is specified. Set to null to disable automatic project resolution.",
    )

    # overridden by ~/.basic-memory/config.json
    log_level: str = "INFO"

    # Optional Logfire telemetry (disabled by default)
    logfire_enabled: bool = Field(
        default=False,
        description="Enable Logfire instrumentation for local development or managed deployments.",
    )
    logfire_send_to_logfire: bool = Field(
        default=False,
        description="When true, allow Logfire to export telemetry to the configured backend.",
    )
    logfire_service_name: str = Field(
        default="basic-memory",
        description="Base service name used when constructing entrypoint-specific Logfire service names.",
    )
    logfire_environment: str | None = Field(
        default=None,
        description="Optional override for Logfire environment. Defaults to env when unset.",
    )

    # Database configuration
    database_backend: DatabaseBackend = Field(
        default=DatabaseBackend.SQLITE,
        description="Database backend to use (sqlite or postgres)",
    )

    database_url: Optional[str] = Field(
        default=None,
        description="Database connection URL. For Postgres, use postgresql+asyncpg://user:pass@host:port/db. If not set, SQLite will use default path.",
    )

    # Semantic search configuration
    semantic_search_enabled: bool = Field(
        default_factory=_default_semantic_search_enabled,
        description="Enable semantic search (vector/hybrid retrieval). Works on both SQLite and Postgres backends. Requires semantic dependencies (included by default).",
    )
    semantic_embedding_provider: str = Field(
        default="fastembed",
        description="Embedding provider for local semantic indexing/search.",
    )
    semantic_embedding_model: str = Field(
        default="bge-small-en-v1.5",
        description="Embedding model identifier used by the local provider.",
    )
    semantic_embedding_dimensions: int | None = Field(
        default=None,
        description="Embedding vector dimensions. Auto-detected from provider if not set (384 for FastEmbed, 1536 for OpenAI).",
    )
    # Trigger: full local rebuilds spend most of their time waiting behind shared
    # embed flushes, not constructing vectors themselves.
    # Why: smaller FastEmbed batches cut queue wait far more than they increase
    # write overhead on real-world projects, which makes full reindex materially faster.
    # Outcome: default to the smaller local/cloud-safe batch size we benchmarked as
    # the current best end-to-end setting in the shared vector sync pipeline.
    semantic_embedding_batch_size: int = Field(
        default=2,
        description="Batch size for embedding generation.",
        gt=0,
    )
    semantic_embedding_request_concurrency: int = Field(
        default=4,
        description="Maximum number of concurrent provider requests for batched embedding generation when the active provider supports request-level concurrency.",
        gt=0,
    )
    semantic_embedding_sync_batch_size: int = Field(
        default=2,
        description="Batch size for vector sync orchestration flushes.",
        gt=0,
    )
    semantic_postgres_prepare_concurrency: int = Field(
        default=4,
        description="Number of Postgres entity prepare tasks to run concurrently during vector sync. Postgres only; keep this low to avoid overdriving the database connection pool.",
        gt=0,
        le=16,
    )
    semantic_embedding_cache_dir: str | None = Field(
        default=None,
        description="Optional cache directory for FastEmbed model artifacts.",
    )
    semantic_embedding_threads: int | None = Field(
        default=None,
        description="Optional FastEmbed runtime thread count override.",
        gt=0,
    )
    semantic_embedding_parallel: int | None = Field(
        default=None,
        description="Optional FastEmbed embed() parallelism override.",
        gt=0,
    )
    semantic_vector_k: int = Field(
        default=100,
        description="Vector candidate count for vector and hybrid retrieval.",
        gt=0,
    )
    semantic_min_similarity: float = Field(
        default=0.55,
        description="Minimum similarity score for vector search results. Results below this threshold are filtered out. 0.0 disables filtering.",
        ge=0.0,
        le=1.0,
    )
    default_search_type: Literal["text", "vector", "hybrid"] | None = Field(
        default=None,
        description="Default search type for search_notes when not specified per-query. "
        "Valid values: text, vector, hybrid. "
        "When unset, defaults to 'hybrid' if semantic search is enabled, otherwise 'text'.",
    )

    # Database connection pool configuration (Postgres only)
    db_pool_size: int = Field(
        default=20,
        description="Number of connections to keep in the pool (Postgres only)",
        gt=0,
    )
    db_pool_overflow: int = Field(
        default=40,
        description="Max additional connections beyond pool_size under load (Postgres only)",
        gt=0,
    )
    db_pool_recycle: int = Field(
        default=180,
        description="Recycle connections after N seconds to prevent stale connections. Default 180s works well with Neon's ~5 minute scale-to-zero (Postgres only)",
        gt=0,
    )

    # Watch service configuration
    sync_delay: int = Field(
        default=1000, description="Milliseconds to wait after changes before syncing", gt=0
    )

    watch_project_reload_interval: int = Field(
        default=300,
        description="Seconds between reloading project list in watch service. Higher values reduce CPU usage by minimizing watcher restarts. Default 300s (5 min) balances efficiency with responsiveness to new projects.",
        gt=0,
    )

    # update permalinks on move
    update_permalinks_on_move: bool = Field(
        default=False,
        description="Whether to update permalinks when files are moved or renamed. default (False)",
    )

    sync_changes: bool = Field(
        default=True,
        description="Whether to sync changes in real time. default (True)",
    )

    sync_thread_pool_size: int = Field(
        default=4,
        description="Size of thread pool for file I/O operations in sync service. Default of 4 is optimized for cloud deployments with 1-2GB RAM.",
        gt=0,
    )

    sync_max_concurrent_files: int = Field(
        default=10,
        description="Maximum number of files to process concurrently during sync. Limits memory usage on large projects (2000+ files). Lower values reduce memory consumption.",
        gt=0,
    )
    index_batch_size: int = Field(
        default=32,
        description="Maximum number of changed files to load into one indexing batch.",
        gt=0,
    )
    index_batch_max_bytes: int = Field(
        default=8 * 1024 * 1024,
        description="Maximum total bytes to load into one indexing batch. Large files still run as single-file batches.",
        gt=0,
    )
    index_parse_max_concurrent: int = Field(
        default=8,
        description="Maximum number of markdown parse tasks to run concurrently inside one indexing batch.",
        gt=0,
    )
    index_entity_max_concurrent: int = Field(
        default=4,
        description="Maximum number of entity create/update tasks to run concurrently inside one indexing batch.",
        gt=0,
    )
    index_metadata_update_max_concurrent: int = Field(
        default=4,
        description="Maximum number of metadata/search refresh tasks to run concurrently inside one indexing batch.",
        gt=0,
    )

    kebab_filenames: bool = Field(
        default=False,
        description="Format for generated filenames. False preserves spaces and special chars, True converts them to hyphens for consistency with permalinks",
    )

    disable_permalinks: bool = Field(
        default=False,
        description="Disable automatic permalink generation in frontmatter. When enabled, new notes won't have permalinks added and sync won't update permalinks. Existing permalinks will still work for reading.",
    )

    write_note_overwrite_default: bool = Field(
        default=False,
        description=(
            "Default value for write_note's overwrite parameter. "
            "When False (default), write_note errors if note already exists. "
            "Set to True to restore pre-v0.20 upsert behavior. "
            "Env: BASIC_MEMORY_WRITE_NOTE_OVERWRITE_DEFAULT"
        ),
    )

    ensure_frontmatter_on_sync: bool = Field(
        default=True,
        description="Ensure markdown files have frontmatter during sync by adding derived title/type/permalink when missing. When combined with disable_permalinks=True, this setting takes precedence for missing-frontmatter files and still writes permalinks.",
    )

    permalinks_include_project: bool = Field(
        default=True,
        description="When True, generated permalinks are prefixed with the project slug (e.g., 'specs/search'). Existing permalinks remain unchanged unless explicitly updated.",
    )

    skip_initialization_sync: bool = Field(
        default=False,
        description="Skip expensive initialization synchronization. Useful for cloud/stateless deployments where project reconciliation is not needed.",
    )

    # File formatting configuration
    format_on_save: bool = Field(
        default=False,
        description="Automatically format files after saving using configured formatter. Disabled by default.",
    )

    formatter_command: Optional[str] = Field(
        default=None,
        description="External formatter command. Use {file} as placeholder for file path. If not set, uses built-in mdformat (Python, no Node.js required). Set to 'npx prettier --write {file}' for Prettier.",
    )

    formatters: Dict[str, str] = Field(
        default_factory=dict,
        description="Per-extension formatters. Keys are extensions (without dot), values are commands. Example: {'md': 'prettier --write {file}', 'json': 'prettier --write {file}'}",
    )

    formatter_timeout: float = Field(
        default=5.0,
        description="Maximum seconds to wait for formatter to complete",
        gt=0,
    )

    # Project path constraints
    project_root: Optional[str] = Field(
        default=None,
        description="If set, all projects must be created underneath this directory. Paths will be sanitized and constrained to this root. If not set, projects can be created anywhere (default behavior).",
    )

    # Cloud configuration
    cloud_client_id: str = Field(
        default="client_01K6KWQPW6J1M8VV7R3TZP5A6M",
        description="OAuth client ID for Basic Memory Cloud",
    )

    cloud_domain: str = Field(
        default="https://eloquent-lotus-05.authkit.app",
        description="AuthKit domain for Basic Memory Cloud",
    )

    cloud_host: str = Field(
        default_factory=lambda: os.getenv(
            "BASIC_MEMORY_CLOUD_HOST", "https://cloud.basicmemory.com"
        ),
        description="Basic Memory Cloud host URL",
    )

    cloud_promo_opt_out: bool = Field(
        default=False,
        description="Disable CLI cloud promo messages when true.",
    )

    cloud_promo_first_run_shown: bool = Field(
        default=False,
        description="Tracks whether the first-run cloud promo message has been shown.",
    )

    cloud_promo_last_version_shown: Optional[str] = Field(
        default=None,
        description="Most recent cloud promo version shown in CLI.",
    )

    auto_update: bool = Field(
        default=True,
        description="Enable automatic CLI update checks and installs when supported.",
    )

    update_check_interval: int = Field(
        default=86400,
        description="Seconds between automatic update checks.",
        gt=0,
    )

    auto_update_last_checked_at: Optional[datetime] = Field(
        default=None,
        description="Timestamp of the last attempted automatic update check.",
    )

    cloud_api_key: Optional[str] = Field(
        default=None,
        description="API key for cloud access (bmc_ prefixed). Account-level, not per-project.",
    )

    default_workspace: Optional[str] = Field(
        default=None,
        description="Default cloud workspace tenant_id. Set by 'bm cloud workspace set-default'.",
    )

    @model_validator(mode="before")
    @classmethod
    def migrate_legacy_projects(cls, data: Any) -> Any:
        """Migrate old-format config (Dict[str, str]) to new ProjectEntry format.

        Old format stored projects as three separate dicts:
          projects:      {"name": "/path"}
          project_modes: {"name": "cloud"}
          cloud_projects: {"name": {"local_path": "...", ...}}

        New format unifies them into:
          projects: {"name": {"path": "/path", "mode": "cloud", ...}}

        Also removes stale keys (default_project_mode, permalinks_include_project)
        that are no longer part of the config model.
        """
        if not isinstance(data, dict):
            return data

        # --- Remove stale keys from old config versions ---
        data.pop("default_project_mode", None)
        data.pop("cloud_mode", None)

        projects = data.get("projects", {})
        if not projects:
            return data

        # Check if already in new format — peek at first value
        first_value = next(iter(projects.values()), None)
        if isinstance(first_value, str):
            # Old format: {"name": "/path"} → convert
            project_modes = data.pop("project_modes", {})
            cloud_projects = data.pop("cloud_projects", {})
            new_projects: Dict[str, Any] = {}
            for name, path in projects.items():
                entry: Dict[str, Any] = {"path": path}
                if name in project_modes:
                    entry["mode"] = project_modes[name]
                if name in cloud_projects:
                    cp = cloud_projects[name]
                    if isinstance(cp, dict):
                        entry["local_sync_path"] = cp.get("local_path")
                        entry["bisync_initialized"] = cp.get("bisync_initialized", False)
                        entry["last_sync"] = cp.get("last_sync")
                    else:
                        # Already a CloudProjectConfig-like object
                        entry["local_sync_path"] = getattr(cp, "local_path", None)
                        entry["bisync_initialized"] = getattr(cp, "bisync_initialized", False)
                        entry["last_sync"] = getattr(cp, "last_sync", None)
                new_projects[name] = entry

            # Pick up cloud_projects entries not already in projects
            # These are cloud-only projects — path should be the local working
            # directory (if one exists), local_path goes into local_sync_path for bisync
            for name, cp in cloud_projects.items():
                if name not in new_projects:
                    if isinstance(cp, dict):
                        local_path = cp.get("local_path", "")
                        new_projects[name] = {
                            "path": local_path or "",
                            "mode": project_modes.get(name, "cloud"),
                            "local_sync_path": local_path,
                            "bisync_initialized": cp.get("bisync_initialized", False),
                            "last_sync": cp.get("last_sync"),
                        }

            data["projects"] = new_projects
        else:
            # New format or dict-based — just clean up stale keys
            data.pop("project_modes", None)
            data.pop("cloud_projects", None)

        # --- Promote local_sync_path into path for cloud projects with slug paths ---
        # Trigger: project entry has local_sync_path set but path is a cloud slug (not absolute)
        # Why: path must always be the local filesystem path; the cloud remote is derivable
        # Outcome: path becomes the local directory, local_sync_path kept for backwards compat
        projects = data.get("projects", {})
        for name, entry in projects.items():
            if isinstance(entry, dict):
                lsp = entry.get("local_sync_path")
                path = entry.get("path", "")
                if lsp and not os.path.isabs(path):
                    entry["path"] = lsp

        return data

    @property
    def is_test_env(self) -> bool:
        """Check if running in a test environment.

        Returns True if any of:
        - env field is set to "test"
        - BASIC_MEMORY_ENV environment variable is "test"
        - PYTEST_CURRENT_TEST environment variable is set (pytest is running)

        Used to disable features like file watchers during tests.
        """
        return (
            self.env == "test"
            or os.getenv("BASIC_MEMORY_ENV", "").lower() == "test"
            or os.getenv("PYTEST_CURRENT_TEST") is not None
        )

    def get_project_mode(self, project_name: str) -> ProjectMode:
        """Get the routing mode for a project.

        Returns the per-project mode if set.
        Unknown projects (not in local config) default to CLOUD —
        local projects are always registered in config.
        """
        entry = self.projects.get(project_name)
        return entry.mode if entry else ProjectMode.CLOUD

    def set_project_mode(self, project_name: str, mode: ProjectMode) -> None:
        """Set the routing mode for a project.

        Creates a minimal ProjectEntry if the project doesn't already exist,
        preserving backward compatibility with code that sets mode before
        adding a full project entry.
        """
        if project_name in self.projects:
            self.projects[project_name].mode = mode
        else:
            self.projects[project_name] = ProjectEntry(path="", mode=mode)

    @classmethod
    def for_cloud_tenant(
        cls,
        database_url: str,
        projects: Optional[Dict[str, "ProjectEntry"]] = None,
    ) -> "BasicMemoryConfig":
        """Create config for cloud tenant - no config.json, database is source of truth.

        This factory method creates a BasicMemoryConfig suitable for cloud deployments
        where:
        - Database is Postgres (Neon), not SQLite
        - Projects are discovered from the database, not config file
        - Path validation is skipped (no local filesystem in cloud)
        - Initialization sync is skipped (stateless deployment)

        Args:
            database_url: Postgres connection URL for tenant database
            projects: Optional project mapping (usually empty, discovered from DB)

        Returns:
            BasicMemoryConfig configured for cloud mode
        """
        return cls(  # pragma: no cover
            database_backend=DatabaseBackend.POSTGRES,
            database_url=database_url,
            projects=projects or {},
            skip_initialization_sync=True,
        )

    model_config = SettingsConfigDict(
        env_prefix="BASIC_MEMORY_",
        extra="ignore",
    )

    def get_project_path(self, project_name: Optional[str] = None) -> Path:  # pragma: no cover
        """Get the path for a specific project or the default project."""
        name = project_name or self.default_project

        if name not in self.projects:
            raise ValueError(f"Project '{name}' not found in configuration")

        return Path(self.projects[name].path)

    def model_post_init(self, __context: Any) -> None:
        """Ensure configuration is valid after initialization."""
        # Skip project initialization in cloud mode - projects are discovered from DB
        if self.database_backend == DatabaseBackend.POSTGRES:  # pragma: no cover
            return  # pragma: no cover

        # Trigger: no projects configured (fresh install or empty config)
        # Why: every config needs at least one project to be functional
        # Outcome: creates "main" project using BASIC_MEMORY_HOME or ~/basic-memory
        if not self.projects:
            self.projects["main"] = ProjectEntry(
                path=str(Path(os.getenv("BASIC_MEMORY_HOME", Path.home() / "basic-memory")))
            )

        # Trigger: default_project was not explicitly provided in the input data
        #          (config file omitted the key, or BasicMemoryConfig() called with no args)
        # Why: callers like get_project_config() expect a valid project name;
        #      but explicit None (discovery mode) must be preserved
        # Outcome: sets default_project to the first available project
        if "default_project" not in self.model_fields_set:
            self.default_project = next(iter(self.projects.keys()))
        # Trigger: default_project was explicitly set but references a non-existent project
        # Why: project may have been removed or renamed since config was saved
        # Outcome: corrects to the first available project
        elif self.default_project is not None and self.default_project not in self.projects:
            self.default_project = next(iter(self.projects.keys()))

    @property
    def app_database_path(self) -> Path:
        """Get the path to the app-level database.

        This is the single database that will store all knowledge data
        across all projects.

        Uses BASIC_MEMORY_CONFIG_DIR when set so each process/worktree can
        isolate both config and database state.
        """
        database_path = self.data_dir_path / APP_DATABASE_NAME
        if not database_path.exists():  # pragma: no cover
            database_path.parent.mkdir(parents=True, exist_ok=True)
            database_path.touch()
        return database_path

    @property
    def database_path(self) -> Path:
        """Get SQLite database path.

        Rreturns the app-level database path
        for backward compatibility in the codebase.
        """

        # Load the app-level database path from the global config
        config_manager = ConfigManager()
        config = config_manager.load_config()  # pragma: no cover
        return config.app_database_path  # pragma: no cover

    @property
    def project_list(self) -> List[ProjectConfig]:  # pragma: no cover
        """Get all configured projects as ProjectConfig objects."""
        return [
            ProjectConfig(name=name, home=Path(entry.path), mode=entry.mode)
            for name, entry in self.projects.items()
        ]

    @model_validator(mode="after")
    def ensure_project_paths_exists(self) -> "BasicMemoryConfig":  # pragma: no cover
        """Ensure project paths exist.

        Skips path creation when using Postgres backend (cloud mode) since
        cloud tenants don't use local filesystem paths.
        """
        # Skip path creation for cloud mode - no local filesystem
        if self.database_backend == DatabaseBackend.POSTGRES:
            return self

        for name, entry in self.projects.items():
            path = Path(entry.path)
            # Skip cloud-only projects whose path is a slug, not a local directory
            if not path.is_absolute():
                continue
            if not path.exists():
                try:
                    path.mkdir(parents=True)
                except Exception as e:
                    logger.error(f"Failed to create project path: {e}")
                    raise e
        return self

    @property
    def data_dir_path(self) -> Path:
        """Get app state directory for config and default SQLite database."""
        if config_dir := os.getenv("BASIC_MEMORY_CONFIG_DIR"):
            return Path(config_dir)

        home = os.getenv("HOME", Path.home())
        return Path(home) / DATA_DIR_NAME


# Module-level cache for configuration
_CONFIG_CACHE: Optional[BasicMemoryConfig] = None
# Track config file mtime+size so cross-process changes (e.g. `bm project set-cloud`
# in a separate terminal) invalidate the cache in long-lived processes like the
# MCP stdio server. Using both mtime and size guards against coarse-granularity
# filesystems where two writes within the same second share the same mtime.
_CONFIG_MTIME: Optional[float] = None
_CONFIG_SIZE: Optional[int] = None


class ConfigManager:
    """Manages Basic Memory configuration."""

    def __init__(self) -> None:
        """Initialize the configuration manager."""
        home = os.getenv("HOME", Path.home())
        if isinstance(home, str):
            home = Path(home)

        # Allow override via environment variable
        if config_dir := os.getenv("BASIC_MEMORY_CONFIG_DIR"):
            self.config_dir = Path(config_dir)
        else:
            self.config_dir = home / DATA_DIR_NAME

        self.config_file = self.config_dir / CONFIG_FILE_NAME

        # Ensure config directory exists
        self.config_dir.mkdir(parents=True, exist_ok=True)

    @property
    def config(self) -> BasicMemoryConfig:
        """Get configuration, loading it lazily if needed."""
        return self.load_config()

    def load_config(self) -> BasicMemoryConfig:
        """Load configuration from file or create default.

        Environment variables take precedence over file config values,
        following Pydantic Settings best practices.

        Uses module-level cache with file mtime validation so that
        cross-process config changes (e.g. `bm project set-cloud` in a
        separate terminal) are picked up by long-lived processes like
        the MCP stdio server.
        """
        global _CONFIG_CACHE, _CONFIG_MTIME, _CONFIG_SIZE

        # Trigger: cached config exists but the on-disk file may have been
        # modified by another process (CLI command in a different terminal).
        # Why: the MCP server is long-lived; without this check it would
        # serve stale project routing forever.
        # Outcome: cheap os.stat() per access; re-read only when mtime or size differs.
        if _CONFIG_CACHE is not None:
            try:
                st = self.config_file.stat()
                current_mtime = st.st_mtime
                current_size = st.st_size
            except OSError:
                current_mtime = None
                current_size = None

            if (
                current_mtime is not None
                and current_mtime == _CONFIG_MTIME
                and current_size == _CONFIG_SIZE
            ):
                return _CONFIG_CACHE

            # mtime/size changed or file gone — invalidate and fall through to re-read
            _CONFIG_CACHE = None
            _CONFIG_MTIME = None
            _CONFIG_SIZE = None

        if self.config_file.exists():
            try:
                file_data = json.loads(self.config_file.read_text(encoding="utf-8"))

                # Detect legacy format before model validators strip stale keys
                _STALE_KEYS = {
                    "default_project_mode",
                    "project_modes",
                    "cloud_projects",
                    "cloud_mode",
                }
                needs_resave = bool(_STALE_KEYS & file_data.keys())

                # Check if projects dict uses old string-value format
                projects_raw = file_data.get("projects", {})
                if projects_raw:
                    first_val = next(iter(projects_raw.values()), None)
                    if isinstance(first_val, str):
                        needs_resave = True

                # Check if any project has local_sync_path set but path is a cloud slug
                # (will be migrated by migrate_legacy_projects validator)
                if not needs_resave:
                    for entry_data in projects_raw.values():
                        if isinstance(entry_data, dict):
                            lsp = entry_data.get("local_sync_path")
                            p = entry_data.get("path", "")
                            if lsp and not os.path.isabs(p):
                                needs_resave = True
                                break

                # First, create config from environment variables (Pydantic will read them)
                # Then overlay with file data for fields that aren't set via env vars
                # This ensures env vars take precedence

                # Get env-based config fields that are actually set
                env_config = BasicMemoryConfig()
                env_dict = env_config.model_dump()

                # Merge: file data as base, but only use it for fields not set by env
                # We detect env-set fields by comparing to default values
                merged_data = file_data.copy()

                # For fields that have env var overrides, use those instead of file values
                # The env_prefix is "BASIC_MEMORY_" so we check those
                for field_name in BasicMemoryConfig.model_fields.keys():
                    env_var_name = f"BASIC_MEMORY_{field_name.upper()}"
                    if env_var_name in os.environ:
                        # Environment variable is set, use it
                        merged_data[field_name] = env_dict[field_name]

                _CONFIG_CACHE = BasicMemoryConfig(**merged_data)

                # Record mtime+size so subsequent calls detect cross-process changes
                try:
                    st = self.config_file.stat()
                    _CONFIG_MTIME = st.st_mtime
                    _CONFIG_SIZE = st.st_size
                except OSError:
                    _CONFIG_MTIME = None
                    _CONFIG_SIZE = None

                # Re-save to normalize legacy config into current format
                if needs_resave:
                    # Create backup before overwriting so users can revert if needed
                    backup_path = self.config_file.with_suffix(".json.bak")
                    shutil.copy2(self.config_file, backup_path)
                    logger.info(f"Migrating config to current format (backup: {backup_path})")
                    save_basic_memory_config(self.config_file, _CONFIG_CACHE)

                return _CONFIG_CACHE
            except json.JSONDecodeError as e:  # pragma: no cover
                logger.error(f"Invalid JSON in config file {self.config_file}: {e}")
                raise SystemExit(
                    f"Error: config file is not valid JSON: {self.config_file}\n"
                    f"  {e}\n"
                    f"Fix or delete the file and re-run."
                )
            except Exception as e:  # pragma: no cover
                logger.error(f"Failed to load config from {self.config_file}: {e}")
                raise SystemExit(
                    f"Error: failed to load config from {self.config_file}\n"
                    f"  {e}\n"
                    f"Fix or delete the file and re-run."
                )
        else:
            config = BasicMemoryConfig()
            self.save_config(config)
            return config

    def save_config(self, config: BasicMemoryConfig) -> None:
        """Save configuration to file and invalidate cache."""
        global _CONFIG_CACHE, _CONFIG_MTIME, _CONFIG_SIZE
        save_basic_memory_config(self.config_file, config)
        # Invalidate cache so next load_config() reads fresh data
        _CONFIG_CACHE = None
        _CONFIG_MTIME = None
        _CONFIG_SIZE = None

    @property
    def projects(self) -> Dict[str, str]:
        """Get all configured projects as name -> path mapping.

        Returns the legacy Dict[str, str] format for backward compatibility
        with code that expects project name -> filesystem path.
        """
        return {name: entry.path for name, entry in self.config.projects.items()}

    @property
    def default_project(self) -> Optional[str]:
        """Get the default project name."""
        return self.config.default_project

    def add_project(self, name: str, path: str) -> ProjectConfig:
        """Add a new project to the configuration."""
        project_name, _ = self.get_project(name)
        if project_name:  # pragma: no cover
            raise ValueError(f"Project '{name}' already exists")

        # Load config, modify it, and save it
        project_path = Path(path)
        config = self.load_config()
        config.projects[name] = ProjectEntry(path=str(project_path))
        self.save_config(config)
        return ProjectConfig(name=name, home=project_path)

    def remove_project(self, name: str) -> None:
        """Remove a project from the configuration."""

        project_name, path = self.get_project(name)
        if not project_name:  # pragma: no cover
            raise ValueError(f"Project '{name}' not found")

        # Load config, check, modify, and save
        config = self.load_config()
        if project_name == config.default_project:  # pragma: no cover
            raise ValueError(f"Cannot remove the default project '{name}'")

        # Use the found project_name (which may differ from input name due to permalink matching)
        del config.projects[project_name]
        self.save_config(config)

    def set_default_project(self, name: str) -> None:
        """Set the default project."""
        project_name, path = self.get_project(name)
        if not project_name:  # pragma: no cover
            raise ValueError(f"Project '{name}' not found")

        # Load config, modify, and save
        config = self.load_config()
        config.default_project = project_name
        self.save_config(config)

    def get_project(self, name: str) -> Tuple[str, str] | Tuple[None, None]:
        """Look up a project from the configuration by name or permalink.

        Returns (project_name, path_string) for backward compatibility.
        """
        project_permalink = generate_permalink(name)
        app_config = self.config
        for project_name, entry in app_config.projects.items():
            if project_permalink == generate_permalink(project_name):
                return project_name, entry.path
        return None, None


def get_project_config(project_name: Optional[str] = None) -> ProjectConfig:
    """
    Get the project configuration for the current session.
    If project_name is provided, it will be used instead of the default project.
    """

    actual_project_name = None

    # load the config from file
    config_manager = ConfigManager()
    app_config = config_manager.load_config()

    # Get project name from environment variable
    os_project_name = os.environ.get("BASIC_MEMORY_PROJECT", None)
    if os_project_name:  # pragma: no cover
        logger.warning(
            f"BASIC_MEMORY_PROJECT is not supported anymore. Set the default project in the config instead. Setting default project to {os_project_name}"
        )
        actual_project_name = project_name
    # if the project_name is passed in, use it
    elif not project_name:
        # use default
        actual_project_name = app_config.default_project
    else:  # pragma: no cover
        actual_project_name = project_name

    # the config contains a dict[str,str] of project names and absolute paths
    assert actual_project_name is not None, "actual_project_name cannot be None"

    project_permalink = generate_permalink(actual_project_name)

    for name, entry in app_config.projects.items():
        if project_permalink == generate_permalink(name):
            return ProjectConfig(name=name, home=Path(entry.path))

    # otherwise raise error
    raise ValueError(f"Project '{actual_project_name}' not found")  # pragma: no cover


def has_cloud_credentials(config: BasicMemoryConfig) -> bool:
    """Check if cloud credentials are available (API key or OAuth token).

    Shared utility used by both MCP tools and CLI commands to determine
    whether cloud project discovery is possible.
    """
    if config.cloud_api_key:
        return True