Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,42 @@ jobs:
options: --health-cmd pg_isready --health-interval 5s --health-timeout 2s --health-retries 5 --name postgres --hostname postgres

steps:
- name: Enable PostgreSQL slow query logging and auto_explain
run: |
PG_CONTAINER=$(docker ps -q --filter "ancestor=pgvector/pgvector:pg17")
docker exec $PG_CONTAINER bash -c "\
cat >> /var/lib/postgresql/data/postgresql.conf <<EOF
Comment on lines +95 to +99

# slow query logging
log_min_duration_statement = 10

# auto_explain for slow queries
session_preload_libraries = 'auto_explain'
auto_explain.log_min_duration = '10ms'
auto_explain.log_analyze = on
auto_explain.log_buffers = on
auto_explain.log_nested_statements = on
auto_explain.log_verbose = on

# file-based logging
logging_collector = on
log_directory = '/var/log/pg_log'
log_filename = 'postgresql.log'
log_file_mode = 0644
EOF"
Comment on lines +112 to +117

docker exec $PG_CONTAINER psql -U root -d nextcloud -c "SELECT pg_reload_conf();"
sleep 2

# logging_collector requires a restart to take effect
docker restart $PG_CONTAINER
# wait for postgres to be ready again
until docker exec $PG_CONTAINER pg_isready -U root; do sleep 1; done

# verify the config has been loaded
docker exec $PG_CONTAINER psql -U root -d nextcloud -c "SHOW log_min_duration_statement;"
docker exec $PG_CONTAINER psql -U root -d nextcloud -c "SHOW session_preload_libraries;"

- name: Checkout server
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
with:
Expand Down Expand Up @@ -378,6 +414,12 @@ jobs:
/tmp/0_pgdump_nextcloud
/tmp/1_pgdump_nextcloud

- name: Show PostgreSQL slow query logs
  if: always()
  run: |
    # Diagnostic step only: it must never fail the job by itself.
    # Take the first match in case more than one container uses the image.
    PG_CONTAINER=$(docker ps -q --filter "ancestor=pgvector/pgvector:pg17" | head -n 1)
    if [ -z "$PG_CONTAINER" ]; then
      echo "PostgreSQL container not found, no slow query logs to show."
      exit 0
    fi
    docker exec "$PG_CONTAINER" cat /var/log/pg_log/postgresql.log \
      || echo "Could not read /var/log/pg_log/postgresql.log from the PostgreSQL container."
Comment on lines +420 to +421

- name: Final stats log
if: always()
run: |
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ v2.1.0 introduces repair steps. These run on app startup.

`repair2001_date20240412153300.py` removes the existing config.yaml from the persistent storage so that
hardware detection can run and place a suitable config (based on the detected accelerator) in its place.
To skip this step (or steps in the future), populate the `repair.info` file with the repair file name(s).
To skip this step (or steps in the future), add the repair filename(s) to `repair.info` in the persistent storage, one filename per line.
Run the command below inside the container, or add the repair filename manually to the `repair.info` file located at `/nc_app_context_chat_backend_data` inside the Docker container.

`echo repair2001_date20240412153300.py > "$APP_PERSISTENT_STORAGE/repair.info"`
`echo repair2001_date20240412153300.py >> "$APP_PERSISTENT_STORAGE/repair.info"`

#### How to generate a repair step file
`APP_VERSION` should at least be incremented at the minor level (MAJOR.MINOR.PATCH)
Expand Down
28 changes: 28 additions & 0 deletions context_chat_backend/repair/repair5004_date20260521105831.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#
# SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
#
import os

import sqlalchemy as sa

'''
Add an index on access_list.source_id to speed up ON DELETE CASCADE
triggered when deleting from the docs table.
Without this index, the CASCADE performs a sequential scan of access_list
for each deleted doc row, causing very slow batch deletes.
'''


def run(_previous_version: int):
    '''
    Create the idx_access_list_source_id index on access_list (source_id)
    if it does not exist yet.

    The database URL is read from the CCB_DB_URL environment variable.
    When it is unset or empty, the migration is skipped with a message.

    Args:
        _previous_version: previous app version passed in by the repair runner (unused).
    '''
    db_url = os.environ.get('CCB_DB_URL')
    if not db_url:
        print('CCB_DB_URL not set, skipping access_list index migration', flush=True)
        return

    engine = sa.create_engine(db_url)
    try:
        # engine.begin() commits when the block succeeds and rolls back on error,
        # so no explicit conn.commit() is needed
        with engine.begin() as conn:
            conn.execute(sa.text(
                'CREATE INDEX IF NOT EXISTS idx_access_list_source_id ON access_list (source_id)'
            ))
    finally:
        # the engine only exists for this one-off migration: release its pooled connections
        engine.dispose()
Comment on lines +25 to +28
92 changes: 84 additions & 8 deletions context_chat_backend/repair/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
import re
from importlib import import_module

REPAIR_DIR = 'context_chat_backend/repair'
VERSION_INFO_FILE = 'version.info'
REPAIR_SKIP_FILE = 'repair.info'
PARTIAL_REPAIR_FILE = 'partial_repair.tmp'


def get_previous_version(version_info_path: str) -> tuple[int, bool]:
'''
Expand All @@ -15,8 +20,15 @@ def get_previous_version(version_info_path: str) -> tuple[int, bool]:
if not os.path.exists(version_info_path):
return (0, False)

with open(version_info_path) as f:
version_string = f.read().strip()
try:
with open(version_info_path) as f:
version_string = f.read().strip()
except OSError as e:
print(
f'Warning: could not read {version_info_path}, assuming no previous version was installed: {e}',
flush=True,
)
return (0, False)
Comment on lines +28 to +31

if not version_string:
return (0, False)
Expand All @@ -33,17 +45,36 @@ def get_previous_version(version_info_path: str) -> tuple[int, bool]:
return (int(major + minor.zfill(3)), repairs_pending)


def get_skipped_repairs(persistent_storage_path: str) -> set[str]:
    '''
    Read the filenames of the repairs to skip from the repair skip file.

    Args:
        persistent_storage_path: directory that contains the skip file (REPAIR_SKIP_FILE).

    Returns:
        The whitespace-stripped, non-empty lines of the file (one repair filename per line).
        An empty set when the file does not exist or cannot be read.
    '''
    repair_info_path = os.path.join(persistent_storage_path, REPAIR_SKIP_FILE)

    # Open the file directly instead of checking os.path.exists() first:
    # exists() also returns False on permission errors, which would hide
    # an unreadable skip file without any warning.
    try:
        with open(repair_info_path) as f:
            return {name for line in f if (name := line.strip())}
    except FileNotFoundError:
        # a missing skip file is the normal case, nothing to report
        return set()
    except OSError as e:
        print(f'Warning: could not read {repair_info_path}, no repairs will be skipped: {e}', flush=True)
        return set()


def main():
'''
Run repairs that have not been run before.
Repair files can either have no functions or a run() function.
To skip a repair, add its filename to repair.info in the persistent storage.
'''
print('Running repairs...', flush=True)

persistent_storage_path = os.getenv('APP_PERSISTENT_STORAGE', 'persistent_storage')
version_info_path = os.path.join(persistent_storage_path, 'version.info')

all_filenames = os.listdir('context_chat_backend/repair')
version_info_path = os.path.join(persistent_storage_path, VERSION_INFO_FILE)
partial_repair_path = os.path.join(persistent_storage_path, PARTIAL_REPAIR_FILE)

try:
all_filenames = os.listdir(REPAIR_DIR)
except OSError as e:
print(f'Error: could not list repair directory to get all the eligible repairs: {e}', flush=True)
raise
repair_filenames = [f for f in all_filenames if f.startswith('repair') and f.endswith('.py')]

(previous_app_version, repairs_pending) = get_previous_version(version_info_path)
Expand All @@ -52,6 +83,17 @@ def main():
print('No repairs are required.', flush=True)
return

skipped_repairs = get_skipped_repairs(persistent_storage_path)

try:
with open(partial_repair_path) as f:
partial_repairs = {line.strip() for line in f if line.strip()}
except FileNotFoundError:
partial_repairs = set()
except OSError as e:
print(f'Warning: could not read {partial_repair_path}, all pending repairs will be re-run: {e}', flush=True)
partial_repairs = set()

for repair_filename in repair_filenames:
pattern = re.compile(r'^repair(\d+)_date\d+\.py$')
matches = pattern.match(repair_filename)
Expand All @@ -65,16 +107,50 @@ def main():
print(f'No repairs to run for version {introduced_version}.', flush=True)
continue

if repair_filename in skipped_repairs:
print(f'Skipping repair {repair_filename} (listed in repair.info).', flush=True)
continue

if repair_filename in partial_repairs:
print(f'Skipping repair {repair_filename} (already completed in partial run).', flush=True)
continue

print(f'Running repair {repair_filename}...', flush=True, end='')

mod = import_module(f'.repair.{repair_filename[:-3]}', 'context_chat_backend')
if hasattr(mod, 'run'):
mod.run(previous_app_version)
try:
mod.run(previous_app_version)
except Exception:
print(
'failed.\n'
'The app will not continue further until this repair step succeeds, '
'or is skipped through the method described in https://github.com/nextcloud/context_chat_backend/#repair \n' # noqa: E501
'If not skipped, it will be tried again in the next app startup.',
flush=True,
)
raise

try:
with open(partial_repair_path, 'a') as f:
f.write(repair_filename + '\n')
except OSError as e:
print(f'Warning: could not write to {partial_repair_path}: {e}', flush=True)

print('completed.', flush=True)

with open(version_info_path, 'w') as f:
f.write(os.environ['APP_VERSION'] + '+')
try:
if os.path.exists(partial_repair_path):
os.unlink(partial_repair_path)
except OSError as e:
print(f'Warning: could not remove {partial_repair_path}: {e}', flush=True)

try:
with open(version_info_path, 'w') as f:
f.write(os.environ['APP_VERSION'] + '+')
except OSError as e:
print(f'Error: could not write {version_info_path}: {e}', flush=True)
return

print('Repairs completed.', flush=True)

Expand Down
1 change: 1 addition & 0 deletions context_chat_backend/vectordb/pgvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class AccessListStore(Base):
f'{DOCUMENTS_TABLE_NAME}.source_id',
ondelete='CASCADE',
),
index=True,
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DB migration needs to be done for this to happen on existing installations.

)

__table_args__ = (
Expand Down
Loading