Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion backend/backend/application/session/connection_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,28 @@
from backend.utils.pagination import CustomPaginator


def _get_host_display(con_model):
"""Extract a human-readable host string from decrypted connection details."""
try:
details = con_model.decrypted_connection_details
ds = con_model.datasource_name
if ds in ("postgres", "mysql", "trino"):
host = details.get("host", "")
port = details.get("port", "")
return f"{host}:{port}" if host and port else host or None
if ds == "snowflake":
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Must Fix: _get_host_display() decrypts passwords/tokens unnecessarily

con_model.decrypted_connection_details runs Fernet decryption on every sensitive field (password, api_key, token, connection_url, etc.) — but this function only reads non-sensitive fields like host, port, account, project_id.

These fields are stored as plaintext in the DB (only fields in SENSITIVE_FIELDS are encrypted). So you can read connection_details directly and skip decryption entirely:

def _get_host_display(con_model):
    try:
        details = con_model.connection_details  # raw dict, no decryption needed
        ds = con_model.datasource_name
        if ds in ("postgres", "mysql", "trino"):
            host = details.get("host", "")
            port = details.get("port", "")
            return f"{host}:{port}" if host and port else host or None
        if ds == "snowflake":
            return details.get("account") or None
        if ds == "bigquery":
            return details.get("project_id") or None
        if ds == "databricks":
            return details.get("host") or None
        if ds == "duckdb":
            return details.get("file_path") or None
    except Exception:
        pass
    return None

This eliminates Fernet CPU overhead on every row in the list API (20 connections × decryption = noticeable on page load).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in f3e25aa — now reads connection_details directly (the raw JSON) instead of decrypted_connection_details. Host, port, account, project_id are all plaintext — no Fernet decryption needed.

return details.get("account") or None
if ds == "bigquery":
return details.get("project_id") or None
if ds == "databricks":
return details.get("host") or None
if ds == "duckdb":
return details.get("file_path") or None
except Exception:
pass
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bare except Exception: pass masks real bugs (e.g., schema drift on connection_details, missing adapter). At minimum, log a warning so the field silently becoming None for everyone is debuggable.

Suggested change
except Exception:
pass
except Exception:
logger.warning("Failed to derive host display for connection %s", con_model.connection_id, exc_info=True)

Also worth a unit test with connection_details=None and missing keys, and a sanity check that host is genuinely stored unencrypted for databricks and trino (some adapters move host into the encrypted blob; if so, this returns garbage rather than None).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in a0c5ec7 — added logger.warning("Failed to derive host display for connection %s", ..., exc_info=True).

return None


class ConnectionSession:

@staticmethod
Expand Down Expand Up @@ -58,20 +80,28 @@ def get_all_connections(page: int, limit: int, filter_condition: dict[str, Any])
is_sample_project = project_con.is_sample
else:
is_sample_project = False
env_count = EnvironmentModels.objects.filter(
connection_model_id=con_model.connection_id, is_deleted=False
).count()
project_count = ProjectDetails.objects.filter(
connection_model_id=con_model.connection_id
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Must Fix: N+1 queries — 3 extra DB queries per connection row

This loop fires env_count + project_count queries per row. Combined with the existing ProjectDetails.objects.filter().first() above, that's 3 queries per page item (60 queries for 20 connections).

Fix with annotate() — batch into a single query:

from django.db.models import Count, Q

con_models_qs = ConnectionDetails.objects.filter(
    **filter_condition, is_deleted=False
).annotate(
    env_count=Count(
        'environmentmodels',
        filter=Q(environmentmodels__is_deleted=False)
    ),
    project_count=Count('projectdetails')
).order_by('-modified_at')

Then access con_model.env_count directly in the loop — zero extra queries.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in f3e25aa — replaced the per-row loop queries with a single annotate(env_count=Count(...), project_count=Count(...), is_sample=Exists(...)). Zero N+1 queries now.

).count()
connection_list.append(
{
"id": con_model.connection_id,
Comment thread
greptile-apps[bot] marked this conversation as resolved.
Outdated
"name": con_model.connection_name,
"description": con_model.connection_description,
"datasource_name": con_model.datasource_name,
"host": _get_host_display(con_model),
"created_by": con_model.created_by,
"last_modified_by": con_model.last_modified_by,
"db_icon": import_file(f"visitran.adapters.{con_model.datasource_name}").ICON,
"is_connection_exist": con_model.is_connection_exist,
"is_connection_valid": con_model.is_connection_valid,
"connection_flag": con_model.connection_flag,
"is_sample_project": is_sample_project,
# "connection_details": con_model.connection_details, # skipping connection_details
"env_count": env_count,
"project_count": project_count,
}
)

Expand Down
16 changes: 15 additions & 1 deletion backend/backend/application/session/env_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

from visitran.utils import import_file

from backend.application.session.connection_session import ConnectionSession
from backend.application.session.connection_session import ConnectionSession, _get_host_display
from backend.application.utils import get_filter
from backend.core.models.environment_models import EnvironmentModels
from backend.core.models.project_details import ProjectDetails
from backend.core.scheduler.models import UserTaskDetails
from backend.errors.exceptions import EnvironmentAlreadyExist, EnvironmentNotExists
from backend.utils.pagination import CustomPaginator

Expand Down Expand Up @@ -89,6 +90,12 @@ def get_all_environments(self, page: int, limit: int, filter_condition: dict[str

env_data = []
for env_model in env_models.get("page_items"):
job_count = UserTaskDetails.objects.filter(
environment=env_model
).count()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Must Fix: Same N+1 pattern — 2 count queries per environment row + missing select_related

job_count and project_count fire 2 queries per row. Plus, accessing env_model.connection_model.connection_name and calling _get_host_display(env_model.connection_model) likely triggers another query per row to load the related connection (ForeignKey not prefetched).

Fix:

env_qs = EnvironmentModels.objects.filter(
    **filter_condition, is_deleted=False
).select_related(
    'connection_model'  # eliminates ForeignKey N+1
).annotate(
    job_count=Count('usertaskdetails'),
    project_count=Count('projectdetails')
).order_by('-modified_at')

select_related('connection_model') loads the connection in the same query. annotate() batches the counts. Total: 1 query instead of ~60.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in f3e25aa — added select_related('connection_model') + annotate(job_count=Count(...), project_count=Count(...)). Single query for all data including connection fields.

project_count = ProjectDetails.objects.filter(
environment_model=env_model
).count()
env_data.append(
{
"id": env_model.environment_id,
Expand All @@ -100,8 +107,12 @@ def get_all_environments(self, page: int, limit: int, filter_condition: dict[str
"name": env_model.connection_model.connection_name,
"datasource_name": env_model.connection_model.datasource_name,
"db_icon": import_file(f"visitran.adapters.{env_model.connection_model.datasource_name}").ICON,
"host": _get_host_display(env_model.connection_model),
"connection_flag": env_model.connection_model.connection_flag,
},
"is_tested": env_model.is_tested,
"job_count": job_count,
"project_count": project_count,
}
)
env_models["page_items"] = env_data
Expand All @@ -118,6 +129,9 @@ def get_environment(self, environment_id: str) -> dict[str, Any]:
"id": env_model.connection_model.connection_id,
"name": env_model.connection_model.connection_name,
"datasource_name": env_model.connection_model.datasource_name,
"db_icon": import_file(f"visitran.adapters.{env_model.connection_model.datasource_name}").ICON,
"host": _get_host_display(env_model.connection_model),
"connection_flag": env_model.connection_model.connection_flag,
},
"connection_details": env_model.masked_connection_data,
"custom_data": env_model.env_custom_data,
Expand Down
Loading
Loading