Skip to content

Commit a8f13ea

Browse files
committed
feat: add AuditPage for analyzing audit logs with visualization and markdown support
- Added AuditPage component with a form for user queries
- Integrated ReactMarkdown for displaying AI insights
- Implemented data fetching from the API with error handling
- Created AuditPage.module.css for styling the new page
- Updated the router to include a protected route for AuditPage
- Added new dependencies: react-markdown and recharts
1 parent d607fc9 commit a8f13ea

File tree

13 files changed

+2420
-14
lines changed

13 files changed

+2420
-14
lines changed

backend/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import uvicorn
33
from fastapi import FastAPI
44
from fastapi.middleware.cors import CORSMiddleware
5-
from routes import query, azure, system, user_queries, data_documents
5+
from routes import query, azure, system, user_queries, data_documents, audit
66

77
app = FastAPI()
88

@@ -55,6 +55,7 @@ async def health_check():
5555
app.include_router(system.router, prefix="/system", tags=["System"])
5656
app.include_router(user_queries.router, prefix="/user", tags=["User Queries"])
5757
app.include_router(data_documents.router, prefix="/data", tags=["Data Documents"])
58+
app.include_router(audit.router, prefix="/audit", tags=["Audit"])
5859

5960
if __name__ == "__main__":
6061
uvicorn.run(app, host="0.0.0.0", port=8000)

backend/routes/audit.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from fastapi import APIRouter, Header, Body, HTTPException
2+
from pydantic import BaseModel
3+
from typing import List, Dict, Any, Optional
4+
from services.audit_service import process_audit_question
5+
from services.azure_auth import exchange_token_obo
6+
from services.gemini_service import VisualizationConfig, AuditSummaryResponse
7+
8+
router = APIRouter()
9+
10+
class AuditQueryRequest(BaseModel):
    # Request body for POST /query: the user's natural-language question,
    # which the endpoint forwards to process_audit_question.
    question: str
12+
13+
class AuditQueryResponse(BaseModel):
    # Response body for POST /query, built from the dict returned by
    # process_audit_question.
    # SQL text that was generated for the question.
    sql_query: str
    # Result rows, one dict per row keyed by column name.
    results: List[Dict[str, Any]]
    # Summary text (or an error message when generation/execution failed).
    summary: str
    # Optional chart configuration; defaults to None when not supplied.
    visualization: Optional[VisualizationConfig] = None
18+
19+
@router.post("/query", response_model=AuditQueryResponse)
def query_audit_log(
    body: AuditQueryRequest = Body(...),
    authorization: str = Header(...),
):
    # Guard clause: the Authorization header must use the Bearer scheme.
    uses_bearer_scheme = authorization.startswith("Bearer ")
    if not uses_bearer_scheme:
        raise HTTPException(status_code=401, detail="Invalid token format")

    # NOTE(review): only the header's shape is checked; the token itself is
    # never validated, so any "Bearer <anything>" value is accepted.
    # TODO: validate the token even though it is not yet used for the pg
    # connection, e.g. by stripping the "Bearer " prefix and passing the
    # remainder to exchange_token_obo.

    audit_payload = process_audit_question(body.question)
    return AuditQueryResponse(**audit_payload)

backend/services/audit_service.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from typing import Dict, Any
2+
from services.pg_connection import get_connection
3+
from services.gemini_service import generate_audit_sql, summarize_audit_results
4+
import json
5+
6+
7+
def execute_audit_query(sql_query: str) -> list:
    """
    Execute a read-only SQL query against the audit database.

    Args:
        sql_query: SQL text to run. It must be a single SELECT statement.

    Returns:
        One dict per result row, keyed by column name. Values that expose an
        ``isoformat`` method (e.g. dates and datetimes) are converted to their
        ISO string form; all other values (including dicts) are passed through
        unchanged. If the query is rejected or fails, a single-element list of
        the form ``[{"error": "<message>"}]`` is returned instead of raising.
    """
    statement = sql_query.strip()
    if not statement.lower().startswith("select"):
        return [{"error": "Only SELECT queries are allowed."}]

    # A prefix check alone would accept "SELECT 1; DELETE ...". Reject any
    # statement separator that is not merely trailing. This is deliberately
    # conservative: a literal ';' inside a string constant is rejected too.
    if ";" in statement.rstrip("; \t\r\n"):
        return [{"error": "Only a single SELECT statement is allowed."}]

    conn = None
    cur = None
    try:
        conn = get_connection()
        cur = conn.cursor()
        cur.execute(sql_query)

        # Column names come first in each cursor.description entry.
        columns = [desc[0] for desc in cur.description]
        return [
            {
                column: value.isoformat() if hasattr(value, "isoformat") else value
                for column, value in zip(columns, row)
            }
            for row in cur.fetchall()
        ]
    except Exception as e:
        print(f"Error executing audit query: {e}")
        return [{"error": str(e)}]
    finally:
        # Always release the cursor and connection, including when execute()
        # or fetchall() raised; previously they leaked on the error path.
        for resource in (cur, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as close_error:
                print(f"Error closing audit query resource: {close_error}")
38+
39+
40+
def process_audit_question(question: str) -> Dict[str, Any]:
    """
    Answer a natural-language audit question end to end.

    The question is turned into SQL by ``generate_audit_sql``, the SQL is run
    through ``execute_audit_query``, and the rows are passed to
    ``summarize_audit_results``.

    Returns:
        A dict with ``sql_query``, ``results`` and ``summary``. The
        ``visualization`` key is included only when all three steps succeed;
        on failure ``results`` is empty and ``summary`` carries the message.
    """
    generated_sql = generate_audit_sql(question)

    def _failure(message: str) -> Dict[str, Any]:
        # Shared shape for both early-exit paths.
        return {"sql_query": generated_sql, "results": [], "summary": message}

    # The generator signals failure with a query whose text contains "Error:".
    if "Error:" in generated_sql:
        return _failure("Could not generate a valid query for your request.")

    rows = execute_audit_query(generated_sql)

    # Execution failures come back as a first row holding an "error" key.
    if rows and "error" in rows[0]:
        return _failure(f"Error executing query: {rows[0]['error']}")

    analysis = summarize_audit_results(question, generated_sql, rows)
    return {
        "sql_query": generated_sql,
        "results": rows,
        "summary": analysis.summary,
        "visualization": analysis.visualization,
    }

backend/services/gemini_service.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
from google import genai
22
from google.genai import types
33
from models.schemas import GeneratedCode, CollectionContext, DebugSuggestionResponse
4+
from pydantic import BaseModel, Field
5+
from typing import Optional, List, Any, Dict
6+
7+
class VisualizationConfig(BaseModel):
    # Chart recommendation attached to an audit summary. This model is nested
    # in AuditSummaryResponse, which is passed to Gemini as response_schema,
    # so the Field descriptions below are part of what the model is told.
    # NOTE(review): the Optional fields have no default value. Under Pydantic
    # v2 that makes them required-but-nullable, so VisualizationConfig(
    # available=False) would fail validation — confirm the installed Pydantic
    # major version before relying on partial construction.
    available: bool = Field(description="Whether a chart is recommended for this data")
    type: Optional[str] = Field(description="Type of chart: 'bar', 'line', 'pie', 'scatter'")
    x_key: Optional[str] = Field(description="Key for X-axis data")
    y_key: Optional[str] = Field(description="Key for Y-axis data")
    title: Optional[str] = Field(description="Title for the chart")
    data_keys: Optional[List[str]] = Field(description="Keys to include in the chart data points (e.g. ['count', 'date'])")
14+
15+
class AuditSummaryResponse(BaseModel):
    # Structured output of summarize_audit_results; also used there as the
    # response_schema for the Gemini call.
    summary: str = Field(description="Markdown summary of the results")
    visualization: VisualizationConfig = Field(description="Configuration for data visualization")
418

519
PROMPT_TEMPLATE_QUERY = """
620
You are an assistant that converts user requests into MongoDB query code.
@@ -132,3 +146,89 @@ def generate_suggestion_from_query_error(query: str, error_message: str) -> str:
132146
response.text.strip() if hasattr(response, "text") else str(response).strip
133147
)
134148
return DebugSuggestionResponse(suggestion=suggestion)
149+
150+
151+
PROMPT_TEMPLATE_AUDIT_SQL = """
152+
You are a PostgreSQL expert. Convert the user's natural language question into a read-only SQL query for the `write_audit_log` table.
153+
Table Schema:
154+
- user_email (text): Email of the user who performed the operation.
155+
- operation (text): 'insert', 'update', or 'delete'.
156+
- database_name (text): Name of the database (format: account.database).
157+
- collection_name (text): Name of the collection.
158+
- document_id (text): ID of the affected document.
159+
- diff_data (jsonb): JSON containing the changes (for updates, it has 'before' and 'after' fields).
160+
- timestamp_utc (timestamptz): When the operation occurred.
161+
162+
User Question: "{user_input}"
163+
164+
Rules:
165+
1. Return ONLY the SQL query. No markdown, no explanations.
166+
2. The query MUST be a SELECT statement.
167+
3. Use LIMIT 100 if no limit is specified.
168+
4. If the user asks for "recent", order by timestamp_utc DESC.
169+
"""
170+
171+
PROMPT_TEMPLATE_AUDIT_SUMMARY = """
172+
You are a data analyst. Analyze the following SQL query and its results.
173+
174+
User Question: "{user_input}"
175+
SQL Query: "{sql_query}"
176+
Results:
177+
{results}
178+
179+
Tasks:
180+
1. Provide a concise markdown summary identifying patterns or answering the specific question.
181+
2. Determine if the data is suitable for visualization (e.g., time series, counts, comparisons).
182+
3. If suitable, structure a visualization configuration (type, keys, title).
183+
- For time series, prefer 'line' or 'bar'.
184+
- For categorical counts, use 'bar' or 'pie'.
185+
"""
186+
187+
188+
def generate_audit_sql(user_input: str) -> str:
    """
    Ask Gemini to translate a natural-language question into audit-log SQL.

    Args:
        user_input: The user's question, substituted into
            PROMPT_TEMPLATE_AUDIT_SQL.

    Returns:
        The generated SQL with surrounding whitespace removed, or a sentinel
        SELECT whose text contains "Error:" when the model output does not
        start with SELECT (including when the model returned no text).
    """
    full_prompt = PROMPT_TEMPLATE_AUDIT_SQL.format(user_input=user_input)
    client = genai.Client()
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=full_prompt,
        config=types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_budget=0)
        ),
    )
    # extract_python_code is defined elsewhere in this module; presumably it
    # strips markdown code fences from the model output — confirm.
    # Guard against a response with no text, and trim whitespace so that a
    # query preceded by a newline is not wrongly rejected below (the executor
    # strips before its own SELECT check, so the two checks now agree).
    sql = (extract_python_code(response.text or "") or "").strip()

    # Basic safety check
    if not sql.lower().startswith("select"):
        return "SELECT 'Error: Generated query was not a SELECT statement' as error;"
    return sql
203+
204+
205+
def summarize_audit_results(user_input: str, sql_query: str, results: list) -> AuditSummaryResponse:
    """
    Ask Gemini for a markdown summary and chart suggestion for query results.

    Args:
        user_input: The user's original question.
        sql_query: The SQL that produced ``results``.
        results: Result rows; their ``str()`` form is truncated to 10,000
            characters before being placed in the prompt.

    Returns:
        The SDK-parsed response when available; otherwise the response text
        parsed as JSON into an AuditSummaryResponse. If that parsing fails, a
        fallback response with a fixed message and ``available=False``.
    """
    import json

    # Truncate results if too large to avoid token limits
    results_str = str(results)[:10000]
    full_prompt = PROMPT_TEMPLATE_AUDIT_SUMMARY.format(
        user_input=user_input, sql_query=sql_query, results=results_str
    )
    client = genai.Client()
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=full_prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=AuditSummaryResponse,
            thinking_config=types.ThinkingConfig(thinking_budget=0)
        ),
    )

    parsed = getattr(response, "parsed", None)
    if parsed:
        return parsed

    try:
        data = json.loads(response.text)
        return AuditSummaryResponse(**data)
    except Exception as e:
        print(f"Error parsing Gemini response: {e}")
        # Every field is passed explicitly: the Optional fields on
        # VisualizationConfig declare no default, so under Pydantic v2 they
        # are required and VisualizationConfig(available=False) alone would
        # raise ValidationError from inside this error handler.
        return AuditSummaryResponse(
            summary="Could not generate summary due to parsing error.",
            visualization=VisualizationConfig(
                available=False,
                type=None,
                x_key=None,
                y_key=None,
                title=None,
                data_keys=None,
            ),
        )
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import unittest
2+
from unittest.mock import patch, MagicMock
3+
from services.audit_service import process_audit_question
4+
5+
class TestAuditService(unittest.TestCase):
    """Unit tests for process_audit_question with Gemini and the database patched out."""

    @patch('services.audit_service.generate_audit_sql')
    @patch('services.audit_service.get_connection')
    @patch('services.audit_service.summarize_audit_results')
    def test_process_audit_question_success(self, mock_summarize, mock_get_conn, mock_generate_sql):
        """A valid SELECT is executed and its rows and summary are returned."""
        # Setup mocks
        mock_generate_sql.return_value = "SELECT * FROM write_audit_log LIMIT 5"

        mock_conn = MagicMock()
        mock_cursor = MagicMock()
        mock_get_conn.return_value = mock_conn
        mock_conn.cursor.return_value = mock_cursor

        # Mock DB results
        mock_cursor.description = [('user_email',), ('operation',)]
        mock_cursor.fetchall.return_value = [('test@example.com', 'insert')]

        # process_audit_question reads .summary and .visualization from the
        # summarizer's return value, so the stub must be an object exposing
        # those attributes; a bare string would raise AttributeError.
        mock_summarize.return_value = MagicMock(
            summary="Summary of results.", visualization=None
        )

        # Execute
        result = process_audit_question("Show me inserts")

        # Assertions
        self.assertEqual(result['sql_query'], "SELECT * FROM write_audit_log LIMIT 5")
        self.assertEqual(len(result['results']), 1)
        self.assertEqual(result['results'][0]['user_email'], 'test@example.com')
        self.assertEqual(result['summary'], "Summary of results.")
        self.assertIsNone(result['visualization'])

        mock_generate_sql.assert_called_once()
        mock_cursor.execute.assert_called_with("SELECT * FROM write_audit_log LIMIT 5")
        mock_summarize.assert_called_once()

    @patch('services.audit_service.generate_audit_sql')
    def test_process_audit_question_invalid_sql(self, mock_generate_sql):
        """A non-SELECT statement is rejected before any database access."""
        mock_generate_sql.return_value = "DELETE FROM write_audit_log"

        result = process_audit_question("Delete everything")

        self.assertIn("Error executing query", result['summary'])
        self.assertIn("Only SELECT", result['summary'])
46+
47+
if __name__ == '__main__':
48+
unittest.main()

0 commit comments

Comments
 (0)