11from google import genai
22from google .genai import types
3- from models .schemas import GeneratedCode , CollectionContext , DebugSuggestionResponse
3+ from models .schemas import (
4+ GeneratedCode ,
5+ CollectionContext ,
6+ DebugSuggestionResponse ,
7+ SchemaRelationshipsResponse ,
8+ )
49from pydantic import BaseModel , Field
510from typing import Optional , List
611
@@ -31,6 +36,7 @@ class AuditSummaryResponse(BaseModel):
3136Database: {database}
3237Available collections: {collections}
3338Sample collection document (optional): {collection_context}
39+ Schema summary for ALL collections (for JOINs/lookups): {all_collections_schema}
3440Intermediate context (optional): {intermediate_context}
3541Return:
3642only one line of pure pymongo query code (e.g., db["collection"].find(...))
@@ -112,6 +118,7 @@ def generate_query_from_prompt(
112118 database : str ,
113119 collection_context : CollectionContext = None ,
114120 intermediate_context : dict = None ,
121+ all_collections_schema : str = "" ,
115122) -> GeneratedCode :
116123 # Prune intermediate_context to remove image/large data
117124 safe_intermediate_context = (
@@ -124,6 +131,7 @@ def generate_query_from_prompt(
124131 collection_context = (
125132 collection_context .sampleDocument if collection_context else ""
126133 ),
134+ all_collections_schema = all_collections_schema ,
127135 intermediate_context = safe_intermediate_context ,
128136 )
129137 client = genai .Client ()
@@ -244,3 +252,58 @@ def summarize_audit_results(
244252 summary = "Could not generate summary due to parsing error." ,
245253 visualization = VisualizationConfig (available = False ),
246254 )
255+
256+
# Prompt sent to the model by generate_schema_relationships().
# It is rendered with str.format(schema_summary=...), so {schema_summary} is the
# only placeholder; the doubled braces ({{ and }}) are escapes that come out as
# literal { and } in the JSON example shown to the model.
PROMPT_TEMPLATE_RELATIONSHIPS = """
You are a database architect. Analyze the provided MongoDB document samples to identify likely foreign key relationships and JOIN conditions between collections.

Schema/Samples:
{schema_summary}

Tasks:
1. Identify likely relationships (e.g., `userId` in `orders` -> `_id` in `users`).
2. Provide a confidence score (0.0 - 1.0) and a brief description for each.
3. Return a JSON object with a "relationships" key containing a list of these findings.

Output Format (Json):
{{
"relationships": [
{{
"source_collection": "orders",
"source_field": "userId",
"target_collection": "users",
"target_field": "_id",
"description": "Orders belong to Users",
"confidence": 0.95
}}
]
}}
"""
282+
283+
def generate_schema_relationships(schema_summary: str) -> SchemaRelationshipsResponse:
    """Ask Gemini to infer likely relationships between collections.

    The schema summary is substituted into PROMPT_TEMPLATE_RELATIONSHIPS and
    sent to the model with a JSON response schema.

    Args:
        schema_summary: Text describing the collections' schemas / sample
            documents; inserted verbatim into the prompt.

    Returns:
        The SDK-parsed response when the client provides one; otherwise a
        SchemaRelationshipsResponse built from the raw JSON text. If the text
        is empty or cannot be parsed/validated, a response with an empty
        ``relationships`` list is returned instead of raising.

    Note:
        Errors raised by the API call itself are not caught here and
        propagate to the caller.
    """
    # Kept function-local so this block does not depend on a file-level
    # ``import json`` that may not exist.
    import json

    full_prompt = PROMPT_TEMPLATE_RELATIONSHIPS.format(schema_summary=schema_summary)
    client = genai.Client()
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=full_prompt,
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=SchemaRelationshipsResponse,
            # NOTE(review): a budget of 0 is presumably meant to turn the
            # model's "thinking" phase off — confirm against the Gemini docs.
            thinking_config=types.ThinkingConfig(thinking_budget=0),
        ),
    )

    # Preferred path: the SDK already parsed the JSON into the schema type.
    parsed = getattr(response, "parsed", None)
    if parsed:
        return parsed

    # Fallback: parse the raw text ourselves. This is a best-effort boundary,
    # so any failure degrades to an empty result rather than an exception.
    try:
        raw_text = response.text
        if not raw_text:
            # Report a clear reason instead of json.loads' TypeError on None.
            raise ValueError("empty response text")
        data = json.loads(raw_text)
        return SchemaRelationshipsResponse(**data)
    except Exception as e:
        print(f"Error parsing Gemini relationship response: {e}")
        return SchemaRelationshipsResponse(relationships=[])
0 commit comments