Merge pull request #85 from goldlabelapps/staging

goldlabelapps · web-flow · commit c9be02826818 · 2026-04-13T14:17:15.000+01:00
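This pull request improves and cleans up the prompt handling API: POST /prompt/linkedin now accepts `linkedin_url` (as well as the legacy `linkedinUrl`) and creates a new Gemini analysis when no cached completion exists, and `prospect_id` is removed from prompt record queries, inserts, and the `prompt` table schema.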
diff --git a/app/api/prompt/linkedin.py b/app/api/prompt/linkedin.py
@@ -1,3 +1,5 @@
+import os
+
 from fastapi import APIRouter, Depends, HTTPException
 
 from app.utils.api_key_auth import get_api_key
@@ -9,10 +11,22 @@
 
 @router.post("/prompt/linkedin")
 def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key)) -> dict:
-    """POST /prompt/linkedin: return cached completion for linkedinUrl when available."""
-    linkedin_url = (payload.get("linkedinUrl") or "").strip()
+    """POST /prompt/linkedin: return cached completion or create a new Gemini analysis."""
+    linkedin_url = (payload.get("linkedin_url") or payload.get("linkedinUrl") or "").strip()
     if not linkedin_url:
-        raise HTTPException(status_code=400, detail="Missing 'linkedinUrl' in request body.")
+        raise HTTPException(status_code=400, detail="Missing 'linkedin_url' in request body.")
+
+    prompt = (payload.get("prompt") or "").strip()
+    if not prompt:
+        prompt = (
+            "Analyse this LinkedIn profile URL and provide a concise summary of the person, "
+            "their role, company, seniority, likely responsibilities, and notable signals. "
+            f"LinkedIn URL: {linkedin_url}"
+        )
+
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        raise HTTPException(status_code=500, detail="Gemini API key not configured.")
 
     conn = None
     cur = None
@@ -21,43 +35,164 @@ def linkedin_prompt_success(payload: dict, api_key: str = Depends(get_api_key))
         cur = conn.cursor()
         cur.execute(
             """
-            SELECT id, completion, time, model, data
-            FROM prompt
-            WHERE (data->>'linkedinUrl' = %s OR prompt ILIKE %s)
-            ORDER BY id DESC
-            LIMIT 1;
-            """,
-            (linkedin_url, f"%{linkedin_url}%"),
+            SELECT EXISTS (
+                SELECT 1
+                FROM information_schema.columns
+                WHERE table_schema = 'public'
+                  AND table_name = 'prompt'
+                  AND column_name = 'search_vector'
+            );
+            """
         )
+        exists_row = cur.fetchone()
+        has_search_vector = bool(exists_row and exists_row[0])
+
+        if has_search_vector:
+            cur.execute(
+                """
+                SELECT id, prompt, completion, time, model, data
+                FROM prompt
+                WHERE (
+                    COALESCE(data->>'linkedin_url', data->>'linkedinUrl') = %s
+                    OR search_vector @@ plainto_tsquery('english', %s)
+                    OR prompt ILIKE %s
+                )
+                ORDER BY id DESC
+                LIMIT 1;
+                """,
+                (linkedin_url, linkedin_url, f"%{linkedin_url}%"),
+            )
+        else:
+            cur.execute(
+                """
+                SELECT id, prompt, completion, time, model, data
+                FROM prompt
+                WHERE (COALESCE(data->>'linkedin_url', data->>'linkedinUrl') = %s OR prompt ILIKE %s)
+                ORDER BY id DESC
+                LIMIT 1;
+                """,
+                (linkedin_url, f"%{linkedin_url}%"),
+            )
         row = cur.fetchone()
 
         if row:
+            cur.close()
+            conn.close()
+            cur = None
+            conn = None
             return {
                 "meta": make_meta("success", "LinkedIn URL already analysed"),
                 "data": {
                     "cached": True,
                     "id": row[0],
-                    "linkedinUrl": linkedin_url,
-                    "completion": row[1],
-                    "time": row[2].isoformat() if row[2] else None,
-                    "model": row[3],
-                    "record_data": row[4],
+                    "linkedin_url": linkedin_url,
+                    "prompt": row[1],
+                    "completion": row[2],
+                    "time": row[3].isoformat() if row[3] else None,
+                    "model": row[4],
+                    "record_data": row[5],
                 },
             }
 
+        cur.close()
+        conn.close()
+        cur = None
+        conn = None
+
+        import json
+        import logging
+        import time as time_mod
+        from app import __version__
+        from google import genai
+
+        client = genai.Client(api_key=gemini_api_key)
+        model_names = [
+            "models/gemini-flash-latest",
+            "models/gemini-1.5-pro",
+            "models/gemini-1.5-flash",
+            "models/gemini-1.0-pro",
+            "models/gemini-pro",
+            "models/gemini-pro-vision",
+        ]
+        response = None
+        completion = None
+        used_model = None
+        errors = {}
+        start_time = time_mod.time()
+        for model_name in model_names:
+            try:
+                response = client.models.generate_content(model=model_name, contents=prompt)
+                completion = getattr(response, "text", None)
+                if completion:
+                    used_model = model_name
+                    break
+            except Exception as model_exc:
+                errors[model_name] = str(model_exc)
+                continue
+
+        duration = time_mod.time() - start_time
+        if not completion:
+            error_details = " | ".join([f"{name}: {message}" for name, message in errors.items()])
+            raise Exception(
+                "No available Gemini model succeeded for generate_content with your API key. "
+                f"Details: {error_details}"
+            )
+
+        record_id = None
+        record_data = {
+            "version": __version__,
+            "linkedin_url": linkedin_url,
+        }
+        try:
+            conn = get_db_connection_direct()
+            cur = conn.cursor()
+            data_blob = json.dumps(record_data)
+            if has_search_vector:
+                cur.execute(
+                    """
+                    INSERT INTO prompt (prompt, completion, duration, model, data, search_vector)
+                    VALUES (%s, %s, %s, %s, %s, to_tsvector('english', %s || ' ' || %s))
+                    RETURNING id;
+                    """,
+                    (prompt, completion, duration, used_model, data_blob, prompt, completion)
+                )
+            else:
+                cur.execute(
+                    """
+                    INSERT INTO prompt (prompt, completion, duration, model, data)
+                    VALUES (%s, %s, %s, %s, %s)
+                    RETURNING id;
+                    """,
+                    (prompt, completion, duration, used_model, data_blob)
+                )
+            record_id_row = cur.fetchone()
+            record_id = record_id_row[0] if record_id_row else None
+            conn.commit()
+            cur.close()
+            conn.close()
+            cur = None
+            conn = None
+        except Exception as db_exc:
+            logging.error(f"Failed to insert prompt record: {db_exc}")
+
         return {
-            "meta": make_meta("warning", "LinkedIn URL not analysed yet"),
+            "meta": make_meta("success", f"Gemini completion received from {used_model}"),
             "data": {
                 "cached": False,
-                "linkedinUrl": linkedin_url,
-                "completion": None,
+                "id": record_id,
+                "linkedin_url": linkedin_url,
+                "prompt": prompt,
+                "completion": completion,
+                "duration": duration,
+                "model": used_model,
+                "record_data": record_data,
             },
         }
     except HTTPException:
         raise
     except Exception as e:
         return {
-            "meta": make_meta("error", f"DB error: {str(e)}"),
+            "meta": make_meta("error", f"Gemini API error: {str(e)}"),
             "data": {},
         }
     finally:
diff --git a/app/api/prompt/prompt.py b/app/api/prompt/prompt.py
@@ -11,87 +11,47 @@ def get_prompt_records(
     request: Request,
     page: int = Query(1, ge=1, description="Page number (1-based)"),
     page_size: int = Query(10, ge=1, le=100, description="Records per page"),
-    prospect_id: int = Query(None, description="Filter by prospect_id"),
     api_key: str = Depends(get_api_key)
 ) -> dict:
     """GET /prompt: Paginated list of prompt completions."""
     try:
         conn = get_db_connection_direct()
         cur = conn.cursor()
-        if prospect_id is not None:
-            # No pagination for single prospect_id lookup
-            select_query = """
-                SELECT id, prompt, completion, duration, time, data, model, prospect_id
-                FROM prompt
-                WHERE prospect_id = %s
-                ORDER BY id DESC
-            """
-            cur.execute(select_query, (prospect_id,))
-            rows = cur.fetchall()
-            records = [
-                {
-                    "id": row[0],
-                    "prompt": row[1],
-                    "completion": row[2],
-                    "duration": row[3],
-                    "time": row[4].isoformat() if row[4] else None,
-                    "data": row[5],
-                    "model": row[6],
-                    "prospect_id": row[7],
-                }
-                for row in rows
-            ]
-            cur.close()
-            conn.close()
-            if records:
-                meta = make_meta("success", f"Found {len(records)} record(s) for prospect_id {prospect_id}")
-                return {
-                    "meta": meta,
-                    "data": records,
-                }
-            else:
-                meta = make_meta("warning", f"No records found for prospect_id {prospect_id}")
-                return {
-                    "meta": meta,
-                    "data": [],
-                }
-        else:
-            offset = (page - 1) * page_size
-            cur.execute("SELECT COUNT(*) FROM prompt;")
-            count_row = cur.fetchone()
-            total = count_row[0] if count_row and count_row[0] is not None else 0
-            cur.execute("""
-                SELECT id, prompt, completion, duration, time, data, model, prospect_id
-                FROM prompt
-                ORDER BY id DESC
-                LIMIT %s OFFSET %s;
-            """, (page_size, offset))
-            records = [
-                {
-                    "id": row[0],
-                    "prompt": row[1],
-                    "completion": row[2],
-                    "duration": row[3],
-                    "time": row[4].isoformat() if row[4] else None,
-                    "data": row[5],
-                    "model": row[6],
-                    "prospect_id": row[7],
-                }
-                for row in cur.fetchall()
-            ]
-            cur.close()
-            conn.close()
-            meta = make_meta("success", f"Prompt {len(records)} records (page {page})")
-            return {
-                "meta": meta,
-                "data": {
-                    "page": page,
-                    "page_size": page_size,
-                    "total": total,
-                    "pages": (total + page_size - 1) // page_size,
-                    "data": records,
-                },
+        offset = (page - 1) * page_size
+        cur.execute("SELECT COUNT(*) FROM prompt;")
+        count_row = cur.fetchone()
+        total = count_row[0] if count_row and count_row[0] is not None else 0
+        cur.execute("""
+            SELECT id, prompt, completion, duration, time, data, model
+            FROM prompt
+            ORDER BY id DESC
+            LIMIT %s OFFSET %s;
+        """, (page_size, offset))
+        records = [
+            {
+                "id": row[0],
+                "prompt": row[1],
+                "completion": row[2],
+                "duration": row[3],
+                "time": row[4].isoformat() if row[4] else None,
+                "data": row[5],
+                "model": row[6],
             }
+            for row in cur.fetchall()
+        ]
+        cur.close()
+        conn.close()
+        meta = make_meta("success", f"Prompt {len(records)} records (page {page})")
+        return {
+            "meta": meta,
+            "data": {
+                "page": page,
+                "page_size": page_size,
+                "total": total,
+                "pages": (total + page_size - 1) // page_size,
+                "data": records,
+            },
+        }
     except Exception as e:
         meta = make_meta("error", f"DB error: {str(e)}")
         return {"meta": meta, "data": {}}
@@ -100,7 +60,6 @@ def get_prompt_records(
 def llm_post(payload: dict) -> dict:
     """POST /prompt: send prompt to Gemini, returns completion google-genai SDK."""
     prompt = payload.get("prompt")
-    prospect_id = payload.get("prospect_id")
     if not prompt:
         raise HTTPException(status_code=400, detail="Missing 'prompt' in request body.")
     api_key = os.getenv("GEMINI_API_KEY")
@@ -148,11 +107,11 @@ def llm_post(payload: dict) -> dict:
             cur = conn.cursor()
             cur.execute(
                 """
-                INSERT INTO prompt (prompt, completion, duration, data, model, prospect_id)
-                VALUES (%s, %s, %s, %s, %s, %s)
+                INSERT INTO prompt (prompt, completion, duration, data, model)
+                VALUES (%s, %s, %s, %s, %s)
                 RETURNING id;
                 """,
-                (prompt, completion, duration, data_blob, used_model, prospect_id)
+                (prompt, completion, duration, data_blob, used_model)
             )
             record_id_row = cur.fetchone()
             record_id = record_id_row[0] if record_id_row else None
diff --git a/app/api/prompt/sql/create_table.sql b/app/api/prompt/sql/create_table.sql
@@ -6,7 +6,6 @@ CREATE TABLE IF NOT EXISTS prompt (
 	completion TEXT NOT NULL,
 	duration FLOAT,
 	time TIMESTAMPTZ DEFAULT NOW(),
-	data JSONB,
 	model TEXT,
-	prospect_id INTEGER REFERENCES prospects(id)
+	data JSONB
 );
diff --git a/app/api/prompt/sql/drop_llm_table.sql b/app/api/prompt/sql/drop_llm_table.sql