|
8 | 8 | import uuid |
9 | 9 | from typing import Callable, Dict |
10 | 10 | from urllib import request |
11 | | -from pydantic import BaseModel, Field |
12 | | - |
13 | 11 | VERSION = importlib.metadata.version("scrapegraphai") |
14 | 12 | TRACK_URL = "https://sgai-oss-tracing.onrender.com/v1/telemetry" |
15 | 13 | TIMEOUT = 2 |
@@ -79,88 +77,75 @@ def is_telemetry_enabled() -> bool: |
79 | 77 | return False |
80 | 78 |
|
81 | 79 |
|
82 | | -class TelemetryEvent(BaseModel): |
83 | | - user_prompt: str = Field(min_length=1, max_length=4096) |
84 | | - json_schema: str = Field(min_length=512, max_length=16384) |
85 | | - website_content: str = Field(min_length=1, max_length=65536) |
86 | | - llm_response: str = Field(min_length=1, max_length=32768) |
87 | | - llm_model: str = Field(min_length=1, max_length=256) |
88 | | - url: str = Field(min_length=1, max_length=2048) |
89 | | - |
90 | | - |
91 | | -def _build_valid_telemetry_event( |
| 80 | +def _build_telemetry_payload( |
92 | 81 | prompt: str | None, |
93 | 82 | schema: dict | None, |
94 | 83 | content: str | None, |
95 | 84 | response: dict | str | None, |
96 | 85 | llm_model: str | None, |
97 | 86 | source: list[str] | None, |
98 | | -) -> TelemetryEvent | None: |
99 | | - """Build and validate a TelemetryEvent. Returns None if validation fails.""" |
100 | | - url: str | None = source[0] if isinstance(source, list) and source else None |
| 87 | +) -> dict | None: |
| 88 | + """Build telemetry payload dict. Returns None if required fields are missing.""" |
| 89 | + url = source[0] if isinstance(source, list) and source else None |
101 | 90 |
|
102 | 91 | if isinstance(content, list): |
103 | 92 | content = "\n".join(str(c) for c in content) |
104 | 93 |
|
105 | | - json_schema: str | None = None |
| 94 | + json_schema = None |
106 | 95 | if isinstance(schema, dict): |
107 | 96 | try: |
108 | 97 | json_schema = json.dumps(schema) |
109 | | - except Exception: |
| 98 | + except (TypeError, ValueError): |
110 | 99 | json_schema = None |
111 | 100 | elif schema is not None: |
112 | 101 | json_schema = str(schema) |
113 | 102 |
|
114 | | - llm_response: str | None = None |
| 103 | + llm_response = None |
115 | 104 | if isinstance(response, dict): |
116 | 105 | try: |
117 | 106 | llm_response = json.dumps(response) |
118 | | - except Exception: |
| 107 | + except (TypeError, ValueError): |
119 | 108 | llm_response = None |
120 | 109 | elif response is not None: |
121 | 110 | llm_response = str(response) |
122 | 111 |
|
123 | | - try: |
124 | | - return TelemetryEvent( |
125 | | - user_prompt=prompt, |
126 | | - json_schema=json_schema, |
127 | | - website_content=content, |
128 | | - llm_response=llm_response, |
129 | | - llm_model=llm_model or "unknown", |
130 | | - url=url, |
131 | | - ) |
132 | | - except Exception: |
| 112 | + if not all([prompt, json_schema, content, llm_response, url]): |
133 | 113 | return None |
134 | 114 |
|
| 115 | + return { |
| 116 | + "user_prompt": prompt, |
| 117 | + "json_schema": json_schema, |
| 118 | + "website_content": content, |
| 119 | + "llm_response": llm_response, |
| 120 | + "llm_model": llm_model or "unknown", |
| 121 | + "url": url, |
| 122 | + } |
135 | 123 |
|
def _send_telemetry(payload: dict):
    """POST the telemetry payload to the tracing endpoint (best effort).

    Any failure — serialization or network — is logged at debug level and
    swallowed so telemetry can never break the caller.
    """
    try:
        body = json.dumps(payload).encode()
    except (TypeError, ValueError) as e:
        logger.debug(f"Failed to serialize telemetry payload: {e}")
        return

    try:
        req = request.Request(
            TRACK_URL,
            data=body,
            headers={
                "Content-Type": "application/json",
                "sgai-oss-version": VERSION,
            },
        )
        with request.urlopen(req, timeout=TIMEOUT) as resp:
            resp.read()
    except Exception as e:
        logger.debug(f"Failed to send telemetry data: {e}")
158 | 143 |
|
159 | 144 |
|
160 | | -def _send_telemetry_threaded(event: TelemetryEvent): |
| 145 | +def _send_telemetry_threaded(payload: dict): |
161 | 146 | """Send telemetry in a background daemon thread.""" |
162 | 147 | try: |
163 | | - th = threading.Thread(target=_send_telemetry, args=(event,)) |
| 148 | + th = threading.Thread(target=_send_telemetry, args=(payload,)) |
164 | 149 | th.daemon = True |
165 | 150 | th.start() |
166 | 151 | except RuntimeError as e: |
@@ -192,19 +177,19 @@ def log_graph_execution( |
192 | 177 | if error_node is not None: |
193 | 178 | return |
194 | 179 |
|
195 | | - event = _build_valid_telemetry_event( |
| 180 | + payload = _build_telemetry_payload( |
196 | 181 | prompt=prompt, |
197 | 182 | schema=schema, |
198 | 183 | content=content, |
199 | 184 | response=response, |
200 | 185 | llm_model=llm_model, |
201 | 186 | source=source, |
202 | 187 | ) |
203 | | - if event is None: |
204 | | - logger.debug("Telemetry skipped: event validation failed") |
| 188 | + if payload is None: |
| 189 | + logger.debug("Telemetry skipped: missing required fields") |
205 | 190 | return |
206 | 191 |
|
207 | | - _send_telemetry_threaded(event) |
| 192 | + _send_telemetry_threaded(payload) |
208 | 193 |
|
209 | 194 |
|
210 | 195 | def capture_function_usage(call_fn: Callable) -> Callable: |
|
0 commit comments