Skip to content

Commit 00fcead

Browse files
authored
fix: Handle NULL values in cost and token calculations
- Initialize cost and token variables as None instead of 0.0/0
- Update database schema to remove DEFAULT values
- Modify calculate_cost_and_tokens function to handle None values
- Update report formatting to display "None" for NULL values
- Improve metadata extraction with null safety checks

Signed-off-by: pk-zipstack <praveen@zipstack.com>
1 parent fdffc5c commit 00fcead

1 file changed

Lines changed: 36 additions & 26 deletions

File tree

main.py

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@ def init_db():
5252
time_taken REAL,
5353
status_code INTEGER,
5454
status_api_endpoint TEXT,
55-
total_embedding_cost REAL DEFAULT 0.0,
56-
total_embedding_tokens INTEGER DEFAULT 0,
57-
total_llm_cost REAL DEFAULT 0.0,
58-
total_llm_tokens INTEGER DEFAULT 0,
55+
total_embedding_cost REAL,
56+
total_embedding_tokens INTEGER,
57+
total_llm_cost REAL,
58+
total_llm_tokens INTEGER,
5959
updated_at TEXT,
6060
created_at TEXT
6161
)"""
@@ -103,10 +103,10 @@ def update_db(
103103
status_api_endpoint,
104104
):
105105

106-
total_embedding_cost = 0.0
107-
total_embedding_tokens = 0
108-
total_llm_cost = 0.0
109-
total_llm_tokens = 0
106+
total_embedding_cost = None
107+
total_embedding_tokens = None
108+
total_llm_cost = None
109+
total_llm_tokens = None
110110

111111
if result is not None:
112112
total_embedding_cost, total_llm_cost, total_embedding_tokens, total_llm_tokens = calculate_cost_and_tokens(result)
@@ -148,16 +148,16 @@ def update_db(
148148
# Calculate total cost and tokens for detailed report
149149
def calculate_cost_and_tokens(result):
150150

151-
total_embedding_cost = 0.0
152-
total_embedding_tokens = 0
153-
total_llm_cost = 0.0
154-
total_llm_tokens = 0
151+
total_embedding_cost = None
152+
total_embedding_tokens = None
153+
total_llm_cost = None
154+
total_llm_tokens = None
155155

156156
# Extract 'extraction_result' from the result
157157
extraction_result = result.get("extraction_result", [])
158158

159159
if not extraction_result:
160-
return total_embedding_cost, total_llm_cost, total_embedding_tokens, total_llm_tokens
160+
return None, None, None, None
161161

162162
extraction_data = extraction_result[0].get("result", "")
163163

@@ -170,17 +170,25 @@ def calculate_cost_and_tokens(result):
170170
extraction_data = {}
171171

172172

173-
metadata = extraction_data.get("metadata", {})
174-
embedding_llm = metadata.get("embedding", [])
175-
extraction_llm = metadata.get("extraction_llm", [])
176-
177-
# Calculate total cost
178-
total_embedding_cost += sum(float(item.get("cost_in_dollars", "0")) for item in embedding_llm)
179-
total_llm_cost += sum(float(item.get("cost_in_dollars", "0")) for item in extraction_llm)
180-
181-
# Calculate total tokens
182-
total_embedding_tokens += sum(item.get("embedding_tokens", 0) for item in embedding_llm)
183-
total_llm_tokens += sum(item.get("total_tokens", 0) for item in extraction_llm)
173+
metadata = extraction_data.get("metadata", None)
174+
embedding_llm = metadata.get("embedding") if metadata else None
175+
extraction_llm = metadata.get("extraction_llm") if metadata else None
176+
177+
#Process embedding costs and tokens if embedding_llm list exists and is not empty
178+
if embedding_llm and not []:
179+
total_embedding_cost = 0.0
180+
total_embedding_tokens = 0
181+
for item in embedding_llm:
182+
total_embedding_cost += float(item.get("cost_in_dollars", "0"))
183+
total_embedding_tokens += item.get("embedding_tokens", 0)
184+
185+
#Process embedding costs and tokens if extraction_llm list exists and is not empty
186+
if extraction_llm and not []:
187+
total_llm_cost = 0.0
188+
total_llm_tokens = 0
189+
for item in extraction_llm:
190+
total_llm_cost += float(item.get("cost_in_dollars", "0"))
191+
total_llm_tokens += item.get("total_tokens", 0)
184192

185193
return total_embedding_cost, total_llm_cost, total_embedding_tokens, total_llm_tokens
186194

@@ -229,11 +237,13 @@ def print_report():
229237
# Tabulate the data with column headers
230238
headers = ["File Name", "Execution Status", "Time Elapsed (seconds)", "Total Embedding Cost", "Total Embedding Tokens", "Total LLM Cost", "Total LLM Tokens"]
231239

232-
# Wrap text in each column to a specific width (e.g., 30 characters for file names and 20 for others)
240+
# Wrap text in each column to a specific width (e.g., 30 characters for file names and 20 for others) and return None if the value is NULL
233241
formatted_data = []
234242
for row in report_data:
235243
formatted_row = [
236-
textwrap.fill(str(cell), width=30) if isinstance(cell, str) else f"{cell:.8f}" if isinstance(cell, float) else cell
244+
"None" if cell is None else
245+
textwrap.fill(str(cell), width=30) if isinstance(cell, str) else
246+
f"{cell:.8f}" if isinstance(cell, float) else cell
237247
for cell in row
238248
]
239249
formatted_data.append(formatted_row)

0 commit comments

Comments
 (0)