-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.py
More file actions
196 lines (162 loc) · 7.1 KB
/
models.py
File metadata and controls
196 lines (162 loc) · 7.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
"""
models.py — Core data structures for the Excel Workbook Risk Diagnostic Tool.
All data classes are pure value objects with no business logic beyond
score computation, RAG classification, and simple filtering helpers. The
only project module imported here is `settings` (severity weights and RAG
thresholds), so this file sits at the base of the dependency graph.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Optional
import settings
@dataclass
class Finding:
    """
    One risk finding raised against a workbook.

    Attributes:
        check_id:    Numeric identifier (1 to 24) of the spec check that
                     produced this finding.
        name:        Short display label used in tables and badges.
        severity:    "High", "Medium", or "Low".
        category:    Checker category: "Formula", "Error", "Link",
                     "Structure", "VBA", or "Actuarial".
        description: Factual statement of what was detected (cell
                     references, counts, etc.).
        sheet_name:  Worksheet containing the issue, or "Workbook" when the
                     finding applies to the file as a whole.
        cell_ref:    Cell address such as "B14"; empty string for
                     sheet-level findings.
        explanation: Plain-English rationale for why this is a risk,
                     written for inclusion in the PDF report.
    """

    check_id: int
    name: str
    severity: str
    category: str
    description: str
    sheet_name: str
    cell_ref: str
    explanation: str

    def score_contribution(self) -> int:
        """Return the risk-score points for this finding's severity.

        The weight is read from ``settings.SEVERITY_WEIGHTS``; a severity
        that has no entry there contributes 0.
        """
        weights = settings.SEVERITY_WEIGHTS
        return weights.get(self.severity, 0)
@dataclass
class WorkbookAnalysisResult:
    """
    Aggregated result of analysing a single Excel workbook.

    Attributes:
        filename:                Original filename of the uploaded workbook.
        file_path:               Path to the temporary file used during analysis.
        file_size_mb:            File size in megabytes.
        analysis_timestamp:      UTC datetime when analysis began.
        findings:                All findings detected, across all checkers.
        high_sensitivity_sheets: Sheet names identified as actuarially sensitive.
        total_score:             Weighted sum of all finding severities; only
                                 refreshed when compute_score() is called.
        rag_rating:              "Green", "Amber", or "Red"; only refreshed
                                 when compute_rag() is called.
        error_message:           Non-empty string if the file could not be
                                 analysed (e.g. password-protected). When set,
                                 findings will be empty and the PDF will note
                                 the file was unanalysable.
        ai_commentary:           Optional AICommentary container; None until
                                 one is attached.
    """

    filename: str
    file_path: Path
    file_size_mb: float
    analysis_timestamp: datetime
    findings: list[Finding] = field(default_factory=list)
    high_sensitivity_sheets: list[str] = field(default_factory=list)
    total_score: int = 0
    rag_rating: str = "Green"
    error_message: str = ""
    ai_commentary: Optional[AICommentary] = None

    # ------------------------------------------------------------------
    # Computed helpers
    # ------------------------------------------------------------------
    def compute_score(self) -> int:
        """
        Calculate and store the weighted risk score from all findings.

        Returns:
            The total integer score (also written to ``total_score``).
        """
        self.total_score = sum(f.score_contribution() for f in self.findings)
        return self.total_score

    def compute_rag(self) -> str:
        """
        Map the current total_score to a RAG (Red/Amber/Green) rating.

        Uses thresholds from settings.RAG_GREEN_MAX and settings.RAG_AMBER_MAX.
        The score is read as stored and is not recomputed here, so call
        compute_score() first if the findings have changed.

        Returns:
            One of "Green", "Amber", or "Red" (also written to ``rag_rating``).
        """
        if self.total_score <= settings.RAG_GREEN_MAX:
            self.rag_rating = "Green"
        elif self.total_score <= settings.RAG_AMBER_MAX:
            self.rag_rating = "Amber"
        else:
            self.rag_rating = "Red"
        return self.rag_rating

    def findings_by_severity(self, severity: str) -> list[Finding]:
        """Return all findings matching the given severity level."""
        return [f for f in self.findings if f.severity == severity]

    def findings_by_category(self, category: str) -> list[Finding]:
        """Return all findings matching the given category string."""
        return [f for f in self.findings if f.category == category]

    def top_findings(self, n: int = 5) -> list[Finding]:
        """
        Return the top-N findings sorted by severity (High first),
        then by check_id for deterministic ordering.

        Findings with an unrecognised severity sort after "Low". A zero or
        negative ``n`` yields an empty list.
        """
        severity_order = {"High": 0, "Medium": 1, "Low": 2}
        sorted_findings = sorted(
            self.findings,
            key=lambda f: (severity_order.get(f.severity, 3), f.check_id),
        )
        # Clamp n: a negative slice bound would count from the end and return
        # "all but the last |n|" findings instead of none.
        return sorted_findings[: max(n, 0)]

    def count_by_severity(self) -> dict[str, int]:
        """
        Return a dict of {severity: count} for all findings.

        Always contains the keys "High", "Medium" and "Low"; findings with
        any other severity value are not counted.
        """
        counts: dict[str, int] = {"High": 0, "Medium": 0, "Low": 0}
        for f in self.findings:
            if f.severity in counts:
                counts[f.severity] += 1
        return counts
# ---------------------------------------------------------------------------
# AI Extension dataclasses
# ---------------------------------------------------------------------------
@dataclass
class SheetSummary:
    """
    Per-sheet extract used to build the AI prompt digest.

    Attributes:
        name:              Worksheet name.
        row_count:         Number of rows recorded for the sheet.
        col_count:         Number of columns recorded for the sheet.
        is_sensitive:      True if the name matches actuarial keywords.
        headers:           First-row and first-column string values,
                           deduplicated.
        top_formulas:      Up to AI_TOP_FORMULAS_PER_SHEET formulas, sorted
                           by complexity score descending.
        named_ranges:      Names of named ranges scoped to this sheet.
        references_sheets: Other sheet names this sheet's formulas reference.
    """

    name: str
    row_count: int
    col_count: int
    is_sensitive: bool
    headers: list[str]
    top_formulas: list[str]
    named_ranges: list[str]
    references_sheets: list[str]
@dataclass
class WorkbookDigest:
    """
    Token-efficient structured summary of a workbook for AI analysis.

    Attributes:
        file_name:             Name of the workbook file.
        sheet_summaries:       One SheetSummary per summarised sheet.
        workbook_named_ranges: Mapping of name -> formula or scalar value,
                               held as a string.
        vba_present:           Whether VBA is present in the workbook.
        vba_module_names:      Names of the VBA modules.
    """

    file_name: str
    sheet_summaries: list[SheetSummary]
    workbook_named_ranges: dict[str, str]
    vba_present: bool
    vba_module_names: list[str]
@dataclass
class FormulaExplanation:
    """
    AI-generated plain-English explanation of a single formula.

    Attributes:
        sheet_name:   Worksheet the formula belongs to.
        cell_address: Address of the cell holding the formula.
        formula:      The formula text being explained.
        explanation:  The plain-English explanation.
    """

    sheet_name: str
    cell_address: str
    formula: str
    explanation: str
@dataclass
class SheetNarrative:
    """
    AI-generated narrative describing a single worksheet.

    Attributes:
        sheet_name: Worksheet the narrative is about.
        narrative:  The narrative text.
    """

    sheet_name: str
    narrative: str
@dataclass
class AICommentary:
    """
    Container for all AI-generated commentary features.

    Every field is optional, so an instance can be created empty and
    filled in piece by piece.

    Attributes:
        findings_narrative:    Narrative text about the findings, or None.
        workbook_purpose:      Text describing the workbook's purpose, or None.
        sheet_narratives:      Per-sheet SheetNarrative entries.
        formula_explanations:  Per-formula FormulaExplanation entries.
        assumption_commentary: Commentary text on assumptions, or None.
        api_error:             Populated if any API call fails; partial
                               results are still used.
    """

    findings_narrative: Optional[str] = None
    workbook_purpose: Optional[str] = None
    sheet_narratives: list[SheetNarrative] = field(default_factory=list)
    formula_explanations: list[FormulaExplanation] = field(default_factory=list)
    assumption_commentary: Optional[str] = None
    api_error: Optional[str] = None