-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsettings.py
More file actions
181 lines (147 loc) · 6.18 KB
/
settings.py
File metadata and controls
181 lines (147 loc) · 6.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
"""
settings.py — Central configuration for the Excel Workbook Risk Diagnostic Tool.
All tunable thresholds, keyword lists, and scoring parameters live here.
No other module should hardcode values that belong in this file.
"""
# ---------------------------------------------------------------------------
# File size thresholds
# ---------------------------------------------------------------------------
FILE_SIZE_WARN_MB: float = 20.0
"""Size in MB above which a workbook gets a Medium severity file-size finding."""

FILE_SIZE_CRITICAL_MB: float = 50.0
"""Size in MB above which a workbook gets a High severity file-size finding."""
# ---------------------------------------------------------------------------
# Cell population threshold
# ---------------------------------------------------------------------------
ROW_COUNT_WARN: int = 50_000
"""
A sheet holding more populated cells than this receives an Excessive Cells finding.

NOTE(review): the name says "row count", but the limit is described as a
populated-cell count — confirm against the check that consumes it. The name
is kept as-is so existing importers keep working.
"""
# ---------------------------------------------------------------------------
# Formula checks — numeric constants that are considered "trivially safe"
# and should NOT be flagged as hardcoded literals in formulas.
# ---------------------------------------------------------------------------
# Annotated as set[float] so fractional constants can be added later; type
# checkers accept the int members below wherever float is expected.
SAFE_NUMERIC_CONSTANTS: set[float] = {0, 1, 2, 10, 12, 100, 1000}
"""
Numeric values that the hardcoded-literal check never reports.

This is a set, so membership tests are cheap and duplicates are ignored.
Add further values to this set to suppress false positives for commonly
used constants.
"""
# ---------------------------------------------------------------------------
# Actuarial keyword list for high-sensitivity sheet detection
# ---------------------------------------------------------------------------
ACTUARIAL_KEYWORDS: list[str] = [
    "assumption",
    "mortality",
    "lapse",
    "discount",
    "reserve",
    "capital",
    "reinsurance",
    "premium",
    "claim",
    "exposure",
    "liability",
    "asset",
]
"""
Keywords that mark a sheet as a high-sensitivity actuarial sheet.

A sheet whose name contains any of these keywords (compared
case-insensitively) receives elevated scrutiny.
"""
# ---------------------------------------------------------------------------
# Severity scoring weights
# ---------------------------------------------------------------------------
SEVERITY_WEIGHTS: dict[str, int] = {
    "High": 10,
    "Medium": 3,
    "Low": 1,
}
"""Score points each finding adds to the workbook risk score, keyed by severity."""
# ---------------------------------------------------------------------------
# RAG score thresholds
# ---------------------------------------------------------------------------
# Banding of the total score:
#   score <= RAG_GREEN_MAX                  -> Green
#   RAG_GREEN_MAX < score <= RAG_AMBER_MAX  -> Amber
#   score > RAG_AMBER_MAX                   -> Red
RAG_GREEN_MAX: int = 10
"""Highest total score that still maps to Green (low risk)."""

RAG_AMBER_MAX: int = 40
"""Highest total score that maps to Amber; the Amber band starts just above RAG_GREEN_MAX."""
# ---------------------------------------------------------------------------
# VBA dangerous commands (treated as regex patterns)
# ---------------------------------------------------------------------------
DANGEROUS_VBA_COMMANDS: list[str] = [
    r"\bKill\b",
    r"\bShell\b",
    r"\bDeleteFile\b",
    r"\bSendMail\b",
    r"\bOpen\b.+\bFor\b.+\bOutput\b",
    r"\bFileCopy\b",
    r"\bMkDir\b",
    r"\bRmDir\b",
    r"\bCreateObject\b",
    r"\bWScript\b",
]
"""
Regular-expression patterns applied to individual lines of VBA source.
A line matching any pattern is reported as a dangerous VBA command
(High severity).

NOTE(review): VBA identifiers are case-insensitive; whether these patterns
are matched case-insensitively depends on the consuming check — confirm.
"""
# ---------------------------------------------------------------------------
# Volatile Excel functions
# ---------------------------------------------------------------------------
VOLATILE_FUNCTIONS: list[str] = [
    "NOW",
    "TODAY",
    "RAND",
    "RANDBETWEEN",
    "OFFSET",
    "INDIRECT",
]
"""
Excel functions that are recalculated on every worksheet change.
Using them in an actuarial model makes its results non-deterministic.

NOTE(review): Excel documents further volatile functions (e.g. CELL and INFO,
depending on their arguments) that are not listed here — confirm the omission
is deliberate.
"""
# ---------------------------------------------------------------------------
# Consistency check tuning
# ---------------------------------------------------------------------------
MIN_RANGE_FOR_CONSISTENCY_CHECK: int = 3
"""
Smallest contiguous column/row range, counted in cells, to which the
inconsistent-formula check is applied.
"""

MIN_SHEETS_FOR_ASSUMPTION_CHECK: int = 2
"""
Number of sheets a workbook needs before the cross-sheet assumption
consistency check is run.
"""
# ---------------------------------------------------------------------------
# Hardcoded literal check — minimum digit length for integer literals
# ---------------------------------------------------------------------------
MIN_LITERAL_DIGITS: int = 2
"""
Minimum number of digits an integer literal needs before the
hardcoded-literal check considers it. Shorter literals (i.e. single-digit
numbers) are skipped even when they are not in SAFE_NUMERIC_CONSTANTS.
"""
# ---------------------------------------------------------------------------
# Hardcoded literal check — repeat-occurrence threshold
# ---------------------------------------------------------------------------
LITERAL_MIN_OCCURRENCES: int = 2
"""
Number of formula cells, counted across the entire workbook, in which a
numeric literal must appear before check_hardcoded_literals() flags it.

- 2 (default): a number used in a single formula is not reported, since it
  may be a legitimate one-off constant. A number embedded in 2+ separate
  formula cells is reported because it should be centralised in a named
  assumption cell.
- 1: every occurrence is flagged regardless of frequency (maximum
  sensitivity).

Adjustable at runtime via the Settings panel in the Streamlit sidebar.
"""
# ---------------------------------------------------------------------------
# AI Commentary settings
# ---------------------------------------------------------------------------
AI_MODEL: str = "claude-sonnet-4-6"
"""ID of the Anthropic model that every AI commentary call uses."""

AI_MAX_TOKENS: int = 2048
"""Token cap applied to each AI API response."""

AI_TOP_FORMULAS_PER_SHEET: int = 15
"""Upper limit on formulas extracted from a single sheet for the digest."""

AI_MAX_HEADER_VALUES: int = 30
"""Upper limit on header strings extracted from a single sheet."""

AI_MAX_SHEETS_FOR_NARRATIVE: int = 25
"""Sheet count beyond which sheets appear by name only in the purpose prompt."""

AI_MAX_FORMULA_CHARS: int = 400
"""Length above which a formula is truncated with '...' before being sent to AI."""