Skip to content

Commit aa3efcd

Browse files
committed
Merge remote-tracking branch 'upstream/develop' into bugfix-duplicate-healthchecks-docker-compose
2 parents 2d2ccd1 + 01ccf9a commit aa3efcd

File tree

1 file changed

+56
-6
lines changed

1 file changed

+56
-6
lines changed
Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,76 @@
11
import re
22
import logging
3+
34
logger = logging.getLogger(__name__)
45
def sanitize_input(user_input: str, max_length: int = 5000) -> str:
    """
    Sanitize user input: strip markup, redact identifiers and bound the length.

    Processing order:
      1. <script>/<style> elements are removed together with their contents.
      2. Any remaining tags are stripped (their inner text is kept).
      3. Email addresses, medical record numbers and phone numbers are
         replaced with bracketed placeholders.
      4. Pronouns are rewritten by normalize_pronouns().
      5. Runs of whitespace are collapsed to a single space.
      6. The result is truncated to max_length characters and stripped.

    Args:
        user_input (str): The raw input string from the user.
        max_length (int): Maximum number of characters to keep. Defaults to 5000.

    Returns:
        str: The sanitized input string, or "" if any step raises.
    """
    try:
        sanitized = user_input

        # Drop script/style elements *including* their bodies; DOTALL lets the
        # match span line breaks so multi-line blocks are not left behind.
        sanitized = re.sub(
            r'<(script|style)\b.*?>.*?</\1\s*>',
            '',
            sanitized,
            flags=re.IGNORECASE | re.DOTALL,
        )

        # Strip any remaining tags, including tags broken across lines.
        sanitized = re.sub(r'<.*?>', '', sanitized, flags=re.DOTALL)

        # Redact emails first: the phone pattern would otherwise consume long
        # digit runs inside the local part and leave the domain exposed.
        sanitized = re.sub(
            r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
            '[Email Address]',
            sanitized,
        )

        # Redact MRNs before phone numbers: a 10+ digit MRN also matches the
        # phone pattern and would end up as "MRN: [Phone Number]".
        sanitized = re.sub(
            r'\bMRN[:\s]*\d+\b',
            '[Medical Record Number]',
            sanitized,
            flags=re.IGNORECASE,
        )

        # Redact phone numbers (simple pattern: 10+ digits, spaces or dashes).
        sanitized = re.sub(r'\+?\d[\d -]{8,}\d', '[Phone Number]', sanitized)

        # Normalize pronouns
        sanitized = normalize_pronouns(sanitized)

        # Collapse whitespace runs to one space; replacing them with nothing
        # would glue adjacent words together.
        sanitized = re.sub(r'\s+', ' ', sanitized)

        # Bound the length; strip afterwards so a cut never leaves a
        # trailing space.
        return sanitized[:max_length].strip()
    except Exception as e:
        # Best-effort contract: callers get an empty string rather than an error.
        logger.error(f"Error sanitizing input: {e}")
        return ""
48+
49+
def normalize_pronouns(text: str) -> str:
    """
    Normalize first and second person pronouns to third person clinical language.

    First person forms (I, me, my, mine, myself) become "the patient" or
    "the patient's"; second person forms (you, your, yours, yourself) become
    "the clinician" or "the clinician's". The contractions I'm / I've / I'll
    and you're / you've / you'll are expanded so no dangling "'m" or "'re"
    is left behind.

    Capitalization: a replacement is capitalized when the pronoun it replaces
    was capitalized. "I" is always written in upper case, so for "I" forms the
    replacement is capitalized only at the start of the text or directly
    after ".", "!" or "?".

    Verb agreement is not adjusted ("I walk" becomes "the patient walk").

    Args:
        text (str): The input text containing pronouns.

    Returns:
        str: The text with normalized pronouns.
    """
    third_person = {
        "i'm": "the patient is",
        "i've": "the patient has",
        "i'll": "the patient will",
        "i": "the patient",
        "me": "the patient",
        "myself": "the patient",
        "my": "the patient's",
        "mine": "the patient's",
        "you're": "the clinician is",
        "you've": "the clinician has",
        "you'll": "the clinician will",
        "you": "the clinician",
        "yourself": "the clinician",
        "your": "the clinician's",
        "yours": "the clinician's",
    }

    # "I" forms stay case-sensitive (a lone lowercase "i" is often a list
    # marker); every other pronoun is matched case-insensitively. Contractions
    # and longer words are listed before their prefixes. \u2019 is the
    # typographic apostrophe.
    pronoun_re = re.compile(
        r"\b(?:I['\u2019]m|I['\u2019]ve|I['\u2019]ll|I"
        r"|(?i:you['\u2019]re|you['\u2019]ve|you['\u2019]ll"
        r"|yourself|yours|your|you|myself|mine|my|me))\b"
    )

    def _to_third_person(match):
        word = match.group(0)
        replacement = third_person[word.lower().replace("\u2019", "'")]

        if word[0] == "I":
            # The pronoun's own case says nothing here, so look at what
            # precedes it: nothing, or sentence-ending punctuation.
            idx = match.start() - 1
            while idx >= 0 and text[idx].isspace():
                idx -= 1
            capitalize = idx < 0 or text[idx] in ".!?"
        else:
            capitalize = word[0].isupper()

        if capitalize:
            return replacement[0].upper() + replacement[1:]
        return replacement

    return pronoun_re.sub(_to_third_person, text)
75+
76+

0 commit comments

Comments
 (0)