From 795feb2fa6c2fa5112a91420f812d51c23dc1dec Mon Sep 17 00:00:00 2001 From: Akhil Bolla Date: Mon, 12 Jan 2026 17:54:57 -0500 Subject: [PATCH 1/6] sanitize function start --- server/api/views/assistant/sanitizer.py | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 server/api/views/assistant/sanitizer.py diff --git a/server/api/views/assistant/sanitizer.py b/server/api/views/assistant/sanitizer.py new file mode 100644 index 00000000..0ab7b245 --- /dev/null +++ b/server/api/views/assistant/sanitizer.py @@ -0,0 +1,34 @@ +import re +import logging + +logger = logging.getLogger(__name__) +def sanitize_input(user_input:str) -> str: + """ + Sanitize user input to prevent injection attacks and remove unwanted characters. + + Args: + user_input (str): The raw input string from the user. + + Returns: + str: The sanitized input string. + """ + try: + # Remove any script tags + sanitized = re.sub(r'.*?', '', user_input, flags=re.IGNORECASE) + + # Remove any HTML tags + sanitized = re.sub(r'<.*?>', '', sanitized) + + # Escape special characters + sanitized = re.sub(r'["\'\\]', '', sanitized) + + # Optionally, limit length to prevent buffer overflow attacks + max_length = 1000 + if len(sanitized) > max_length: + sanitized = sanitized[:max_length] + + return sanitized.strip() + except Exception as e: + logger.error(f"Error sanitizing input: {e}") + return "" + From 0b5b6f8d894e40c51108c6dd68be35964bc45f8e Mon Sep 17 00:00:00 2001 From: Akhil Bolla Date: Wed, 14 Jan 2026 12:17:35 -0500 Subject: [PATCH 2/6] sanitizer --- server/api/views/assistant/sanitizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/api/views/assistant/sanitizer.py b/server/api/views/assistant/sanitizer.py index 0ab7b245..051031de 100644 --- a/server/api/views/assistant/sanitizer.py +++ b/server/api/views/assistant/sanitizer.py @@ -21,8 +21,8 @@ def sanitize_input(user_input:str) -> str: # Escape special characters sanitized = re.sub(r'["\'\\]', '', sanitized) - - # Optionally, limit length to prevent buffer overflow attacks + + # Limit length to prevent buffer overflow attacks max_length = 1000 if len(sanitized) > max_length: sanitized = sanitized[:max_length] From dc7c24be896d745080aa8342009674e8e0d2b984 Mon Sep 17 00:00:00 2001 From: Akhil Bolla Date: Fri, 23 Jan 2026 21:18:20 -0500 Subject: [PATCH 3/6] sanitizer --- .DS_Store | Bin 0 -> 6148 bytes server/api/views/assistant/sanitizer.py | 8 +++++++- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..9123aeaefab7f8c18bb9f785d95638322a39ee6b GIT binary patch literal 6148 zcmeHKF>V4u473A^kkU|6?iccd6(TRl2M~}B2`MPlSMjbqEi<+Y&_PFvL}ST2yI#+( zc8c@a%zXK^do`Pz*$hs!Gl#KppFXmuiZ~FCGd}Edciiqbhr@o9eLFzzvmME_(1!)cfY1w)i$V3cDlSdJHwlzEMF X?DxVUG3bZ~9jKoH>LQZ@zpcOzSZWog literal 0 HcmV?d00001 diff --git a/server/api/views/assistant/sanitizer.py b/server/api/views/assistant/sanitizer.py index 051031de..fb75b814 100644 --- a/server/api/views/assistant/sanitizer.py +++ b/server/api/views/assistant/sanitizer.py @@ -18,7 +18,13 @@ def sanitize_input(user_input:str) -> str: # Remove any HTML tags sanitized = re.sub(r'<.*?>', '', sanitized) - + + # Remove Phone Numbers + sanitized = re.sub(r'\+?\d[\d -]{8,}\d', '[Phone Number]', sanitized) + + # Remove Email Addresses + sanitized = re.sub(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', '[Email Address]', sanitized) + # Escape special characters sanitized = re.sub(r'["\'\\]', '', sanitized) From 871d69eb7925b92df47112882281d5fce293ceee Mon Sep 17 00:00:00 2001 From: Akhil Bolla Date: Tue, 27 Jan 2026 17:33:09 -0500 Subject: [PATCH 4/6] Sanitizer --- server/api/views/assistant/sanitizer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/server/api/views/assistant/sanitizer.py b/server/api/views/assistant/sanitizer.py index fb75b814..b0f2453f 100644 --- a/server/api/views/assistant/sanitizer.py +++ b/server/api/views/assistant/sanitizer.py @@ -13,10 +13,12 @@ def sanitize_input(user_input:str) -> str: str: The sanitized input string. """ try: - # Remove any script tags - sanitized = re.sub(r'.*?', '', user_input, flags=re.IGNORECASE) + sanitized = user_input - # Remove any HTML tags + # Remove any style tags + sanitized = re.sub(r'.*?', '', sanitized, flags=re.IGNORECASE) + + # Remove any HTML/script tags sanitized = re.sub(r'<.*?>', '', sanitized) # Remove Phone Numbers @@ -29,7 +31,7 @@ def sanitize_input(user_input:str) -> str: sanitized = re.sub(r'["\'\\]', '', sanitized) # Limit length to prevent buffer overflow attacks - max_length = 1000 + max_length = 5000 if len(sanitized) > max_length: sanitized = sanitized[:max_length] From abd10b80f67727defaaf270c9e56a83975af6500 Mon Sep 17 00:00:00 2001 From: Akhil Bolla Date: Thu, 29 Jan 2026 23:42:19 -0500 Subject: [PATCH 5/6] Sanitizer enhancements --- server/api/views/assistant/sanitizer.py | 36 +++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/server/api/views/assistant/sanitizer.py b/server/api/views/assistant/sanitizer.py index 10d9579a..c31b14b9 100644 --- a/server/api/views/assistant/sanitizer.py +++ b/server/api/views/assistant/sanitizer.py @@ -27,11 +27,17 @@ def sanitize_input(user_input:str) -> str: # Remove Email Addresses sanitized = re.sub(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', '[Email Address]', sanitized) + # Remove Medical Record Numbers (simple pattern) + sanitized = re.sub(r'\bMRN[:\s]*\d+\b', '[Medical Record Number]', sanitized, flags=re.IGNORECASE) + + # Normalize pronouns + sanitized = normalize_pronouns(sanitized) + # Escape special characters - sanitized = re.sub(r'["\'\\]', '', sanitized) + sanitized = re.sub(r'\s+', '', sanitized) # Limit length to prevent buffer overflow attacks - max_length = 1000 + max_length = 5000 if len(sanitized) > max_length: sanitized = sanitized[:max_length] @@ -39,4 +45,30 @@ def sanitize_input(user_input:str) -> str: except Exception as e: logger.error(f"Error sanitizing input: {e}") return "" + +def normalize_pronouns(text:str) -> str: + """ + Normalize first and second person pronouns to third person clinical language. + + Converts patient centric pronouns to a more neutral form. + Args: + text (str): The input text containing pronouns. + Returns: + str: The text with normalized pronouns. + """ + # Normalize first person possessives: I, me, my, mine -> the patient + text = re.sub(r'\bMy\b', 'The patient\'s', text) + text = re.sub(r'\bmy\b', 'the patient\'s', text) + + # First person subject: I -> the patient + text = re.sub(r'\bI\b', 'the patient', text) + + # First person object: me -> the patient + text = re.sub(r'\bme\b', 'the patient', text) + + # First person reflexive: myself -> the patient + text = re.sub(r'\bmyself\b', 'the patient', text) + # Second person: you, your -> the clinician + text = re.sub(r'\bYour\b', 'the clinician', text) + return text \ No newline at end of file From 8149f5d4be0f42e7e32c28fd9be491c1954510c9 Mon Sep 17 00:00:00 2001 From: Akhil Bolla Date: Fri, 30 Jan 2026 09:59:48 -0500 Subject: [PATCH 6/6] Sanitizer Improvements --- server/api/views/assistant/sanitizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/api/views/assistant/sanitizer.py b/server/api/views/assistant/sanitizer.py index c31b14b9..658a39ae 100644 --- a/server/api/views/assistant/sanitizer.py +++ b/server/api/views/assistant/sanitizer.py @@ -71,4 +71,4 @@ def normalize_pronouns(text:str) -> str: # Second person: you, your -> the clinician text = re.sub(r'\bYour\b', 'the clinician', text) - return text \ No newline at end of file + return text