@@ -106,10 +106,10 @@ def _get_surrounding_secret_lines(
106
106
end_line = min (secret_line + surrounding_lines , len (lines ))
107
107
return "\n " .join (lines [start_line :end_line ])
108
108
109
- def obfuscate (self , text : str ) -> tuple [str , int ]:
109
+ def obfuscate (self , text : str ) -> tuple [str , List [ Match ] ]:
110
110
matches = CodegateSignatures .find_in_string (text )
111
111
if not matches :
112
- return text , 0
112
+ return text , []
113
113
114
114
logger .debug (f"Found { len (matches )} secrets in the user message" )
115
115
@@ -133,16 +133,16 @@ def obfuscate(self, text: str) -> tuple[str, int]:
133
133
protected_text = list (text )
134
134
135
135
# Store matches for logging
136
- found_secrets = 0
136
+ found_secrets = []
137
137
138
138
# First pass. Replace each match with its encrypted value
139
- logger .info ("\n Found secrets:" )
139
+ logger .info (f "\n Found { len ( absolute_matches ) } secrets:" )
140
140
for start , end , match in absolute_matches :
141
141
hidden_secret = self ._hide_secret (match )
142
142
143
143
# Replace the secret in the text
144
144
protected_text [start :end ] = hidden_secret
145
- found_secrets += 1
145
+ found_secrets . append ( match )
146
146
# Log the findings
147
147
logger .info (
148
148
f"\n Service: { match .service } "
@@ -228,7 +228,7 @@ def name(self) -> str:
228
228
229
229
def _redact_text (
230
230
self , text : str , secrets_manager : SecretsManager , session_id : str , context : PipelineContext
231
- ) -> tuple [str , int ]:
231
+ ) -> tuple [str , List [ Match ] ]:
232
232
"""
233
233
Find and encrypt secrets in the given text.
234
234
@@ -269,7 +269,7 @@ async def process(
269
269
raise ValueError ("Session ID not found in context" )
270
270
271
271
new_request = request .copy ()
272
- total_redacted = 0
272
+ total_matches = []
273
273
274
274
# Process all messages
275
275
last_assistant_idx = - 1
@@ -281,15 +281,18 @@ async def process(
281
281
for i , message in enumerate (new_request ["messages" ]):
282
282
if "content" in message and message ["content" ]:
283
283
# Protect the text
284
- protected_string , redacted_count = self ._redact_text (
284
+ protected_string , secrets_matched = self ._redact_text (
285
285
str (message ["content" ]), secrets_manager , session_id , context
286
286
)
287
287
new_request ["messages" ][i ]["content" ] = protected_string
288
288
289
- # Sum redacted count for messages after the last assistant message
289
+ # Append the matches for messages after the last assistant message
290
290
if i > last_assistant_idx :
291
- total_redacted += redacted_count
291
+ total_matches += secrets_matched
292
292
293
+ # Not count repeated secret matches
294
+ set_secrets_value = set (match .value for match in total_matches )
295
+ total_redacted = len (set_secrets_value )
293
296
context .secrets_found = total_redacted > 0
294
297
logger .info (f"Total secrets redacted since last assistant message: { total_redacted } " )
295
298
@@ -362,7 +365,6 @@ async def process_chunk(
362
365
if match :
363
366
# Found a complete marker, process it
364
367
encrypted_value = match .group (1 )
365
- print ("----> encrypted_value: " , encrypted_value )
366
368
original_value = input_context .sensitive .manager .get_original_value (
367
369
encrypted_value ,
368
370
input_context .sensitive .session_id ,
@@ -371,8 +373,6 @@ async def process_chunk(
371
373
if original_value is None :
372
374
# If value not found, leave as is
373
375
original_value = match .group (0 ) # Keep the REDACTED marker
374
- else :
375
- print ("----> original_value: " , original_value )
376
376
377
377
# Post an alert with the redacted content
378
378
input_context .add_alert (self .name , trigger_string = encrypted_value )
0 commit comments