Skip to content

fix: Do not double count secrets on /explain copilot function #581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 14, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions src/codegate/pipeline/secrets/secrets.py
Original file line number Diff line number Diff line change
@@ -106,10 +106,10 @@ def _get_surrounding_secret_lines(
end_line = min(secret_line + surrounding_lines, len(lines))
return "\n".join(lines[start_line:end_line])

def obfuscate(self, text: str) -> tuple[str, int]:
def obfuscate(self, text: str) -> tuple[str, List[Match]]:
matches = CodegateSignatures.find_in_string(text)
if not matches:
return text, 0
return text, []

logger.debug(f"Found {len(matches)} secrets in the user message")

@@ -133,16 +133,16 @@ def obfuscate(self, text: str) -> tuple[str, int]:
protected_text = list(text)

# Store matches for logging
found_secrets = 0
found_secrets = []

# First pass. Replace each match with its encrypted value
logger.info("\nFound secrets:")
logger.info(f"\nFound {len(absolute_matches)} secrets:")
for start, end, match in absolute_matches:
hidden_secret = self._hide_secret(match)

# Replace the secret in the text
protected_text[start:end] = hidden_secret
found_secrets += 1
found_secrets.append(match)
# Log the findings
logger.info(
f"\nService: {match.service}"
@@ -228,7 +228,7 @@ def name(self) -> str:

def _redact_text(
self, text: str, secrets_manager: SecretsManager, session_id: str, context: PipelineContext
) -> tuple[str, int]:
) -> tuple[str, List[Match]]:
"""
Find and encrypt secrets in the given text.

@@ -269,7 +269,7 @@ async def process(
raise ValueError("Session ID not found in context")

new_request = request.copy()
total_redacted = 0
total_matches = []

# Process all messages
last_assistant_idx = -1
@@ -281,15 +281,18 @@ async def process(
for i, message in enumerate(new_request["messages"]):
if "content" in message and message["content"]:
# Protect the text
protected_string, redacted_count = self._redact_text(
protected_string, secrets_matched = self._redact_text(
str(message["content"]), secrets_manager, session_id, context
)
new_request["messages"][i]["content"] = protected_string

# Sum redacted count for messages after the last assistant message
# Append the matches for messages after the last assistant message
if i > last_assistant_idx:
total_redacted += redacted_count
total_matches += secrets_matched

# Do not count repeated secret matches
set_secrets_value = set(match.value for match in total_matches)
total_redacted = len(set_secrets_value)
context.secrets_found = total_redacted > 0
logger.info(f"Total secrets redacted since last assistant message: {total_redacted}")

@@ -362,7 +365,6 @@ async def process_chunk(
if match:
# Found a complete marker, process it
encrypted_value = match.group(1)
print("----> encrypted_value: ", encrypted_value)
original_value = input_context.sensitive.manager.get_original_value(
encrypted_value,
input_context.sensitive.session_id,
@@ -371,8 +373,6 @@ async def process_chunk(
if original_value is None:
# If value not found, leave as is
original_value = match.group(0) # Keep the REDACTED marker
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, sorry, this is some leftover debugging code of mine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I assumed so. No problem :)

print("----> original_value: ", original_value)

# Post an alert with the redacted content
input_context.add_alert(self.name, trigger_string=encrypted_value)
12 changes: 6 additions & 6 deletions tests/pipeline/secrets/test_secrets.py
Original file line number Diff line number Diff line change
@@ -97,9 +97,9 @@ def test_hide_secret(self):
def test_obfuscate(self):
# Test text with a secret
text = "API_KEY=AKIAIOSFODNN7EXAMPLE\nOther text"
protected, count = self.encryptor.obfuscate(text)
protected, matched_secrets = self.encryptor.obfuscate(text)

assert count == 1
assert len(matched_secrets) == 1
assert "REDACTED<$" in protected
assert "AKIAIOSFODNN7EXAMPLE" not in protected
assert "Other text" in protected
@@ -128,9 +128,9 @@ def test_hide_secret(self):
def test_obfuscate(self):
# Test text with multiple secrets
text = "API_KEY=AKIAIOSFODNN7EXAMPLE\nPASSWORD=AKIAIOSFODNN7EXAMPLE"
protected, count = self.obfuscator.obfuscate(text)
protected, matched_secrets = self.obfuscator.obfuscate(text)

assert count == 2
assert len(matched_secrets) == 2
assert "AKIAIOSFODNN7EXAMPLE" not in protected
assert "*" * 32 in protected

@@ -140,9 +140,9 @@ def test_obfuscate(self):

def test_obfuscate_no_secrets(self):
text = "Regular text without secrets"
protected, count = self.obfuscator.obfuscate(text)
protected, matched_secrets = self.obfuscator.obfuscate(text)

assert count == 0
assert len(matched_secrets) == 0
assert protected == text