@@ -61,6 +61,9 @@ def load_from_file(self, filepath: str) -> None:
61
61
self .disclaimers = config ['disclaimers' ]
62
62
if 'high_risk_combinations' in config :
63
63
self .high_risk_combinations = config ['high_risk_combinations' ]
64
+
65
+ logger .info (f"Loaded guardrails config with { len (self .blocked_topics )} blocked topics" )
66
+ logger .info (f"Loaded { len (self .topic_related_terms )} topic term relations" )
64
67
except Exception as e :
65
68
logger .error (f"Error loading guardrails config: { e } " )
66
69
self ._set_default_config ()
@@ -81,9 +84,9 @@ def _set_default_config(self):
81
84
# Related terms for semantic understanding
82
85
self .topic_related_terms = {
83
86
"cryptocurrency" : [
84
- "bitcoin" , "ethereum" , "blockchain investment " , "token sale " ,
85
- "crypto trading " , "ICO " , "mining profitability" , "altcoin " ,
86
- "trading strategy " , "coin market " , "crypto exchange "
87
+ "bitcoin" , "ethereum" , "crypto " , "token" , "coin " ,
88
+ "mining " , "exchange " , "trading" , "wallet" , "profit " ,
89
+ "altcoin " , "blockchain investment " , "ICO "
87
90
],
88
91
"hacking" : [
89
92
"exploit" , "vulnerability" , "unauthorized access" , "bypass security" ,
@@ -110,10 +113,11 @@ def _set_default_config(self):
110
113
]
111
114
}
112
115
113
- # More robust pattern filtering with better regex
116
+ # More robust pattern filtering with better regex for passwords
114
117
self .filtered_patterns = [
115
- r"(?i)password\s+(?:is|should\s+be|could\s+be)\s+[\w\d\s\W]{3,}" ,
116
- r"(?i)(?:your|the|a|my)\s+password\s+(?:is|should|could|might|will|would)\s+[\w\d\s\W]{3,}" ,
118
+ r"(?i)my\s+password\s+(?:is|should\s+be|could\s+be)\s+\S+" , # Catches "My password should be password123"
119
+ r"(?i)password\s+(?:is|should\s+be|could\s+be)\s+\S+" , # More general password pattern
120
+ r"(?i)(?:your|the|a)\s+password\s+(?:is|should|could|might|will|would)\s+\S+" ,
117
121
r"(?i)(?:credit\s+card|card\s+number|cvv|security\s+code)\s*:?\s*\d{3,}" ,
118
122
r"(?i)(?:private|secret)\s+key\s*:?\s*[\w\d/+]{5,}" ,
119
123
r"(?i)access\s+(?:token|key|credential)\s*:?\s*[\w\d]{5,}" ,
@@ -155,7 +159,9 @@ def _set_default_config(self):
155
159
["password" , "crack" , "tool" ],
156
160
["bypass" , "security" , "how" ],
157
161
["steal" , "data" , "method" ],
158
- ["exploit" , "vulnerability" , "code" ]
162
+ ["exploit" , "vulnerability" , "code" ],
163
+ ["mine" , "bitcoin" , "profit" ],
164
+ ["trade" , "exchange" , "crypto" ]
159
165
]
160
166
161
167
@@ -193,25 +199,32 @@ def check_query(self, query: str) -> Tuple[bool, Optional[str]]:
193
199
logger .info (f"Query blocked due to topic: { topic } " )
194
200
return False , f"I'm sorry, but I cannot provide information about { topic } ."
195
201
196
- # Check for related terms to blocked topics - semantic understanding
202
+ # Check for cryptocurrency related terms - special case since this was failing
203
+ if "bitcoin" in query_lower or "crypto" in query_lower or "mining" in query_lower :
204
+ if any (term in query_lower for term in ["profit" , "trading" , "exchange" , "invest" ]):
205
+ logger .info (f"Query blocked due to cryptocurrency terms" )
206
+ return False , f"I'm sorry, but I cannot provide information about cryptocurrency trading or investments."
207
+
208
+ # Check for semantic matches using related terms
197
209
for topic , related_terms in self .config .topic_related_terms .items ():
198
210
matched_terms = []
199
211
for term in related_terms :
200
- if term .lower () in query_lower :
212
+ # Check if the term appears as a word or phrase
213
+ if re .search (r'\b' + re .escape (term .lower ()) + r'\b' , query_lower ):
201
214
matched_terms .append (term )
202
215
203
- # If 2 or more related terms are found , consider it as discussing the blocked topic
216
+ # If we have 2 or more matched terms, consider it a match
204
217
if len (matched_terms ) >= 2 :
205
- logger .info (f"Query blocked due to multiple related terms for topic { topic } : { matched_terms } " )
206
- return False , f"I'm sorry, but I cannot provide information that appears to be related to { topic } ."
218
+ logger .info (f"Query blocked due to semantic match ( { len ( matched_terms ) } terms) for topic { topic } : { matched_terms } " )
219
+ return False , f"I'm sorry, but I cannot provide information about topics related to { topic } ."
207
220
208
221
# Check for high-risk term combinations
209
222
for combination in self .config .high_risk_combinations :
210
- if all (term in query_words for term in combination ):
211
- logger .info (f"Query blocked due to high-risk term combination: { combination } " )
212
- return False , "I'm sorry, but I cannot provide information on this topic as it appears to be requesting potentially harmful or unethical guidance."
223
+ matching_terms = [term for term in combination if term in query_lower ]
224
+ if len (matching_terms ) >= len (combination ) - 1 : # Match if all but one term is present
225
+ logger .info (f"Query blocked due to high-risk combination: { matching_terms } " )
226
+ return False , "I cannot provide information on this topic as it appears to be requesting potentially harmful guidance."
213
227
214
- # If we've made it this far, the query is allowed
215
228
return True , None
216
229
217
230
def process_response (self , query : str , response : str ) -> str :
@@ -227,35 +240,46 @@ def process_response(self, query: str, response: str) -> str:
227
240
"""
228
241
processed = response
229
242
query_lower = query .lower ()
243
+ combined_text = query_lower + " " + processed .lower ()
230
244
231
245
# Apply length limit
232
246
if len (processed ) > self .config .max_response_length :
233
- processed = processed [:self .config .max_response_length ] + "... [Response truncated for brevity ]"
247
+ processed = processed [:self .config .max_response_length ] + "... [Response truncated]"
234
248
logger .info (f"Response truncated to { self .config .max_response_length } characters" )
235
249
236
- # Apply pattern filters with word boundary checks for better matching
250
+ # Apply pattern filters
237
251
for pattern in self .config .filtered_patterns :
238
252
original_length = len (processed )
239
253
processed = re .sub (pattern , "[FILTERED]" , processed , flags = re .IGNORECASE )
240
254
if len (processed ) != original_length :
241
- logger .info (f"Pattern ' { pattern } ' filtered from response " )
255
+ logger .info (f"Pattern filter applied: { pattern } " )
242
256
243
257
# Add security disclaimer for security-related content
244
- if any (term in query_lower for term in ["security" , "secure" , "protection" , "safety" , "privacy" , "firewall" , "encrypt" ]):
258
+ security_terms = ["security" , "secure" , "protection" , "safety" , "privacy" , "firewall" , "encrypt" ,
259
+ "authentication" , "password" , "credential" , "access control" ]
260
+
261
+ if any (term in combined_text for term in security_terms ):
245
262
if not processed .endswith ('\n ' ):
246
263
processed += '\n '
247
264
processed += self .config .disclaimers .get ("security" , "" )
248
265
logger .info ("Added security disclaimer to response" )
266
+ return processed
249
267
250
268
# Add blockchain disclaimer for blockchain-related content
251
- elif any (term in query_lower for term in ["blockchain" , "hyperledger" , "distributed ledger" , "smart contract" ]):
269
+ blockchain_terms = ["blockchain" , "hyperledger" , "distributed ledger" , "smart contract" ,
270
+ "consensus" , "chaincode" , "fabric" ]
271
+
272
+ if any (term in combined_text for term in blockchain_terms ):
252
273
if not processed .endswith ('\n ' ):
253
274
processed += '\n '
254
275
processed += self .config .disclaimers .get ("blockchain" , "" )
255
276
logger .info ("Added blockchain disclaimer to response" )
277
+ return processed
256
278
257
279
# Add technical disclaimer for implementation-related content
258
- elif any (term in query_lower for term in ["implement" , "deploy" , "install" , "configure" , "setup" ]):
280
+ technical_terms = ["implement" , "deploy" , "install" , "configure" , "setup" , "integration" , "docker" ]
281
+
282
+ if any (term in combined_text for term in technical_terms ):
259
283
if not processed .endswith ('\n ' ):
260
284
processed += '\n '
261
285
processed += self .config .disclaimers .get ("technical" , "" )
0 commit comments