Skip to content

Commit 4b68306

Browse files
committed
Make sanitizer 50x faster
Signed-off-by: Denys Fedoryshchenko <[email protected]>
1 parent 22aa560 commit 4b68306

File tree

1 file changed

+40
-7
lines changed

1 file changed

+40
-7
lines changed

src/lava_callback.py

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,39 @@ def export(self):
117117
return promstr
118118

119119

120+
class _SanitizeTable(dict):
    """Lazily populated ``str.translate`` table covering every code point.

    ``str.translate`` looks characters up by ordinal via ``__getitem__``.
    Ordinals not stored yet fall through to ``__missing__``, which
    classifies the character once and caches the answer, so each distinct
    character costs a single Python-level call.
    """

    def __missing__(self, codepoint):
        char = chr(codepoint)
        if char == '\x00':
            # None tells str.translate to delete the character
            replacement = None
        elif char == '\n' or char.isprintable():
            # Mapping an ordinal to itself keeps the character unchanged
            replacement = codepoint
        else:
            replacement = '?'
        self[codepoint] = replacement
        return replacement


class LogSanitizer:
    """Efficient log sanitization utility"""

    # Shared translation table. It is filled on demand rather than being
    # pre-computed for range(256) only: a fixed 0-255 table lets
    # non-printable characters above U+00FF (zero-width space, line
    # separator, ...) through unchanged.
    _TRANSLATION_TABLE = _SanitizeTable()

    @classmethod
    def sanitize(cls, data: str, max_size: int = None) -> str:
        """
        Sanitize log data by replacing non-printable characters.

        NUL characters are deleted; every other character that is neither
        printable (per ``str.isprintable``) nor a newline is replaced
        with '?'.

        Args:
            data: Input string to sanitize
            max_size: Optional maximum size limit, in characters, applied
                before sanitizing. A falsy value (None or 0) disables
                truncation.

        Returns:
            Sanitized string, or '' when data is empty or None
        """
        if not data:
            return ''

        # Truncate if needed
        if max_size and len(data) > max_size:
            data = data[:max_size]

        # A single pass both drops NULs and replaces non-printables
        return data.translate(cls._TRANSLATION_TABLE)
151+
152+
120153
metrics = Metrics('pipeline_callback')
121154

122155

@@ -165,13 +198,11 @@ def _upload_log(log_parser, job_node, storage):
165198
data = log_parser.get_text()
166199
if not data or len(data) == 0:
167200
return None
168-
# Delete NULL characters from log data
169-
data = data.replace('\x00', '')
170-
# Sanitize log data from non-printable characters (except newline)
171-
# replace them with '?', original still exists in cb data
172-
data = ''.join([c if c.isprintable() or c == '\n' else
173-
'?' for c in data])
174-
f.write(data)
201+
202+
# Sanitize log data: drop NULs, replace other non-printable characters (except newline) with '?'
203+
sanitized_data = LogSanitizer.sanitize(data)
204+
205+
f.write(sanitized_data)
175206
src = os.path.join(tmp_dir, 'lava_log.txt.gz')
176207
return _upload_file(storage, job_node, src, 'log.txt.gz')
177208

@@ -766,6 +797,7 @@ async def apimetrics():
766797

767798
return Response(content=export_str, media_type='text/plain')
768799

800+
769801
@app.exception_handler(Exception)
770802
async def general_exception_handler(request: Request, exc: Exception):
771803
logging.error(f"Unhandled exception: {exc}")
@@ -774,6 +806,7 @@ async def general_exception_handler(request: Request, exc: Exception):
774806
content={"message": "Internal server error"}
775807
)
776808

809+
777810
# Default built-in development server, not suitable for production
778811
if __name__ == '__main__':
779812
tokens = SETTINGS.get(SETTINGS_PREFIX)

0 commit comments

Comments
 (0)