# atandtscan.py -- tokenizer for AT&T-syntax x86 assembly, built on the SPARK framework.
from spark import GenericScanner
class Token:
    """A single lexer token.

    SPARK compares tokens directly against type strings, so equality is
    defined on ``type`` alone.  The original Python-2-only ``__cmp__`` is
    kept for backward compatibility; rich comparisons are added so the
    same ``token == 'typename'`` idiom also works on Python 3.
    """
    def __init__(self, type, attr=None, lineno='???'):
        self.type = type      # token kind, e.g. 'mnemonic' or 'NEWLINE'
        self.attr = attr      # matched source text, if any
        self.lineno = lineno  # line number, or '???' when unknown

    def __cmp__(self, o):
        # Python 2 only (cmp/__cmp__ no longer exist on Python 3).
        return cmp(self.type, o)
    ###
    # Rich comparisons mirroring __cmp__'s equality semantics for Python 3.
    def __eq__(self, o):
        return self.type == o

    def __ne__(self, o):
        return self.type != o

    def __hash__(self):
        # Hash consistently with __eq__ (tokens equal to their type string).
        return hash(self.type)

    def __repr__(self):
        return str(self.type)

    # So we can use this as a leaf - see release notes for SPARK
    def __getitem__(self, i):
        raise IndexError
class LineScanner(GenericScanner):
    """Line-oriented scanner that tracks line numbers.

    Each input line has its inline ``//`` comment stripped, is handed to
    the base scanner, and is terminated with an explicit NEWLINE token.
    """
    def __init__(self):
        GenericScanner.__init__(self)
        self.lineno = 1

    def tokenize(self, input):
        self.tokens = []
        for raw in input.split("\n"):
            # drop everything after an inline // comment
            GenericScanner.tokenize(self, raw.split("//")[0])
            self.lineno += 1
            # NOTE(review): lineno is bumped *before* the NEWLINE token is
            # built, so the NEWLINE carries the following line's number --
            # looks like an off-by-one, preserved as-is; confirm intent.
            self.tokens.append(Token(type='NEWLINE', lineno=self.lineno))
        return self.tokens
# One-shot helper used to regenerate the t_mnemonic regex below.
# Longer mnemonics must precede their prefixes (popl before pop, and so
# on), which the reverse sort guarantees -- never hand-edit that regex.
if 0:
    import x86opcodes
    mnemonics = x86opcodes.getAllMnemonics()
    mnemonics.sort()
    mnemonics.reverse()
    print("r'%s'" % "|".join(mnemonics))
class ATTScanner(LineScanner):
    """
    Scans for AT&T assembly code. Anything not recognized is a "label"
    """
    # NOTE: the r'...' docstrings on the t_* methods below are NOT
    # documentation -- SPARK's GenericScanner reflects over these methods
    # and uses each docstring as the regular expression for that token
    # type.  Do not edit, reorder, or "improve" them casually.
    def __init__(self):
        LineScanner.__init__(self)
    ####OPCODES!
    def t_mnemonic(self,s):
        r'xorl|xorb|xor|xchg|test|subl|sub|shrl|shr|shll|shl|ret|pushl|push|popl|pop|orl|orb|or|nop|movw|movl|movb|mov|loopz|loopnz|loopne|loope|loop|leal|lea|jz|js|jpo|jpe|jp|jo|jnz|jns|jnp|jno|jnle|jnl|jnge|jng|jne|jnc|jnbe|jnb|jnae|jna|jmp|jle|jl|jge|jg|jecxz|je|jcxz|jc|jbe|jb|jae|ja|int3|int|incl|incb|inc|farret|decl|decb|dec|cwd|cmpw|cmpl|cmpb|cmp|cdql|cdq|call|andl|andb|and|addl|addb|add'
        #see above to set this line - must be in a particular order, so don't add them manually.
        t=Token(type='mnemonic',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_default(self,s):
        r'[a-zA-Z][a-zA-Z0-9_]+'
        #print "Default Matched: *%s*"%s
        # Any word not matched as a mnemonic becomes a generic 'name'
        # token (typically a label).
        t=Token(type='name',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_comment(self,s):
        r'//.*'
        # Comments produce no token.
        pass
    def t_whitespace(self, s):
        r'\s+'
        # Whitespace is discarded.
        pass
    def t_star(self,s):
        #these are used in front of calls, but we can just ignore them...
        r'\*'
        pass
    def t_decnumber(self, s):
        # Decimal literal; the lookahead keeps '0x...' for t_hexnumber.
        r'(?!0x)\d+'
        t = Token(type='decnumber', attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_hexnumber(self,s):
        r'0x[a-fA-F0-9]+'
        t = Token(type='hexnumber', attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_colon(self,s):
        # Label terminator, e.g. 'start:'.
        r':'
        t = Token(type=':', attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_reg(self,s):
        # General-purpose registers in AT&T %reg notation.
        r'%(eax|ebx|ecx|edx|esi|edi|esp|ebp|ax|bx|cx|al|ah|bl|bh|cl|ch|dl|dh|dx)'
        t=Token(type='reg',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_segreg(self,s):
        # Segment-register prefix, e.g. '%fs:' -- only fs is recognized here.
        r'%(fs):'
        t=Token(type='segreg',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_dollarsign(self,s):
        # '$' marks an immediate operand in AT&T syntax.
        r'\$'
        t=Token(type='$',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_comma(self,s):
        r','
        t=Token(type=',',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_lparen(self,s):
        r'\('
        t=Token(type='(',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_rparen(self,s):
        r'\)'
        t=Token(type=')',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_plus(self,s):
        r'\+'
        t=Token(type='+',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_minus(self,s):
        r'\-'
        t=Token(type='-',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_quotedstring(self,s):
        # Non-greedy, so each quoted string is its own token.
        r'".*?"'
        t=Token(type='quotedstring',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_asciidefine(self,s):
        # '.ascii' assembler directive.
        r'\.ascii'
        t=Token(type='asciidefine',attr=s,lineno=self.lineno)
        self.tokens.append(t)
    def t_longdefine(self,s):
        # '.long' assembler directive.
        r'\.long'
        t=Token(type='longdefine',attr=s,lineno=self.lineno)
        self.tokens.append(t)
class strwreadline:
    """
    Wraps a string up to be able to do a readline so you can tokenize it.

    Each returned line is stripped and re-terminated with "\n"; a line
    that starts with a // comment is returned as a bare "\n".  The final
    fragment (no trailing newline) is returned as-is, then "" forever.
    """
    def __init__(self, s):
        self.str = s
        self.current = self.str  # unconsumed remainder of the input

    def readline(self, size=0):
        # 'size' is accepted for file-object compatibility but ignored.
        # (Fix: removed leftover debug prints that wrote to stdout on
        # every call.)
        index = self.current.find("\n")
        if index == -1:
            # Last (or only) fragment: hand back the remainder, exhaust.
            tmp = self.current
            self.current = ""
            return tmp
        tmp = self.current[:index].strip() + "\n"
        # clear comments: whole-line // comments become empty lines
        if tmp[0:2] == "//":
            tmp = "\n"
        self.current = self.current[index + 1:]
        return tmp
def scan(f):
    """Tokenize a string of AT&T-syntax assembly.

    f: the assembly source as a single string (LineScanner.tokenize
       splits it on newlines itself, so no file-like wrapper is needed).
    Returns the list of Token objects, one NEWLINE token per input line.

    Fix: the original also built a strwreadline(f) wrapper into a local
    that was never used -- tokenize() consumes the raw string directly,
    so the dead variable is removed.
    """
    myscanner = ATTScanner()
    return myscanner.tokenize(f)