Skip to content

Commit d95bed4

Browse files
committed
Implement a toy python vm with a few basic operations
This basic Python VM, written in Python, implements basic arithmetic operations and function calls. This is only a small subset of the operations that the actual CPython implements. The input to this VM is Python bytecode.
0 parents  commit d95bed4

23 files changed

+890
-0
lines changed

code.py

+190
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
""" module for constructing the code object """
2+
3+
import opcodes as opc
4+
import utils as utl
5+
6+
# Module-wide table of names shared by every Code object.
# Keys are integer indices handed out from name_cnt; each value is a
# one-element list ([0] initially) used as a mutable cell, so that a Code
# object can later be stored in slot 0 and be seen by every holder of the cell.
names = {}
# Next free index in `names`; incremented each time a new entry is added.
name_cnt = 0
9+
10+
11+
class Code(object):
    """Code object reconstructed from the raw contents of a .pyc file.

    ``pyclist`` is an indexable, sliceable sequence whose items are compared
    against the integer tags in ``opcodes`` (presumably the bytes of the
    .pyc file as ints -- confirm against the caller).  Parsing is done once,
    in ``__init__``, by a chain of ``acq_*`` methods that each start at
    ``self.cur`` and leave ``self.cur`` at the position they stopped at.

    Parsing also mutates the module-level ``names`` table and ``name_cnt``
    counter, which are shared by all Code objects.
    """

    def __init__(self, pyclist, cur=0):
        """Parse one code object out of ``pyclist`` starting at ``cur``.

        The ``acq_*`` calls below must stay in this order: each one continues
        from the position (``self.cur``) where the previous one stopped.
        """
        self.pyclist = pyclist
        self.cur = cur
        self.code = self.acq_code()
        self.consts = self.acq_consts()
        self.names = self.acq_names()
        self.varnames = self.acq_varnames()
        self.name = self.acq_name()

    def get_name(self):
        """Return the ``name`` field (an index into the global ``names``)."""
        return self.name

    def get_cur(self):
        """Return the current position in ``pyclist``."""
        return self.cur

    def get_pyclist(self):
        """Return the ``pyclist`` this object was parsed from."""
        return self.pyclist

    def get_opcode(self, cur):
        """Return the item of ``self.code`` at index ``cur``."""
        return self.code[cur]

    def get_oparg(self, cur):
        """Return the argument of the opcode located at index ``cur``.

        Decoding is delegated to ``utl.decimal`` with its default width.
        """
        return utl.decimal(self.code, cur)

    def is_end(self, cur):
        """Return True when ``cur`` is at or past the end of ``self.code``."""
        return cur >= len(self.code)

    def acq_code(self):
        """Build the ``co_code`` list and advance ``self.cur`` past it.

        Instructions are copied from ``pyclist``: three items when
        ``utl.have_arg`` says the opcode takes an argument, one item
        otherwise.  Wherever ``utl.is_func_def`` reports a function
        definition, nine input items are replaced by a single
        ``MAKE_FUNCTION`` opcode followed by eight zero items, which keeps
        the offsets of the following instructions unchanged.
        """
        pyclist = self.pyclist
        cur = utl.start_of_code(pyclist, self.cur)
        # The length of the code is the 4-byte value decoded at cur - 5.
        end = cur + utl.decimal(pyclist, cur - 5, 4)
        code = []
        while cur < end:
            if utl.is_func_def(cur, pyclist):
                code.append(opc.MAKE_FUNCTION)
                code.extend([0] * 8)
                cur += 9
            elif utl.have_arg(pyclist[cur]):
                code.extend(pyclist[cur:cur + 3])
                cur += 3
            else:
                code.append(pyclist[cur])
                cur += 1

        self.cur = cur
        return code

    def acq_consts(self):
        """Build the ``co_consts`` list and advance ``self.cur`` past it.

        Integers are decoded, ``None`` is represented by ``0``, and nested
        code objects are parsed recursively into ``Code`` instances.  Each
        nested ``Code`` is also stored in slot 0 of the global ``names``
        entry selected by its ``name`` index.
        """
        cur = self.cur
        pyclist = self.pyclist
        num_co = utl.decimal(pyclist, cur, 4)
        cur += 5  # skip the tag and the 4-byte count
        consts = []
        for _ in range(num_co):
            tag = pyclist[cur]
            if tag == opc.TYPE_INTEGER:
                consts.append(utl.decimal(pyclist, cur, 4))
                cur += 5
            elif tag == opc.TYPE_NONE:
                consts.append(0)
                cur += 1
            elif tag == opc.TYPE_CODE:
                code_obj = Code(pyclist, cur)
                f_idx = code_obj.get_name()
                consts.append(code_obj)
                names[f_idx][0] = code_obj
                cur = utl.end_of_code(pyclist, cur)
            # NOTE(review): any other tag is neither stored nor skipped, so
            # the remaining iterations look at the same position again.

        self.cur = cur
        return consts

    def acq_names(self):
        """Build ``co_names`` and advance ``self.cur`` past it.

        Returns a dict mapping the position of each name (0, 1, ...) to its
        cell in the global ``names`` table.  A ``TYPE_INTERN`` entry creates
        a new cell; a ``TYPE_SREF`` entry reuses the cell at the referenced
        index.  Entries with any other tag are stepped over by one item and
        get no position.
        """
        global name_cnt
        cur = self.cur
        pyclist = self.pyclist
        # Read with an explicit width of 4, matching how acq_consts,
        # acq_varnames and acq_name decode the same kind of field.
        n_names = utl.decimal(pyclist, cur, 4)
        cur += 5  # skip the tag and the 4-byte count
        co_names = {}
        idx = 0
        for _ in range(n_names):
            tag = pyclist[cur]
            if tag == opc.TYPE_INTERN:
                # first occurrence of a name
                names[name_cnt] = [0]
                co_names[idx] = names[name_cnt]
                name_cnt += 1
                idx += 1
                cur = utl.skip_element(pyclist, cur)
            elif tag == opc.TYPE_SREF:
                # reference to an earlier name: same 4-byte index that
                # acq_name decodes for a string reference
                func_idx = utl.decimal(pyclist, cur, 4)
                co_names[idx] = names[func_idx]
                idx += 1
                cur += 5
            else:
                cur += 1

        self.cur = cur
        return co_names

    def acq_varnames(self):
        """Build ``co_varnames`` and advance ``self.cur`` past it.

        Returns a list with one ``0`` placeholder per variable name.  Each
        ``TYPE_INTERN`` entry still allocates a new cell in the global
        ``names`` table, so that the global index stays in step.
        """
        global name_cnt
        cur = self.cur
        pyclist = self.pyclist
        varnames = []
        n_varnames = utl.decimal(pyclist, cur, 4)
        cur += 5  # skip the tag and the 4-byte count
        for _ in range(n_varnames):
            varnames.append(0)
            tag = pyclist[cur]
            if tag == opc.TYPE_INTERN:
                names[name_cnt] = [0]
                name_cnt += 1
                cur = utl.skip_element(pyclist, cur)
            elif tag == opc.TYPE_SREF:
                cur += 5
            else:
                cur += 1

        self.cur = cur
        return varnames

    def acq_name(self):
        """Return the index in the global ``names`` of this code's name.

        ``self.cur`` is left at the start of the name element (just after
        the filename); it is not advanced past the name itself.
        """
        global name_cnt
        cur = self.cur
        pyclist = self.pyclist
        # Skip two tuples (TYPE_TUPLE tag, 0x28), i.e. cellvars and freevars,
        # by scanning forward to the second TYPE_TUPLE tag.
        # NOTE(review): the scan is item by item, so it relies on no other
        # item with the same value appearing before the second tag.
        tuples_seen = 0
        while True:
            if pyclist[cur] == opc.TYPE_TUPLE:
                tuples_seen += 1
                if tuples_seen == 2:
                    break
            cur += 1

        cur += 5  # step over the second tuple's tag and 4-byte count
        # skip the filename
        cur = utl.skip_element(pyclist, cur)
        self.cur = cur
        # get the index of the name of the code
        if pyclist[cur] == opc.TYPE_INTERN:
            # new name: allocate a cell and return its index
            new_idx = name_cnt
            names[new_idx] = [0]
            name_cnt = new_idx + 1
            return new_idx

        # otherwise decode the 4-byte index of an existing name
        return utl.decimal(pyclist, cur, 4)

    def view(self):
        """Print the fields of this code object, then the global names.

        Nested ``Code`` constants are printed recursively through their own
        ``view``.  Only single-argument, parenthesised prints are used so
        the output is the same on Python 2 and the code is valid Python 3.
        """
        print("****************")
        print(utl.show_pyc(self.code))
        print("%d constants" % len(self.consts))
        for const in self.consts:
            if isinstance(const, Code):
                const.view()
            else:
                print(const)
        print(self.names)
        print(self.varnames)
        print(self.name)
        print('global names')
        print(names)
        print('--------------------------------')

opcodes.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
""" opcode symbols """

# --- bytecode instructions ------------------------------------------------
# loading, storing and printing
LOAD_CONSTANT = 0x64
LOAD_NAME = 0x65
STORE_NAME = 0x5A
PRINT_ITEM = 0x47
PRINT_NEWLINE = 0x48

# comparison and arithmetic
COMPARE_OP = 0x6B
BINARY_ADD = 0x17
BINARY_MULTIPLY = 0x14
BINARY_DIVIDE = 0x15
BINARY_SUBTRACT = 0x18
BINARY_MODULO = 0x16

# jumps and loop blocks
POP_JUMP_IF_FALSE = 0x72
POP_JUMP_IF_TRUE = 0x73
JUMP_FORWARD = 0x6E
JUMP_ABSOLUTE = 0x71
SETUP_LOOP = 0x78
POP_BLOCK = 0x57

# functions, locals, globals and stack handling
MAKE_FUNCTION = 0x84
RETURN_VALUE = 0x53
UNARY_NOT = 0x0C
CALL_FUNCTION = 0x83
LOAD_FAST = 0x7C
STORE_FAST = 0x7D
LOAD_GLOBAL = 0x74
POP_TOP = 0x01

# --- types ----------------------------------------------------------------
# One-byte type tags; the trailing comment is the ASCII character of the value.
TYPE_TUPLE = 0x28  # '('
TYPE_INTEGER = 0x69  # 'i'
TYPE_STRING = 0x73  # 's'
TYPE_CODE = 0x63  # 'c'
TYPE_NONE = 0x4E  # 'N'
TYPE_INTERN = 0x74  # 't'
TYPE_SREF = 0x52  # 'R'
# NOTE(review): presumably the byte that marks a function's code object;
# confirm against its use in utils.
FUNCTION_START = 0x43

# --- constants ------------------------------------------------------------
# NOTE(review): presumably the threshold used to tell opcodes that take an
# argument from those that do not; confirm against utils.have_arg.
HAVE_ARG = 90

0 commit comments

Comments
 (0)