ENH: Allow checking vmprof output in progress. #139

Open · wants to merge 3 commits into base: master
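A minimal sketch of what this change enables, based on the new assertions in vmprof/test/test_run.py below: flush code-object names mid-run so that a partially written profile can already be resolved. The workload function is a stand-in; only_needed, write_all_code_objects() and get_stats() are taken from that test.

```python
import vmprof

def function_foo():
    # stand-in workload, mirroring the function used by the test below
    return sum(range(100000))

prof = vmprof.Profiler()
with prof.measure(only_needed=True):
    # write out the names of all code objects seen so far, so the
    # in-progress profile on disk can already be read back
    vmprof._vmprof.write_all_code_objects()
    function_foo()

stats = prof.get_stats()
print(len(stats.adr_dict))
```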
27 changes: 27 additions & 0 deletions src/symboltable.c
@@ -375,6 +375,13 @@ void vmp_scan_profile(int fileno, int dump_nat_sym, void *all_code_uids)
khash_t(ptr) * nat_syms = kh_init(ptr);
khiter_t it;

#ifndef RPYTHON_VMPROF
PyObject *all_done_uids = NULL;
if (all_code_uids != NULL) {
all_done_uids = PySet_New(NULL);
Contributor:

In this function we can't use the CPython API; it will not work on PyPy. I have already used khash for the native symbols (to avoid duplication). We can reuse khash for that.

Contributor Author:

Yeah, here's my confusion: we already use the CPython API further down in this function, similarly inside an #ifndef RPYTHON_VMPROF block. :(

...so, is the right fix to change both?

Or is the other one special enough to be kept around?

}
#endif

lseek(fileno, 5*WORD_SIZE, SEEK_SET);

while (1) {
@@ -413,7 +420,16 @@ void vmp_scan_profile(int fileno, int dump_nat_sym, void *all_code_uids)
} case MARKER_VIRTUAL_IP:
case MARKER_NATIVE_SYMBOLS: {
//LOG("virtip 0x%llx\n", cur_pos);
#ifndef RPYTHON_VMPROF
void * addr = _read_addr(fileno);
if (all_done_uids != NULL) {
PyObject *co_uid = PyLong_FromVoidPtr(addr);
int check = PySet_Add(all_done_uids, co_uid);
Py_CLEAR(co_uid);
}
#else
if (_skip_addr(fileno) != 0) { return; }
#endif
if (_skip_string(fileno) != 0) { return; }
break;
} case MARKER_STACKTRACE: {
@@ -496,6 +512,17 @@ void vmp_scan_profile(int fileno, int dump_nat_sym, void *all_code_uids)
}
}

#ifndef RPYTHON_VMPROF
if (all_done_uids != NULL) {
while (PySet_GET_SIZE(all_done_uids)) {
PyObject *co_uid = PySet_Pop(all_done_uids);
PySet_Discard(all_code_uids, co_uid);
Py_CLEAR(co_uid);
}
Py_CLEAR(all_done_uids);
}
#endif

kh_destroy(ptr, nat_syms);
lseek(fileno, 0, SEEK_END);
}
125 changes: 77 additions & 48 deletions vmprof/reader.py
@@ -52,6 +52,9 @@ class JittedCode(int):
class NativeCode(int):
pass

class IncompleteRead(Exception):
pass

def wrap_kind(kind, pc):
if kind == VMPROF_ASSEMBLER_TAG:
return AssemblerCode(pc)
@@ -100,7 +103,7 @@ def __init__(self, fileobj, state):
self.addr_size = None

def detect_file_sizes(self):
self.fileobj.seek(0, os.SEEK_SET)
self.seek()
firstbytes = self.read(8)
three = '\x03' if not PY3 else 3
little = None
@@ -125,20 +128,31 @@ def detect_file_sizes(self):
# even though it might be a 64bit log, the addr_size is now 4
if self.addr_size == 4:
# read further
self.fileobj.seek(0, os.SEEK_SET)
self.seek()
self.read(4*4)
windows64 = self.read_word() == 1
if windows64:
self.setup_once(little_endian=little, word_size=4, addr_size=8)

self.fileobj.seek(0, os.SEEK_SET)
self.seek()

def setup_once(self, little_endian, word_size, addr_size):
self.little_endian = little_endian
assert self.little_endian, "big endian profile are not supported"
self.word_size = word_size
self.addr_size = addr_size

def read(self, count):
data = self.fileobj.read(count)
size = len(data)
if count != size:
raise IncompleteRead("expected %d, read %d" % (count, size))
else:
return data

def seek(self, position=0, offset=os.SEEK_SET):
return self.fileobj.seek(position, offset)

def read_static_header(self):
assert self.read_word() == 0 # header count
assert self.read_word() == 3 # header size
@@ -170,23 +184,20 @@ def read_header(self):

def read_addr(self):
if self.addr_size == 8:
return struct.unpack('<q', self.fileobj.read(8))[0]
return struct.unpack('<q', self.read(8))[0]
elif self.addr_size == 4:
return struct.unpack('<l', self.fileobj.read(4))[0]
return struct.unpack('<l', self.read(4))[0]
else:
raise NotImplementedError("did not implement size %d" % self.size)

def read_word(self):
if self.word_size == 8:
return struct.unpack('<q', self.fileobj.read(8))[0]
return struct.unpack('<q', self.read(8))[0]
elif self.word_size == 4:
return struct.unpack('<l', self.fileobj.read(4))[0]
return struct.unpack('<l', self.read(4))[0]
else:
raise NotImplementedError("did not implement size %d" % self.size)

def read(self, count):
return self.fileobj.read(count)

def read_string(self):
cnt = self.read_word()
bytes = self.read(cnt)
@@ -223,7 +234,7 @@ def read_addresses(self, count):
return addrs

def read_s64(self):
return struct.unpack('q', self.fileobj.read(8))[0]
return struct.unpack('q', self.read(8))[0]

def read_time_and_zone(self):
return datetime.datetime.fromtimestamp(
@@ -248,55 +259,73 @@ def read_timezone(self):
return None
return None

def read_block_header(self):
assert not self.state.version, "multiple headers"
self.read_header()

def read_block_meta(self):
key = self.read_string()
value = self.read_string()
assert not key in self.state.meta, "key duplication, %s already present" % (key,)
self.state.meta[key] = value

def read_block_time_and_zone(self):
self.state.start_time = self.read_time_and_zone()

def read_block_stack_trace(self):
count = self.read_word()
# for now
assert count == 1
depth = self.read_word()
assert depth <= 2**16, 'stack strace depth too high'
trace = self.read_trace(depth)
thread_id = 0
mem_in_kb = 0
if self.state.version >= VERSION_THREAD_ID:
thread_id = self.read_addr()
if self.state.profile_memory:
mem_in_kb = self.read_addr()
trace.reverse()
self.state.profiles.append((trace, 1, thread_id, mem_in_kb))

def read_block_symbols(self):
unique_id = self.read_addr()
name = self.read_string()
self.state.virtual_ips.append((unique_id, name))

def read_block_trailer(self):
if self.state.version >= VERSION_DURATION:
self.state.end_time = self.read_time_and_zone()

def read_all(self):
s = self.state
fileobj = self.fileobj

self.detect_file_sizes()
self.read_static_header()

function_lookup = {
Contributor:

Should have been done this way some time ago...

MARKER_HEADER: self.read_block_header,
MARKER_META: self.read_block_meta,
MARKER_TIME_N_ZONE: self.read_block_time_and_zone,
MARKER_STACKTRACE: self.read_block_stack_trace,
MARKER_VIRTUAL_IP: self.read_block_symbols,
MARKER_NATIVE_SYMBOLS: self.read_block_symbols,
MARKER_TRAILER: self.read_block_trailer,
}

while True:
marker = fileobj.read(1)
if marker == MARKER_HEADER:
assert not s.version, "multiple headers"
self.read_header()
elif marker == MARKER_META:
key = self.read_string()
value = self.read_string()
assert not key in s.meta, "key duplication, %s already present" % (key,)
s.meta[key] = value
elif marker == MARKER_TIME_N_ZONE:
s.start_time = self.read_time_and_zone()
elif marker == MARKER_STACKTRACE:
count = self.read_word()
# for now
assert count == 1
depth = self.read_word()
assert depth <= 2**16, 'stack strace depth too high'
trace = self.read_trace(depth)
thread_id = 0
mem_in_kb = 0
if s.version >= VERSION_THREAD_ID:
thread_id = self.read_addr()
if s.profile_memory:
mem_in_kb = self.read_addr()
trace.reverse()
s.profiles.append((trace, 1, thread_id, mem_in_kb))
elif marker == MARKER_VIRTUAL_IP or marker == MARKER_NATIVE_SYMBOLS:
unique_id = self.read_addr()
name = self.read_string()
s.virtual_ips.append((unique_id, name))
elif marker == MARKER_TRAILER:
#if not virtual_ips_only:
# symmap = read_ranges(fileobj.read())
if s.version >= VERSION_DURATION:
s.end_time = self.read_time_and_zone()
handle = function_lookup.get(marker)
if handle is None:
assert not marker, (fileobj.tell(), repr(marker))
break
else:
assert not marker, (fileobj.tell(), repr(marker))
handle()
if marker == MARKER_TRAILER:
break

s.virtual_ips.sort() # I think it's sorted, but who knows
self.state.virtual_ips.sort() # I think it's sorted, but who knows


class ReaderState(object):
pass
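The stricter read() above raises IncompleteRead instead of silently returning short data, which is what lets a consumer stop cleanly when it reaches the not-yet-written tail of an in-progress profile. A rough caller-side sketch; the helper name and recovery policy are illustrative, not part of this diff:

```python
from vmprof.reader import IncompleteRead

def read_available(reader):
    # Hypothetical helper: parse as much of a still-growing profile as is
    # currently on disk, stopping at the first truncated record.
    try:
        reader.read_all()
    except IncompleteRead:
        # The last record was cut off mid-write; everything parsed so far
        # (profiles, virtual_ips, meta) is still available on reader.state.
        pass
    return reader.state
```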
6 changes: 4 additions & 2 deletions vmprof/stats.py
@@ -14,10 +14,12 @@ def __init__(self, profiles, adr_dict=None, jit_frames=None, interp=None,
if state:
self.profile_lines = state.profile_lines
self.profile_memory = state.profile_memory
self.adr_size = len(state.virtual_ips)
else:
# unknown, for tests only
self.profile_lines = False
self.profile_memory = False
self.profile_lines = None
self.profile_memory = None
self.adr_size = None
Contributor:

Can we change adr_size or remove it? It is simply the len of state.virtual_ips... it should not be confused with the byte size of the addresses contained in the profile. We could turn that into a method with a longer name...

Contributor Author:

Ah, I added that particular thing literally only for one test; I suppose we could rework that test instead to use a lower-level API?

self.generate_top()
if jit_frames is None:
jit_frames = set()
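Following the review exchange above, one option is to replace the bare adr_size attribute with an explicitly named helper that makes clear it counts symbol entries rather than address bytes. A sketch only; the function name is mine and the exact wiring into Stats or the test is left open:

```python
def number_of_virtual_ip_symbols(state):
    # Count the (unique_id, name) pairs read from the profile, i.e.
    # len(state.virtual_ips); this is what adr_size currently stores and
    # must not be confused with the byte width of addresses in the profile.
    if state is None:
        return None
    return len(state.virtual_ips)

# test_only_needed() could then compare reader states directly:
#   assert number_of_virtual_ip_symbols(state_one) == number_of_virtual_ip_symbols(state_all)
```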
5 changes: 5 additions & 0 deletions vmprof/test/test_run.py
@@ -161,9 +161,14 @@ def test_only_needed():
with prof.measure(only_needed=True):
function_foo()
stats_two = prof.get_stats()
with prof.measure(only_needed=True):
vmprof._vmprof.write_all_code_objects()
function_foo()
stats_all = prof.get_stats()
assert foo_full_name in stats_one.adr_dict.values()
assert foo_full_name in stats_two.adr_dict.values()
assert len(stats_one.adr_dict) > len(stats_two.adr_dict)
assert stats_one.adr_size == stats_all.adr_size

def test_nested_call():
prof = vmprof.Profiler()