Add old measure_loc.py script from pyston v1

kmod · kmod · commit 3b029270c757 · 2020-10-23T13:16:18.000-04:00
And update for Python 3. https://blog.pyston.org/2014/12/05/python-benchmark-sizes/ Benchmark sizes (LOC for 99% coverage): pylint_bench: 3945, mypy_bench: 361, djangocms: 1988, flaskblogging: 711
diff --git a/tools/measure_loc.py b/tools/measure_loc.py
@@ -0,0 +1,219 @@
+"""
+Simple "active LOC" analyzer
+
+Runs a program under a sampling profiler and outputs statistics about how many
+lines of code account for the majority of the runtime.
+
+For example:
+$ python tools/measure_loc.py minibenchmarks/fannkuch_med.py
+[...]
+Found 36 unique lines with 116480 samples
+minibenchmarks/fannkuch_med.py:28                   48244 41.4%   1 41.4%
+minibenchmarks/fannkuch_med.py:36                   18703 16.1%   2 57.5%
+minibenchmarks/fannkuch_med.py:30                   8835  7.6%   3 65.1%
+minibenchmarks/fannkuch_med.py:37                   6388  5.5%   4 70.5%
+minibenchmarks/fannkuch_med.py:29                   5348  4.6%   5 75.1%
+minibenchmarks/fannkuch_med.py:27                   4562  3.9%   6 79.1%
+minibenchmarks/fannkuch_med.py:20                   3599  3.1%   7 82.1%
+minibenchmarks/fannkuch_med.py:21                   2985  2.6%   8 84.7%
+minibenchmarks/fannkuch_med.py:26                   2984  2.6%   9 87.3%
+minibenchmarks/fannkuch_med.py:23                   2835  2.4%  10 89.7%
+minibenchmarks/fannkuch_med.py:24                   2781  2.4%  11 92.1%
+minibenchmarks/fannkuch_med.py:40                   2089  1.8%  12 93.9%
+minibenchmarks/fannkuch_med.py:38                   2038  1.7%  13 95.6%
+minibenchmarks/fannkuch_med.py:35                   1990  1.7%  14 97.3%
+minibenchmarks/fannkuch_med.py:19                   1769  1.5%  15 98.9%
+minibenchmarks/fannkuch_med.py:39                   1108  1.0%  16 99.8%
+minibenchmarks/fannkuch_med.py:42                   179  0.2%  17 100.0%
+minibenchmarks/fannkuch_med.py:49                   10  0.0%  18 100.0%
+minibenchmarks/fannkuch_med.py:51                    7  0.0%  19 100.0%
+/usr/lib/python2.7/runpy.py:220                      3  0.0%  20 100.0%
+(and 16 more -- see measure_loc.pkl)
+Picked 2 lines out of 36 to reach 57.48%
+Picked 5 lines out of 36 to reach 75.14%
+Picked 11 lines out of 36 to reach 92.09%
+Picked 16 lines out of 36 to reach 99.81%
+
+
+By default, this tool ranks lines of code by the amount of time that was spent on them.
+There is also a mode that changes the accounting to "number of times the line executed":
+pass python_trace_counter instead of python_sampler to run() (this requires editing the script).
+"""
+
+
+import os
+import pickle
+import runpy
+import signal
+import sys
+import time
+import traceback
+
+class SamplingProfiler(object):
+    """Signal-driven sampling profiler built on an interval timer.
+
+    start() installs `sighandler` for the signal that belongs to `mode` and
+    arms a repeating timer; stop() disarms the timer, restores the default
+    signal disposition and returns whatever `dumper()` returns.
+
+    Args:
+        sighandler: callable(signum, frame) invoked on every timer tick.
+        dumper: zero-argument callable producing the collected results.
+        mode: key of MODES ('prof', 'virtual' or 'real') selecting the timer.
+        interval: seconds between ticks (also the delay before the first tick).
+    """
+    # Copied + modified from https://github.com/bdarnell/plop/blob/master/plop/collector.py
+
+    # mode name -> (interval timer to arm, signal that timer delivers)
+    MODES = {
+        'prof': (signal.ITIMER_PROF, signal.SIGPROF),
+        'virtual': (signal.ITIMER_VIRTUAL, signal.SIGVTALRM),
+        'real': (signal.ITIMER_REAL, signal.SIGALRM),
+        }
+
+    def __init__(self, sighandler, dumper, mode, interval=0.0001):
+        self.sighandler = sighandler
+        self.dumper = dumper
+        self.mode = mode
+        self.interval = interval
+
+    def start(self):
+        """Install the signal handler and start the repeating interval timer.
+
+        Raises:
+            KeyError: if `self.mode` is not a key of MODES.
+        """
+        timer, sig = SamplingProfiler.MODES[self.mode]
+
+        # Use the handler this instance was constructed with.  The module-level
+        # signal_handler used to be hard-coded here, which silently ignored
+        # the `sighandler` argument.
+        signal.signal(sig, self.sighandler)
+        signal.setitimer(timer, self.interval, self.interval)
+
+    def stop(self):
+        """Disarm the timer, restore the default handler and return `dumper()`."""
+        timer, sig = SamplingProfiler.MODES[self.mode]
+        signal.setitimer(timer, 0, 0)
+        signal.signal(sig, signal.SIG_DFL)
+        return self.dumper()
+
+# Try to prevent / notice if someone else sets a debugger.
+# (Note: removing sys.settrace is not sufficient since one can set
+# frame.f_trace)
+# Keep a private reference to the real sys.settrace so TracingProfiler can
+# still install and remove its tracer after the public name is disabled below.
+sys_settrace = sys.settrace
+# Any later sys.settrace(...) call now fails loudly ('NoneType' object is not
+# callable) instead of silently replacing our tracer.
+sys.settrace = None
+import bdb
+# Likewise disable the debugger entry points in bdb and pdb.
+bdb.Bdb.set_trace = None
+bdb.set_trace = None
+import pdb
+pdb.set_trace = None
+pdb.Pdb.set_trace = None
+
+class TracingProfiler(object):
+    """Profiler that installs `tracefunc` through the saved sys_settrace.
+
+    Args:
+        tracefunc: trace function handed to sys_settrace() by start().
+        dumper: zero-argument callable whose result stop() returns.
+    """
+
+    def __init__(self, tracefunc, dumper):
+        self.dumper = dumper
+        self.tracefunc = tracefunc
+
+    def start(self):
+        """Begin tracing with `tracefunc`."""
+        sys_settrace(self.tracefunc)
+
+    def stop(self):
+        """Check our tracer is still the active one, remove it and return `dumper()`."""
+        active = sys.gettrace()
+        assert active == self.tracefunc, "Problem!  Someone/something removed our tracer.  It's now: %r" % active
+        sys_settrace(None)
+        return self.dumper()
+
+# Maps (filename, lineno) -> number of samples / line events recorded there.
+times = {}
+# Wall-clock time at import; signal_handler measures the warm-up period from here.
+start_time = time.time()
+# Seconds after start_time during which signal_handler discards samples (0 = none).
+SKIP_WARMUP = 0
+def signal_handler(sig, frame):
+    """Initial timer-signal handler: skip the warm-up, then switch to counting.
+
+    Ticks that arrive before start_time + SKIP_WARMUP are discarded.  The first
+    tick after that installs a counting handler for `sig`, so later ticks no
+    longer go through the time check, and is itself counted.
+
+    Args:
+        sig: number of the signal being handled.
+        frame: stack frame that was executing when the signal arrived.
+    """
+    if time.time() < start_time + SKIP_WARMUP:
+        # Still warming up: drop this sample.
+        return
+
+    def real_signal_handler(signum, stack_frame):
+        # Attribute one sample to the line that was running.
+        key = (stack_frame.f_code.co_filename, stack_frame.f_lineno)
+        times[key] = 1 + times.get(key, 0)
+
+    signal.signal(sig, real_signal_handler)
+    real_signal_handler(sig, frame)
+
+def trace_count(frame, event, arg):
+    """Trace function that counts "line" events per (filename, lineno) in `times`.
+
+    Always returns itself, whatever the event.
+    """
+    if event != "line":
+        return trace_count
+
+    key = (frame.f_code.co_filename, frame.f_lineno)
+    times[key] = 1 + times.get(key, 0)
+    return trace_count
+
+def get_times():
+    """Return the counts in `times` as a new list of ((filename, lineno), count) pairs."""
+    pairs = list(times.items())
+    return pairs
+
+def run(sampler, kind):
+    """Run the program named on the command line under `sampler` and print a per-line report.
+
+    The target is read from sys.argv: either `<script> [args...]` or
+    `-m <module> [args...]`.  Before the target runs, sys.argv is rewritten to
+    the original argv[0] followed by the target's own arguments, and
+    sys.path[0] is pointed at the script's directory (the current directory
+    for -m).  The full result list, sorted by descending amount, is pickled to
+    measure_loc.pkl in the current directory.
+
+    Args:
+        sampler: object with start() and stop(); stop() must return an iterable
+            of ((filename, lineno), amount) pairs.
+        kind: "time" to print amounts as seconds; any other value prints them
+            as sample counts.
+
+    Raises:
+        SystemExit: if no target (or no module name after -m) was given.
+    """
+    if len(sys.argv) < 2 or (sys.argv[1] == '-m' and len(sys.argv) < 3):
+        sys.exit("Usage: %s (<script> | -m <module>) [args...]" % sys.argv[0])
+
+    fn = sys.argv[1]
+    module = None
+    if fn == '-m':
+        module = sys.argv[2]
+        args = sys.argv[3:]
+    else:
+        args = sys.argv[2:]
+    sys.argv = [sys.argv[0]] + args
+
+    # dirname('-m') is '', so module runs get the current directory here.
+    sys.path[0] = os.path.abspath(os.path.dirname(fn))
+
+    sampler.start()
+
+    try:
+        if module is not None:
+            runpy.run_module(module, run_name="__main__")
+        else:
+            runpy.run_path(fn, run_name="__main__")
+    except KeyboardInterrupt:
+        print("Interrupted!")
+        traceback.print_exc()
+    except SystemExit:
+        pass
+    except BaseException:
+        # Deliberately broad: however the program ended, still report the
+        # samples collected so far.
+        print("ERROR!")
+        traceback.print_exc()
+
+    print("Stopping timer and tallying statistics...")
+    # sorted() accepts any iterable, so a dumper that returns a dict view works
+    # too (calling .sort() on one raises AttributeError on Python 3).
+    times = sorted(sampler.stop(), key=lambda p: p[1], reverse=True)
+    with open("measure_loc.pkl", "wb") as f:
+        pickle.dump(times, f)
+
+    # Accumulate with a plain loop, in the same order as `sofar` below, so that
+    # for float amounts the running sum ends up exactly equal to the total.
+    total = 0.0
+    for _, t in times:
+        total += t
+    if kind == "time":
+        print("Found %d unique lines for a total of %.2fs" % (len(times), total))
+    else:
+        print("Found %d unique lines with %d samples" % (len(times), total))
+
+    if total == 0:
+        # Nothing was recorded (e.g. the program finished before the first
+        # timer tick), so there are no percentages to compute.
+        return
+
+    # Coverage thresholds to report; the trailing 1 is a sentinel that only
+    # ends the scan and is never reported itself.
+    FRACTIONS = [0.5, 0.75, 0.9, 0.99, 1]
+    frac_counts = []
+    frac_fracs = []
+    frac_idx = 0
+    DISPLAY_THRESH = 20
+
+    sofar = 0.0
+    total_lines = 0
+    for (loc, t) in times:
+        if not loc:
+            continue
+        filename, lineno = loc
+        total_lines += 1
+        sofar += t
+        if total_lines <= DISPLAY_THRESH:
+            if kind == "time":
+                print(("%s:%s" % (filename, lineno)).ljust(50), "%.4fs %4.1f%% % 3d %4.1f%%" % (t, t / total * 100, total_lines, sofar / total * 100.0))
+            else:
+                print(("%s:%s" % (filename, lineno)).ljust(50), "% 3d %4.1f%% % 3d %4.1f%%" % (t, t / total * 100, total_lines, sofar / total * 100.0))
+        # A single heavy line can cross several thresholds at once, so keep
+        # advancing until the next threshold has not been reached yet.
+        while FRACTIONS[frac_idx] != 1 and sofar >= total * FRACTIONS[frac_idx]:
+            frac_counts.append(total_lines)
+            frac_fracs.append(sofar)
+            frac_idx += 1
+        if FRACTIONS[frac_idx] == 1 and sofar >= total:
+            break
+
+    if len(times) > DISPLAY_THRESH:
+        print("(and %d more -- see measure_loc.pkl)" % (len(times) - DISPLAY_THRESH))
+
+    for picked, reached in zip(frac_counts, frac_fracs):
+        print("Picked %d lines out of %d to reach %.2f%%" % (picked, len(times), reached / total * 100.0))
+
+# Default profiler: samples the running line every 0.1ms using the "real"
+# entry of SamplingProfiler.MODES (ITIMER_REAL / SIGALRM).
+python_sampler = SamplingProfiler(signal_handler, get_times, "real", interval=0.0001)
+# Alternative accounting: counts how many times each line executes.
+python_trace_counter = TracingProfiler(trace_count, get_times)
+try:
+    import measure_loc_ext
+    # Materialize the items as a list, the same shape get_times() returns;
+    # a bare dict view has no sort() method on Python 3.
+    cext_trace_timer = TracingProfiler(measure_loc_ext.trace, lambda: list(measure_loc_ext.get_times().items()))
+except ImportError:
+    # Bind the name anyway so users can test for the missing extension
+    # instead of hitting a NameError.
+    cext_trace_timer = None
+    print("(extension module not available)")
+
+if __name__ == "__main__":
+    # Usage: measure_loc.py [-t] (<script> | -m <module>) [args...]
+    #   -t  time each line with the measure_loc_ext tracer instead of sampling.
+    use_cext = len(sys.argv) > 1 and sys.argv[1] == '-t'
+    if use_cext:
+        del sys.argv[1]
+    if len(sys.argv) < 2:
+        sys.exit("Usage: %s [-t] (<script> | -m <module>) [args...]" % sys.argv[0])
+
+    if use_cext:
+        # cext_trace_timer may be unbound (or None) when the extension module
+        # failed to import; report that instead of crashing.
+        tracer = globals().get("cext_trace_timer")
+        if tracer is None:
+            sys.exit("-t requires the measure_loc_ext extension module, which is not available")
+        run(tracer, "time")
+    else:
+        run(python_sampler, "count")
+    # To count line executions instead of sampling, replace the call above with:
+    # run(python_trace_counter, "count")