#!/usr/bin/env python

"""%prog [options] shellpath dirpath

Pulls performance data on parsing via the js shell.
Displays the average number of milliseconds it took to parse each file.

For comparison, a rough heuristic along the lines of a t-test is performed:
"Faster" means that:

    t_baseline_goodrun = (t_baseline_avg - t_baseline_stddev)
    t_current_badrun = (t_current_avg + t_current_stddev) 
    t_current_badrun < t_baseline_goodrun

Effectively, if a bad run from the current data is still better than a good run
from the baseline data, we're probably faster. A similar computation is used
for determining the "slower" designation.

Arguments:
  shellpath             executable JavaScript shell
  dirpath               directory filled with parsilicious js files
"""

import math
import optparse
import os
import subprocess as subp
import sys
from string import Template

try:
    import compare_bench
except ImportError:
    compare_bench = None


_DIR = os.path.dirname(__file__)
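# Template for the snippet run inside the JS shell: read the target file, do
# the warmup parses, then time each counted parse() call and print the
# per-run times in milliseconds.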
JS_CODE_TEMPLATE = Template("""
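// Use snarf as the file reader when the shell provides it.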
if (typeof snarf !== 'undefined') read = snarf;
var contents = read("$filepath");
for (var i = 0; i < $warmup_run_count; i++)
    parse(contents);
var results = [];
for (var i = 0; i < $real_run_count; i++) {
    var start = new Date();
    parse(contents);
    var end = new Date();
    results.push(end - start);
}
print(results);
""")


def gen_filepaths(dirpath, target_ext='.js'):
    for filename in os.listdir(dirpath):
        if filename.endswith(target_ext):
            yield os.path.join(dirpath, filename)


def avg(seq):
    return sum(seq) / len(seq)


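# Population standard deviation of seq about the supplied mean.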
def stddev(seq, mean):
    diffs = ((float(item) - mean) ** 2 for item in seq)
    return math.sqrt(sum(diffs) / len(seq))


def bench(shellpath, filepath, warmup_runs, counted_runs, stfu=False):
    """Return the mean and standard deviation (in ms) of the counted runs."""
    assert '"' not in filepath
    code = JS_CODE_TEMPLATE.substitute(filepath=filepath,
            warmup_run_count=warmup_runs, real_run_count=counted_runs)
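    # Run the generated snippet via the shell's -e flag; it prints the counted
    # run times as a comma-separated list on stdout.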
    proc = subp.Popen([shellpath, '-e', code], stdout=subp.PIPE)
    stdout, _ = proc.communicate()
    milliseconds = [float(val) for val in stdout.split(',')]
    mean = avg(milliseconds)
    sigma = stddev(milliseconds, mean)
    if not stfu:
        print 'Runs:', [int(ms) for ms in milliseconds]
        print 'Mean:', mean
        print 'Stddev: %.2f (%.2f%% of mean)' % (sigma, sigma / mean * 100)
    return mean, sigma


def parsemark(filepaths, fbench, stfu=False):
    """:param fbench: fbench(filename) -> float"""
    bench_map = {} # {filename: (avg, stddev)}
    for filepath in filepaths:
        filename = os.path.split(filepath)[-1]
        if not stfu:
            print 'Parsemarking %s...' % filename
        bench_map[filename] = fbench(filepath)
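    # Emit the per-file results as a JSON-style object on stdout; the output
    # can be saved and passed back later via --baseline.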
    print '{'
    for i, (filename, (avg, stddev)) in enumerate(bench_map.iteritems()):
        assert '"' not in filename
        fmt = '    %30s: {"average_ms": %6.2f, "stddev_ms": %6.2f}'
        if i != len(bench_map) - 1:
            fmt += ','
        filename_str = '"%s"' % filename
        print fmt % (filename_str, avg, stddev)
    print '}'
    return dict((filename, dict(average_ms=avg, stddev_ms=stddev))
            for filename, (avg, stddev) in bench_map.iteritems())


def main():
    parser = optparse.OptionParser(usage=__doc__.strip())
    parser.add_option('-w', '--warmup-runs', metavar='COUNT', type=int,
            default=5, help='used to minimize test instability [%default]')
    parser.add_option('-c', '--counted-runs', metavar='COUNT', type=int,
            default=50, help='timed data runs that count towards the average [%default]')
    parser.add_option('-s', '--shell', metavar='PATH', help='explicit shell '
            'location; when omitted, will look in likely places')
    parser.add_option('-b', '--baseline', metavar='JSON_PATH',
            dest='baseline_path', help='json file with baseline values to '
            'compare against')
    parser.add_option('-q', '--quiet', dest='stfu', action='store_true',
            default=False, help='only print JSON to stdout [%default]')
    options, args = parser.parse_args()
    try:
        shellpath = args.pop(0)
    except IndexError:
        parser.print_help()
        print
        print >> sys.stderr, 'error: shellpath required'
        return -1
    try:
        dirpath = args.pop(0)
    except IndexError:
        parser.print_help()
        print
        print >> sys.stderr, 'error: dirpath required'
        return -1
    if not shellpath or not os.path.exists(shellpath):
        print >> sys.stderr, 'error: could not find shell:', shellpath
        return -1
    if options.baseline_path:
        if not os.path.isfile(options.baseline_path):
            print >> sys.stderr, 'error: baseline file does not exist'
            return -1
        if not compare_bench:
            print >> sys.stderr, ('error: could not import compare_bench; '
                    'cannot compare benchmarks')
            return -1
    benchfile = lambda filepath: bench(shellpath, filepath,
            options.warmup_runs, options.counted_runs, stfu=options.stfu)
    bench_map = parsemark(gen_filepaths(dirpath), benchfile, options.stfu)
    if options.baseline_path:
        compare_bench.compare_immediate(bench_map, options.baseline_path)
    return 0


if __name__ == '__main__':
    sys.exit(main())