DXR will be turned off on Tuesday, December 29th. It will redirect to Searchfox.
See the announcement on Discourse.

DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Mercurial (2028a9d7ce91)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
import glob
import hashlib
from itertools import chain
import json
import logging
import os
import sys
import urllib2
import zipfile


# Emit INFO-level progress messages in addition to warnings.
logging.basicConfig(level=logging.INFO)

# Files to validate: the paths given on the command line or, when none are
# given, every *.json file in the current working directory.
json_files = sys.argv[1:]
if not json_files:
    json_files = glob.glob("*.json")

def missing(thing, keys):
    """Return the entries of *keys* not contained in *thing*, in order."""
    absent = []
    for key in keys:
        if key in thing:
            continue
        absent.append(key)
    return absent

def sign(path):
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    large files are never held in memory in full.  The ``with`` block
    guarantees the handle is closed even if a read fails.

    Raises whatever ``open`` / ``read`` raise (e.g. IOError) when the file
    cannot be read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: fh.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()

def jarfiles(jarpath):
    """Return the list of member names stored in the jar (zip) at *jarpath*.

    Names are returned in archive order.  The archive is closed before
    returning, even if listing it fails.  Raises whatever
    ``zipfile.ZipFile`` raises when *jarpath* is missing or is not a valid
    zip archive.
    """
    archive = zipfile.ZipFile(jarpath)
    try:
        return archive.namelist()
    finally:
        # try/finally rather than ``with`` so this also works on
        # interpreters whose ZipFile is not a context manager.
        archive.close()

def _experiments(obj):
    """Iterate over the records of both experiment lists in *obj*.

    Yields the entries of ``maintain_experiments`` followed by those of
    ``new_experiments``; a list that is absent is treated as empty.
    """
    return chain(obj.get(u'maintain_experiments', []),
                 obj.get(u'new_experiments', []))


def _check_unique_fields(obj):
    """Warn about any ``default`` field value shared by several experiments."""
    # field name -> {value: number of experiments using that value}
    seen = {}
    for record in _experiments(obj):
        for field, val in record['default'].items():
            # Jar files are compared by base name, ignoring directories.
            if field == "jarfile" and '/' in val:
                val = val[val.rindex('/') + 1:]
            per_field = seen.setdefault(field, {})
            per_field[val] = per_field.get(val, 0) + 1

    for field, values in seen.items():
        for value, count in values.items():
            if count > 1:
                logging.warning('there are %s instances of the %s %s',
                                count, field, value)


def _check_studies(obj):
    """Check each experiment's required attributes, jar hash and study file."""
    for record in _experiments(obj):
        o = record['default']
        is_dependencies = o.get('name') == "Dependencies"

        m = missing(o, ['studyfile', 'hash', 'name', 'jarfile'])
        if m and not is_dependencies:
            logging.warning('study missing attributes: %(m)s:  %(o)s',
                            {'m': m, 'o': o})

        jarfile = o.get('jarfile')
        if jarfile is None or not os.path.isfile(jarfile):
            logging.warning('not a file: %s', jarfile)
            # Without a readable jar neither the hash nor the study file
            # can be checked; move on instead of crashing on open().
            continue

        observed = sign(jarfile)
        if observed != o.get('hash'):
            logging.warning('hash wrong (%s): |%s| |%s| (observed)',
                            o.get('name'), o.get('hash'), observed)

        # The "Dependencies" entry is exempt from the study file check.
        if is_dependencies:
            continue

        studyfile = o.get('studyfile')
        if studyfile is None:
            logging.warning("should have study file.  studyfile in %r", record)
        elif studyfile not in jarfiles(jarfile):
            logging.warning('studyfile not found %s in jarfile %s',
                            studyfile, jarfile)
        else:
            logging.info("studyfile good %s", studyfile)


def _check_urls(obj):
    """Try to open every ``url`` / ``thumbnail`` listed under ``results``."""
    gotten = set()  # URLs already fetched successfully; each is tried once.
    for result in obj.get('results', []):
        for key in ['url', 'thumbnail']:
            url = result.get(key)
            if not url or url in gotten:
                continue
            try:
                # Only reachability matters; close the response right away.
                urllib2.urlopen(url).close()
            except Exception as exc:
                # Deliberately broad: any failure to fetch is reported as a
                # warning and validation continues with the next URL.
                logging.warning("failed to open: result: %s %s", url, exc)
            else:
                gotten.add(url)
                logging.info("got %s", url)


def main(json_files):
    """Validate each JSON index file named in *json_files*.

    For every file, in order:

    1. parse it as JSON (a file that fails to parse is reported with a
       warning and skipped);
    2. warn about ``default`` field values shared by several experiments;
    3. warn about experiments with missing attributes, missing jar files,
       mismatching SHA-256 hashes or study files absent from their jar;
    4. warn about ``url`` / ``thumbnail`` entries under ``results`` that
       cannot be opened.

    Problems are reported through ``logging``; nothing is returned.  A file
    that cannot be opened at all still raises the error from ``open``.
    """
    for path in json_files:
        with open(path, 'rb') as fh:
            try:
                obj = json.load(fh)
            except ValueError as e:
                logging.warning("%s is invalid JSON.  Check trailing commas?: %s",
                                path, e)
                continue
        logging.info("JSON is good")

        _check_unique_fields(obj)
        _check_studies(obj)
        _check_urls(obj)


            #(u'default': (u'studyfile': u'early_adopter_survey.js', u'hash': u'1cdc71cc9495481c225e2872e78cc4f88373bca55198ee8b8bf0e3d3e5260c2b', u'name': u'Tech Adoption Survey', u'jarfile': u'newfeature/early_adopter_survey.jar'))
                                 
# Run the validation only when executed as a script (not on import), using
# the module-level json_files list.
if __name__ == "__main__":
    main(json_files)