DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Mercurial (c27d2bc955b6)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import argparse
import os
import requests
import urlparse
from multiprocessing import Pool

# Root URL of the Treeherder service; the API endpoint paths used below are
# joined onto it with urlparse.urljoin.
treeherder_base = "https://treeherder.mozilla.org/"

"""Simple script for downloading structured logs from treeherder.

For the moment this is specialised to work with web-platform-tests
logs; in due course it should move somewhere generic and get hooked
up to mach or similar"""

# Interpretation of the "job" list from
# https://github.com/mozilla/treeherder-service/blob/master/treeherder/webapp/api/utils.py#L18


def create_parser():
    """Build the command-line parser for this script.

    The parser takes two required positional arguments, in order:
    ``branch`` and ``commit``.

    :returns: a configured ``argparse.ArgumentParser``.
    """
    # (argument name, help text) in the order they appear on the command line.
    positional_args = (
        ("branch", "Branch on which jobs ran"),
        ("commit", "Commit hash for push"),
    )

    cli = argparse.ArgumentParser()
    for arg_name, help_text in positional_args:
        cli.add_argument(arg_name, action="store", help=help_text)
    return cli


def download(url, prefix, dest, force_suffix=True):
    """Stream the resource at ``url`` into a new ``.log`` file under ``dest``.

    The file name is derived from ``prefix`` plus a counter
    (``<prefix>-1.log``, ``<prefix>-2.log``, ...; just ``1.log``, ``2.log``
    when there is no prefix).  The counter is incremented until a name is
    found that does not exist yet, so existing files are never overwritten.

    :param url: URL to fetch.
    :param prefix: file name stem; ``None`` or ``""`` gives purely numbered
                   file names.
    :param dest: directory to write into; ``None`` means the current
                 directory.
    :param force_suffix: when true (the default) a counter is always
                         appended.  When false, ``<prefix>.log`` is tried
                         first and a counter is only added if that file
                         already exists.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    if dest is None:
        dest = "."

    # A missing prefix is treated as an empty one; concatenating None with the
    # separator below would otherwise raise TypeError.
    prefix = prefix or ""

    if prefix and not force_suffix:
        name = os.path.join(dest, prefix + ".log")
    else:
        name = None
    counter = 0
    sep = "-" if prefix else ""

    while not name or os.path.exists(name):
        counter += 1
        name = os.path.join(dest, prefix + sep + str(counter) + ".log")

    # Issue the request before creating the file so that a failed request does
    # not leave an empty log behind.  Timeout and status check mirror
    # fetch_json so an HTTP error page is never saved as if it were a log.
    resp = requests.get(url, stream=True, timeout=30)
    try:
        resp.raise_for_status()
        with open(name, "wb") as f:
            for chunk in resp.iter_content(1024):
                f.write(chunk)
    finally:
        # Streamed responses hold their connection until closed.
        resp.close()


def fetch_json(url, params=None):
    """GET ``url`` and return the decoded JSON body.

    The request asks for ``application/json``, identifies itself as
    ``wpt-fetchlogs`` and uses a 30 second timeout.

    :param url: URL to request.
    :param params: optional query-string parameters passed to ``requests``.
    :returns: the value produced by ``response.json()``.
    :raises requests.HTTPError: if the response has an error status.
    """
    request_headers = {'Accept': 'application/json', 'User-Agent': 'wpt-fetchlogs'}
    reply = requests.get(url=url,
                         params=params,
                         headers=request_headers,
                         timeout=30)
    # Fail loudly on 4xx/5xx rather than trying to decode an error page.
    reply.raise_for_status()
    return reply.json()


def get_blobber_url(branch, job):
    """Return the URL of the raw structured log attached to ``job``, if any.

    Looks the job up through the ``/api/jobdetail/`` endpoint and returns the
    ``url`` of the first entry in ``results`` whose ``value`` is
    ``wpt_raw.log`` or ``log_raw.log``.

    :param branch: not used by the lookup; kept so existing callers keep
                   working.
    :param job: mapping with a ``job_guid`` key identifying the job.
    :returns: the log URL, or ``None`` when the response is empty, has no
              matching entry, or does not have the expected shape.
    """
    artifact_url = urlparse.urljoin(treeherder_base, "/api/jobdetail/")
    artifact_params = {
        'job_guid': job["job_guid"],
    }
    job_data = fetch_json(artifact_url, params=artifact_params)

    if not job_data:
        return None

    try:
        for item in job_data["results"]:
            if item["value"] in ("wpt_raw.log", "log_raw.log"):
                return item["url"]
    except (KeyError, TypeError):
        # The response did not have the expected shape; treat it as "no log".
        # Only lookup errors are caught so that KeyboardInterrupt/SystemExit
        # and genuine programming errors are no longer swallowed.
        return None

    return None


def get_structured_logs(branch, commit, dest=None):
    """Download the raw logs of every web-platform-tests job of a push.

    Resolves ``commit`` on ``branch`` to a result set, lists that result
    set's jobs, keeps the web-platform-tests ones, looks up each job's raw
    log URL and downloads it.  Files are named after the job's ``platform``
    value.

    :param branch: Treeherder project name the push was made to.
    :param commit: revision hash of the push.
    :param dest: directory the logs are written to; ``None`` lets
                 ``download`` use the current directory.
    :raises IndexError: if no result set exists for ``commit`` on ``branch``.
    """
    resultset_url = urlparse.urljoin(treeherder_base, "/api/project/%s/resultset/" % branch)
    resultset_params = {
        'revision': commit,
    }
    revision_data = fetch_json(resultset_url, params=resultset_params)
    result_sets = revision_data["results"]
    if not result_sets:
        # Same exception type as indexing the empty list, but with a message
        # that tells the user what actually went wrong.
        raise IndexError("No result set found for revision %s on %s" % (commit, branch))
    result_set = result_sets[0]["id"]

    jobs_url = urlparse.urljoin(treeherder_base, "/api/project/%s/jobs/" % branch)
    jobs_params = {
        'result_set_id': result_set,
        'count': 2000,
        'exclusion_profile': 'false',
    }
    job_data = fetch_json(jobs_url, params=jobs_params)

    tasks = []

    for result in job_data["results"]:
        job_type_name = result["job_type_name"]
        # Two naming schemes are recognised: "W3C Web Platform ..." and
        # "test-...-web-platform-tests-...".
        is_wpt_job = (job_type_name.startswith("W3C Web Platform") or
                      (job_type_name.startswith("test-") and
                       "-web-platform-tests-" in job_type_name))
        if not is_wpt_job:
            continue

        url = get_blobber_url(branch, result)
        if url:
            prefix = result["platform"]
            # Pass the caller's destination through; it used to be dropped in
            # favour of a hard-coded None.
            tasks.append((url, prefix, dest))

    for task in tasks:
        download(*task)

def main():
    """Command-line entry point.

    Parses ``branch`` and ``commit`` from the command line and fetches the
    matching structured logs.
    """
    options = create_parser().parse_args()
    get_structured_logs(options.branch, options.commit)

# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()