from multiprocessing.pool import ThreadPool
import os
import re
import sys


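# make mozharness importable when this script is run from its checkout:
# add the directory two levels above this script (the mozharness root)
# to sys.path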
sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))

from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
from mozharness.base.script import BaseScript
from mozharness.mozilla.aws import pop_aws_auth_from_env


class ReleasePusher(BaseScript, VirtualenvMixin):
    config_options = [
        [["--product"], {
            "dest": "product",
            "help": "Product being released, eg: firefox, thunderbird",
        }],
        [["--version"], {
            "dest": "version",
            "help": "Version of release, eg: 39.0b5",
        }],
        [["--build-number"], {
            "dest": "build_number",
            "help": "Build number of release, eg: 2",
        }],
        [["--bucket-name"], {
            "dest": "bucket_name",
            "help": "Bucket to copy files from candidates/ to releases/",
        }],
        [["--credentials"], {
            "dest": "credentials",
            "help": "File containing access key and secret access key",
        }],
        [["--exclude"], {
            "dest": "excludes",
            "default": [
                r"^.*tests.*$",
                r"^.*crashreporter.*$",
                r"^.*[^k]\.zip(\.asc)?$",
                r"^.*\.log$",
                r"^.*\.txt$",
                r"^.*/partner-repacks.*$",
                r"^.*.checksums(\.asc)?$",
                r"^.*/logs/.*$",
                r"^.*/jsshell.*$",
                r"^.*json$",
                r"^.*/host.*$",
                r"^.*/mar-tools/.*$",
                r"^.*robocop.apk$",
                r"^.*bouncer.apk$",
                r"^.*contrib.*",
                r"^.*/beetmover-checksums/.*$",
            ],
            "action": "append",
            "help": "List of patterns to exclude from copy. The list can be "
                    "extended by passing multiple --exclude arguments.",
        }],
        [["-j", "--parallelization"], {
            "dest": "parallelization",
            "default": 20,
            "type": "int",
            "help": "Number of copy requests to run concurrently",
        }],
    ] + virtualenv_config_options

    def __init__(self, aws_creds):
        BaseScript.__init__(self,
            config_options=self.config_options,
            require_config_file=False,
            config={
                "virtualenv_modules": [
                    "boto",
                    "redo",
                ],
                "virtualenv_path": "venv",
            },
            all_actions=[
                "create-virtualenv",
                "activate-virtualenv",
                "push-to-releases",
            ],
            default_actions=[
                "create-virtualenv",
                "activate-virtualenv",
                "push-to-releases",
            ],
        )

        # validate aws credentials
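        # aws_creds is the (access key id, secret access key) pair that
        # pop_aws_auth_from_env() pulled out of the environment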
        if not (all(aws_creds) or self.config.get('credentials')):
            self.fatal("aws creds not defined. please add them to your config or env.")
        if any(aws_creds) and self.config.get('credentials'):
            self.fatal("aws creds found in env and self.config. please declare in one place only.")

        # set aws credentials
        if all(aws_creds):
            self.aws_key_id, self.aws_secret_key = aws_creds
        else:  # use the boto credentials file instead
            self.aws_key_id, self.aws_secret_key = None, None
            # set the env var for boto to read our special config file
            # rather than anything else we have at ~/.boto
            os.environ["BOTO_CONFIG"] = os.path.abspath(self.config["credentials"])

    def _get_candidates_prefix(self):
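        # e.g. "pub/firefox/candidates/39.0b5-candidates/build2/" for the
        # sample values used in the option help strings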
        return "pub/{}/candidates/{}-candidates/build{}/".format(
            self.config['product'],
            self.config["version"],
            self.config["build_number"]
        )

    def _get_releases_prefix(self):
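        # e.g. "pub/firefox/releases/39.0b5/"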
        return "pub/{}/releases/{}/".format(
            self.config["product"],
            self.config["version"]
        )

    def _matches_exclude(self, keyname):
        for exclude in self.config["excludes"]:
            if re.search(exclude, keyname):
                return True
        return False

    def push_to_releases(self):
        """This step grabs the list of files in the candidates dir,
        filters out any unwanted files from within them, and copies
        the remainder."""
        from boto.s3.connection import S3Connection
        from boto.exception import S3CopyError, S3ResponseError
        from redo import retry

        # suppress boto debug logging, it's too verbose with --loglevel=debug
        import logging
        logging.getLogger('boto').setLevel(logging.INFO)

        self.info("Connecting to S3")
        conn = S3Connection(aws_access_key_id=self.aws_key_id,
                            aws_secret_access_key=self.aws_secret_key)
        self.info("Getting bucket {}".format(self.config["bucket_name"]))
        bucket = conn.get_bucket(self.config["bucket_name"])

        # ensure the destination is empty
        self.info("Checking destination {} is empty".format(self._get_releases_prefix()))
        keys = list(bucket.list(prefix=self._get_releases_prefix()))
        if keys:
            self.warning("Destination already exists with {} keys".format(
                len(keys)))

        def worker(item):
            source, destination = item

            def copy_key():
                source_key = bucket.get_key(source)
                dest_key = bucket.get_key(destination)
                # According to http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html
                # an S3 key's MD5 is exposed as its ETag, except for objects
                # uploaded with the multipart method, whose ETag is an MD5, a
                # hyphen, and the number of parts. See
                # http://stackoverflow.com/questions/12186993/what-is-the-algorithm-to-compute-the-amazon-s3-etag-for-a-file-larger-than-5gb#answer-19896823
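                # e.g. a multipart ETag of "d41d8cd98f00b204e9800998ecf8427e-2"
                # is split on "-" so only the MD5-style prefix is compared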
                source_md5 = source_key.etag.split("-")[0]
                if dest_key:
                    dest_md5 = dest_key.etag.split("-")[0]
                else:
                    dest_md5 = None

                if not dest_key:
                    self.info("Copying {} to {}".format(source, destination))
                    bucket.copy_key(destination, self.config["bucket_name"],
                                    source)
                elif source_md5 == dest_md5:
                    self.warning(
                        "{} already exists with the same content ({}), skipping copy".format(
                            destination, dest_md5))
                else:
                    self.fatal(
                        "{} already exists with different content "
                        "(src ETag: {}, dest ETag: {}), aborting".format(
                            destination, source_key.etag, dest_key.etag))

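            # retry transient S3 failures with backoff: start at 5s between
            # attempts, never sleeping longer than 60s between tries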
            return retry(copy_key, sleeptime=5, max_sleeptime=60,
                         retry_exceptions=(S3CopyError, S3ResponseError))

        def find_release_files():
            candidates_prefix = self._get_candidates_prefix()
            release_prefix = self._get_releases_prefix()
            self.info("Getting key names from candidates")
            for key in bucket.list(prefix=candidates_prefix):
                keyname = key.name
                if self._matches_exclude(keyname):
                    self.debug("Excluding {}".format(keyname))
                else:
                    destination = keyname.replace(candidates_prefix,
                                                  release_prefix)
                    yield (keyname, destination)

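        # fan the copy requests out across a thread pool; map() blocks until
        # every copy has completed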
        pool = ThreadPool(self.config["parallelization"])
        pool.map(worker, find_release_files())

if __name__ == "__main__":
    myScript = ReleasePusher(pop_aws_auth_from_env())
    myScript.run_and_exit()
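
# A hypothetical invocation (the script filename, bucket name, and
# credentials path are illustrative; the other values mirror the examples
# in the option help strings):
#
#   python push-candidate-to-releases.py \
#       --product firefox --version 39.0b5 --build-number 2 \
#       --bucket-name example-releases-bucket \
#       --credentials ./s3-release-credentials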