# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Downloads Heavy profiles from TaskCluster.
"""
import datetime
import functools
import os
import tarfile
from email.utils import parsedate
import requests
from mozlog import get_proxy_logger
from requests.adapters import HTTPAdapter
# Module-level proxy logger used by the download/extraction progress output.
LOG = get_proxy_logger()
# TaskCluster artifact URL template; "%s" is interpolated with the profile
# name (see download_profile).
# NOTE(review): this looks like only the path suffix of the full TaskCluster
# index URL — the scheme/host/index portion appears to be missing from this
# copy; verify against the original source before relying on it.
TC_LINK = (
"artifacts/public/today-%s.tgz"
)
class ProgressBar(object):
    """Coarse-grained progress reporter.

    Counts calls to incr() against an expected total and emits one log
    line (via LOG.info) each time progress crosses a new 10% boundary.
    Usable as a context manager; it never suppresses exceptions.
    """

    def __init__(self, size, template="\r%d%%"):
        # ``template`` is %-formatted with the current percentage.
        self.template = template
        # expected number of incr() calls
        self.size = size
        # calls seen so far
        self.current = 0
        # last tens-of-percent bucket that was logged
        self.tens = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to clean up; propagate any exception.
        return False

    def incr(self):
        """Record one unit of progress, logging on each new 10% step."""
        # Ignore extra calls once the expected total has been reached.
        if self.current == self.size:
            return
        percent = float(self.current) / float(self.size) * 100
        tens = divmod(percent, 10)[0]
        if tens > self.tens:
            LOG.info(self.template % percent)
            self.tens = tens
        self.current += 1
def follow_redirects(url, max=3):
    """Follow HTTP 303 redirects for *url* using HEAD requests.

    Returns a ``(location, last_modified)`` tuple where ``location`` is the
    final URL and ``last_modified`` is a naive :class:`datetime.datetime`
    parsed from the ``Last-Modified`` header, or ``None`` when the header
    is absent or unparseable.

    Raises ``ValueError`` when more than *max* 303 redirects are seen.
    """
    location = url
    current = 0
    page = requests.head(url)
    while page.status_code == 303 and current < max:
        current += 1
        location = page.headers["Location"]
        page = requests.head(location)
    if page.status_code == 303 and current == max:
        raise ValueError("Max redirects Reached")
    last_modified = page.headers.get("Last-Modified", None)
    if last_modified is not None:
        # parsedate() returns None for a malformed header; treat that the
        # same as a missing header instead of crashing on ``None[:6]``.
        parsed = parsedate(last_modified)
        if parsed is not None:
            last_modified = datetime.datetime(*parsed[:6])
        else:
            last_modified = None
    return location, last_modified
def _recursive_mtime(path):
max = os.path.getmtime(path)
for root, dirs, files in os.walk(path):
for element in dirs + files:
age = os.path.getmtime(os.path.join(root, element))
if age > max:
max = age
return max
def profile_age(profile_dir, last_modified=None):
    """Return the age of *profile_dir* in whole days.

    Age is measured from the newest mtime anywhere under the profile
    directory up to *last_modified*, which defaults to the current
    (naive, local) time when not provided.
    """
    reference = (
        datetime.datetime.now() if last_modified is None else last_modified
    )
    newest_ts = datetime.datetime.fromtimestamp(_recursive_mtime(profile_dir))
    return (reference - newest_ts).days
def download_profile(name, profiles_dir=None):
    """Download and extract the heavy profile *name* from TaskCluster.

    The tarball is cached under ``<profiles_dir>/.cache`` and extracted to
    ``<profiles_dir>/<name>``. When an extracted copy already exists and is
    less than a week older than the server's Last-Modified date, the
    download is skipped.

    Returns the path of the extracted profile directory.
    Raises ``requests.HTTPError`` on a failed download and ``ValueError``
    via follow_redirects() on too many redirects.
    """
    if profiles_dir is None:
        profiles_dir = os.path.join(os.path.expanduser("~"), ".mozilla", "profiles")
    profiles_dir = os.path.abspath(profiles_dir)
    if not os.path.exists(profiles_dir):
        os.makedirs(profiles_dir)
    target = os.path.join(profiles_dir, name)
    url = TC_LINK % name
    cache_dir = os.path.join(profiles_dir, ".cache")
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    archive_file = os.path.join(cache_dir, "today-%s.tgz" % name)
    url, last_modified = follow_redirects(url)
    if os.path.exists(target):
        age = profile_age(target, last_modified)
        if age < 7:
            # profile is not older than a week, we're good
            LOG.info("Local copy of %r is fresh enough" % name)
            LOG.info("%d days old" % age)
            return target
    LOG.info("Downloading from %r" % url)
    session = requests.Session()
    session.mount("https://", HTTPAdapter(max_retries=5))
    req = session.get(url, stream=True, timeout=20)
    req.raise_for_status()
    # A missing Content-Length header would make int(None) raise TypeError;
    # fall back to 0 so the progress bar simply never advances.
    total_length = int(req.headers.get("content-length") or 0)
    # XXX implement Range to resume download on disconnects
    template = "Download progress %d%%"
    with open(archive_file, "wb") as f:
        # don't shadow the ``iter`` builtin
        chunks = req.iter_content(chunk_size=1024)
        # Floor division keeps the expected chunk count an int under
        # Python 3 (true division returned a float — pylint py3k W1619).
        size = total_length // 1024 + 1
        with ProgressBar(size=size, template=template) as bar:
            for chunk in chunks:
                if chunk:
                    f.write(chunk)
                    bar.incr()
    LOG.info("Extracting profile in %r" % target)
    template = "Extraction progress %d%%"
    with tarfile.open(archive_file, "r:gz") as tar:
        LOG.info("Checking the tarball content...")
        size = len(list(tar))
        with ProgressBar(size=size, template=template) as bar:
            # Wrap tar.extract so extractall() ticks the progress bar once
            # per member.
            def _extract(self, *args, **kw):
                bar.incr()
                return self.old(*args, **kw)
            tar.old = tar.extract
            tar.extract = functools.partial(_extract, tar)
            # NOTE(review): extractall() trusts member paths; a hostile
            # archive could write outside *target* (path traversal). The
            # tarball comes from TaskCluster, but consider validating
            # members or using the extraction filter on Python 3.12+.
            tar.extractall(target)
    LOG.info("Profile downloaded.")
    return target