handlers.py - mozsearch

mozilla-central/testing/web-platform/tests/tools/wptserve/wptserve/handlers.py (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Testing :: web-platform-tests

Revision control

Copy as Markdown

Other Tools

# mypy: allow-untyped-defs

import json

import os

import pathlib

from collections import defaultdict

from urllib.parse import quote, unquote, urljoin

from .constants import content_types

from .pipes import Pipeline, template

from .ranges import RangeParser

from .request import Authentication

from .response import MultipartContent

from .utils import HTTPException

from html import escape

# Names exported by ``from <module> import *``.
__all__ = ["file_handler", "python_script_handler",

           "FunctionHandler", "handler", "json_handler",

           "as_is_handler", "ErrorHandler", "BasicAuthHandler"]

def guess_content_type(path):
    """Return the content type registered for the extension of ``path``.

    The extension is taken from ``os.path.splitext`` with the leading dot
    removed and looked up in ``content_types``.

    :param path: File path (or name) whose extension is inspected
    :return: The matching entry of ``content_types``, or
             ``"application/octet-stream"`` when the extension is unknown
    """
    extension = os.path.splitext(path)[1].lstrip(".")
    if extension not in content_types:
        return "application/octet-stream"
    return content_types[extension]

def filesystem_path(base_path, request, url_base="/"):
    """Map the URL path of ``request`` to a path below ``base_path``.

    :param base_path: Root directory to resolve against; when ``None`` the
                      request's ``doc_root`` is used
    :param request: The request object; ``request.url_parts.path`` is read
                    (and ``request.doc_root`` when ``base_path`` is ``None``)
    :param url_base: URL prefix stripped from the request path before it is
                     joined onto ``base_path``
    :return: ``os.path.join(base_path, <relative path>)``
    :raises HTTPException: 404 if the decoded path contains ``..`` or
                           resolves to a location outside ``base_path``
    """
    if base_path is None:
        base_path = request.doc_root

    path = unquote(request.url_parts.path)

    if path.startswith(url_base):
        path = path[len(url_base):]

    if ".." in path:
        raise HTTPException(404)

    new_path = os.path.join(base_path, path)

    # os.path.join discards base_path when `path` is absolute, so the result
    # has to be re-checked against the root. A plain string-prefix test is not
    # enough: with root "/srv/www" it would also accept "/srv/www-secret/x".
    # Compare normalised absolute paths on a path-component boundary instead.
    root = os.path.abspath(base_path)
    candidate = os.path.abspath(new_path)
    root_prefix = root.rstrip(os.sep) + os.sep
    if candidate != root and not candidate.startswith(root_prefix):
        raise HTTPException(404)

    return new_path

class DirectoryHandler:
    """Handler that responds with an HTML listing of a directory."""

    def __init__(self, base_path=None, url_base="/"):
        """
        :param base_path: Filesystem root passed to ``filesystem_path``
                          (``None`` makes it fall back to the request's
                          doc_root)
        :param url_base: URL prefix passed to ``filesystem_path``
        """
        self.base_path = base_path
        self.url_base = url_base

    def __repr__(self):
        return "<%s base_path:%s url_base:%s>" % (self.__class__.__name__, self.base_path, self.url_base)

    def __call__(self, request, response):
        """Write the directory listing for the request into ``response``.

        URLs without a trailing slash are answered with a 301 redirect to
        the same URL plus ``/`` so relative links in the listing resolve.

        :param request: The request object
        :param response: The response object to populate
        :raises HTTPException: 404 if the resolved path is not a directory
        """
        url_path = request.url_parts.path

        if not url_path.endswith("/"):
            response.status = 301
            response.headers = [("Location", "%s/" % request.url)]
            return

        path = filesystem_path(self.base_path, request, self.url_base)

        # Raise explicitly instead of asserting: asserts are stripped under
        # ``python -O`` and this condition depends on the incoming request.
        if not os.path.isdir(path):
            raise HTTPException(404)

        response.headers = [("Content-Type", "text/html")]
        response.content = """<!doctype html>

<meta name="viewport" content="width=device-width">

<title>Directory listing for %(path)s</title>

<h1>Directory listing for %(path)s</h1>

<ul>

%(items)s

</ul>

""" % {"path": escape(url_path),
       "items": "\n".join(self.list_items(url_path, path))}  # noqa: E122

    def list_items(self, base_path, path):
        """Yield one ``<li>`` markup string per entry of directory ``path``.

        A ``..`` entry comes first unless ``base_path`` is ``/``. A
        ``NAME.headers`` file that directly follows ``NAME`` in the sort
        order is rendered as a secondary link on ``NAME``'s line instead of
        getting a line of its own.

        :param base_path: URL path of the directory; must end with ``/``
        :param path: Filesystem path of the directory to list
        """
        assert base_path.endswith("/")

        # TODO: this won't actually list all routes, only the
        # ones that correspond to a real filesystem path. It's
        # not possible to list every route that will match
        # something, but it should be possible to at least list the
        # statically defined ones

        if base_path != "/":
            link = urljoin(base_path, "..")
            # base_path comes straight from the request URL, so it must be
            # escaped before being placed inside the href attribute, just
            # like the per-entry links below.
            yield ("""<li class="dir"><a href="%(link)s">%(name)s</a></li>""" %
                   {"link": escape(link), "name": ".."})

        items = []
        prev_item = None

        # This ensures that .headers always sorts after the file it provides the headers for. E.g.,
        # if we have x, x-y, and x.headers, the order will be x, x.headers, and then x-y.
        for item in sorted(os.listdir(path), key=lambda x: (x[:-len(".headers")], x) if x.endswith(".headers") else (x, x)):
            if prev_item and prev_item + ".headers" == item:
                # Attach the .headers file to the entry it belongs to.
                items[-1][1] = item
                prev_item = None
                continue
            items.append([item, None])
            prev_item = item

        for item, dot_headers in items:
            link = escape(quote(item))
            dot_headers_markup = ""
            if dot_headers is not None:
                dot_headers_markup = (""" (<a href="%(link)s">.headers</a>)""" %
                                      {"link": escape(quote(dot_headers))})
            if os.path.isdir(os.path.join(path, item)):
                link += "/"
                class_ = "dir"
            else:
                class_ = "file"
            yield ("""<li class="%(class)s"><a href="%(link)s">%(name)s</a>%(headers)s</li>""" %
                   {"link": link, "name": escape(item), "class": class_,
                    "headers": dot_headers_markup})

def parse_qs(qs):
    """Parse an ``application/x-www-form-urlencoded`` query string.

    Fields are separated by ``&`` only, so a semicolon stays part of the
    value (Python's built-in ``parse_qs`` is avoided for that reason).
    Fields without ``=`` or with an empty value are dropped; ``+`` is
    decoded as a space and percent-escapes are unquoted.

    :param qs: The query string to parse
    :return: Dictionary mapping each field name to the list of its values,
             in order of appearance
    """
    parsed = {}
    for field in qs.split("&"):
        raw_name, separator, raw_value = field.partition("=")
        if not separator or not raw_value:
            # Covers empty fields, bare names and names with blank values.
            continue
        name = unquote(raw_name.replace("+", " "))
        value = unquote(raw_value.replace("+", " "))
        parsed.setdefault(name, []).append(value)
    return parsed

def wrap_pipeline(path, request, response):
    """Run the pipes requested for ``path`` / ``request`` over ``response``.

    Pipes come from two places: a ``.sub.`` component in ``path`` adds a
    ``sub`` pipe (with HTML escaping for markup file extensions), and a
    ``pipe`` query parameter appends whatever it names (last occurrence
    wins).

    :param path: Path of the resource being served
    :param request: The request object; its query string is inspected
    :param response: The response object to transform
    :return: Result of applying the pipeline, or ``response`` unchanged when
             no pipe applies
    """
    params = parse_qs(request.url_parts.query)
    pipe_string = ""

    if ".sub." in path:
        ml_extensions = {".html", ".htm", ".xht", ".xhtml", ".xml", ".svg"}
        suffix = os.path.splitext(path)[1]
        if suffix in ml_extensions:
            escape_type = "html"
        else:
            escape_type = "none"
        pipe_string = "sub(%s)" % escape_type

    if "pipe" in params:
        requested = params["pipe"][-1]
        pipe_string = "%s|%s" % (pipe_string, requested) if pipe_string else requested

    if not pipe_string:
        return response
    return Pipeline(pipe_string)(request, response)

def load_headers(request, path):
    """Read the header files that apply to ``path``.

    Two locations are consulted, and their headers concatenated in this
    order: ``__dir__`` in the same directory as ``path``, then ``path``
    itself. For each, ``<name>.sub.headers`` is preferred (and run through
    template substitution); otherwise ``<name>.headers`` is read verbatim.
    Unreadable or missing files contribute no headers.

    :param request: The request object, handed to the template substitution
    :param path: Filesystem path of the resource being served
    :return: List of tuples produced by splitting each non-empty line on the
             first ``:`` and stripping both parts (bytes)
    """
    def _read_headers(req, target):
        candidate = target + ".sub.headers"
        substitute = os.path.exists(candidate)
        if not substitute:
            candidate = target + ".headers"

        try:
            with open(candidate, "rb") as stream:
                raw = stream.read()
        except OSError:
            return []

        if substitute:
            raw = template(req, raw, escape_type="none")

        parsed = []
        for line in raw.splitlines():
            if line:
                parsed.append(tuple(part.strip() for part in line.split(b":", 1)))
        return parsed

    directory_defaults = os.path.join(os.path.dirname(path), "__dir__")
    return _read_headers(request, directory_defaults) + _read_headers(request, path)

class FileHandler:
    """Handler that serves files from the filesystem.

    Directories are delegated to a ``DirectoryHandler`` built with the same
    ``base_path`` / ``url_base``. ``Range`` requests are answered with
    partial content.
    """

    def __init__(self, base_path=None, url_base="/"):
        """
        :param base_path: Filesystem root passed to ``filesystem_path``
        :param url_base: URL prefix passed to ``filesystem_path``
        """
        self.base_path = base_path
        self.url_base = url_base
        self.directory_handler = DirectoryHandler(self.base_path, self.url_base)

    def __repr__(self):
        return "<%s base_path:%s url_base:%s>" % (self.__class__.__name__, self.base_path, self.url_base)

    def __call__(self, request, response):
        """Serve the file (or directory listing) addressed by ``request``.

        :param request: The request object
        :param response: The response object to populate
        :return: The response after any pipeline has been applied (or the
                 directory handler's result for directories)
        :raises HTTPException: 404 when the file cannot be accessed; any
                               error raised by the range parser is
                               propagated
        """
        path = filesystem_path(self.base_path, request, self.url_base)

        if os.path.isdir(path):
            return self.directory_handler(request, response)

        try:
            # This is probably racy with some other process trying to change the file
            file_size = os.stat(path).st_size
            response.headers.update(self.get_headers(request, path))

            if "Range" in request.headers:
                try:
                    byte_ranges = RangeParser()(request.headers['Range'], file_size)
                except HTTPException as e:
                    if e.code == 416:
                        response.headers.set("Content-Range", "bytes */%i" % file_size)
                    # Always propagate. Re-raising only for 416 would swallow
                    # every other parser error and then fail below with
                    # `byte_ranges` unbound.
                    raise
            else:
                byte_ranges = None

            data = self.get_data(response, path, byte_ranges)
            response.content = data
            response = wrap_pipeline(path, request, response)
            return response
        except OSError:
            raise HTTPException(404)

    def get_headers(self, request, path):
        """Return the headers loaded for ``path``.

        A guessed ``Content-Type`` is inserted at the front when none of the
        loaded headers provides one.
        """
        rv = load_headers(request, path)

        if not any(key.lower() == b"content-type" for (key, _) in rv):
            rv.insert(0, (b"Content-Type", guess_content_type(path).encode("ascii")))

        return rv

    def get_data(self, response, path, byte_ranges):
        """Return either the handle to a file, or a string containing
        the content of a chunk of the file, if we have a range request.

        :param response: The response object; for range requests its status
                         is set to 206 and range headers are set on it
        :param path: Filesystem path of the file to serve
        :param byte_ranges: ``None`` for a whole-file response, otherwise a
                            sequence of range objects
        :return: An open binary file object (no ranges), the bytes of the
                 single requested range, or the multipart content object
                 when several ranges were requested
        """
        if byte_ranges is None:
            # The open handle is deliberately returned to the caller.
            return open(path, 'rb')

        with open(path, 'rb') as f:
            response.status = 206

            if len(byte_ranges) > 1:
                parts_content_type, content = self.set_response_multipart(response,
                                                                          byte_ranges,
                                                                          f)
                for byte_range in byte_ranges:
                    content.append_part(self.get_range_data(f, byte_range),
                                        parts_content_type,
                                        [("Content-Range", byte_range.header_value())])
                return content

            response.headers.set("Content-Range", byte_ranges[0].header_value())
            return self.get_range_data(f, byte_ranges[0])

    def set_response_multipart(self, response, ranges, f):
        """Switch ``response`` to a ``multipart/byteranges`` content type.

        :param response: The response object whose Content-Type is replaced
        :param ranges: Unused by this implementation
        :param f: Unused by this implementation
        :return: Tuple of (content type to use for the individual parts, or
                 ``None`` if none was set; new ``MultipartContent`` object)
        """
        parts_content_type = response.headers.get("Content-Type")
        if parts_content_type:
            # Use the last Content-Type value that was set.
            parts_content_type = parts_content_type[-1]
        else:
            parts_content_type = None

        content = MultipartContent()
        response.headers.set("Content-Type", "multipart/byteranges; boundary=%s" % content.boundary)
        return parts_content_type, content

    def get_range_data(self, f, byte_range):
        """Read ``byte_range.upper - byte_range.lower`` bytes from ``f``,
        starting at offset ``byte_range.lower``."""
        f.seek(byte_range.lower)
        return f.read(byte_range.upper - byte_range.lower)

# Module-level FileHandler instance created with the default arguments
# (base_path=None, url_base="/").
file_handler = FileHandler()  # type: ignore

class PythonScriptHandler:
    """Handler that executes a Python file and dispatches to the functions
    it defines."""

    def __init__(self, base_path=None, url_base="/"):
        """
        :param base_path: Filesystem root passed to ``filesystem_path``
        :param url_base: URL prefix passed to ``filesystem_path``
        """
        self.base_path = base_path
        self.url_base = url_base

    def __repr__(self):
        return "<%s base_path:%s url_base:%s>" % (self.__class__.__name__, self.base_path, self.url_base)

    def _load_file(self, request, response, func):
        """
        This loads the requested python file as an environ variable.

        If the requested file is a directory, this instead loads the first
        lexicographically sorted file found in that directory that matches
        "default*.py".

        Once the environ is loaded, the passed `func` is run with this loaded environ.

        :param request: The request object
        :param response: The response object
        :param func: The function to be run with the loaded environ with the modified filepath. Signature: (request, response, environ, path)
        :return: The return of func
        :raises HTTPException: 404 if an ``OSError`` occurs while opening,
                               executing or dispatching the script
        """
        path = filesystem_path(self.base_path, request, self.url_base)

        # Find a default Python file if the specified path is a directory
        if os.path.isdir(path):
            default_py_files = sorted(
                candidate
                for candidate in pathlib.Path(path).glob("default*.py")
                if candidate.is_file())
            if default_py_files:
                path = str(default_py_files[0])

        try:
            environ = {"__file__": path}
            with open(path, 'rb') as f:
                # NOTE(review): executes the file's contents with exec();
                # everything reachable under base_path must be trusted code.
                exec(compile(f.read(), path, 'exec'), environ, environ)

            if func is not None:
                return func(request, response, environ, path)
        except OSError:
            raise HTTPException(404)

    def __call__(self, request, response):
        """Run the script's ``main`` function for ``request``.

        :raises HTTPException: 500 if the script does not define ``main``
        """
        def func(request, response, environ, path):
            if "main" in environ:
                handler = FunctionHandler(environ["main"])
                handler(request, response)
                wrap_pipeline(path, request, response)
            else:
                raise HTTPException(500, "No main function in script %s" % path)

        self._load_file(request, response, func)

    def frame_handler(self, request):
        """
        This creates a FunctionHandler with one or more of the handling functions.

        Used by the H2 server.

        :param request: The request object used to generate the handler.
        :return: A FunctionHandler object with one or more of these functions: `handle_headers`, `handle_data` or `main`
        :raises HTTPException: 500 if the script defines none of them
        """
        def func(request, response, environ, path):
            # Placeholder used to detect a script that has no `main`.
            def _main(req, resp):
                pass

            handler = FunctionHandler(_main)

            if "main" in environ:
                handler.func = environ["main"]
            if "handle_headers" in environ:
                handler.handle_headers = environ["handle_headers"]
            if "handle_data" in environ:
                handler.handle_data = environ["handle_data"]

            if handler.func is _main and not hasattr(handler, "handle_headers") and not hasattr(handler, "handle_data"):
                raise HTTPException(500, "No main function or handlers in script %s" % path)

            return handler

        return self._load_file(request, None, func)

# Module-level PythonScriptHandler instance created with the default
# arguments (base_path=None, url_base="/").
python_script_handler = PythonScriptHandler()  # type: ignore

class FunctionHandler:
    """Handler that delegates to a plain ``func(request, response)``.

    The function may return ``None`` (the response is left as the function
    set it up), the content alone, a ``(headers, content)`` tuple or a
    ``(status, headers, content)`` tuple.
    """

    def __init__(self, func):
        """:param func: Callable taking ``(request, response)``"""
        self.func = func

    def __call__(self, request, response):
        """Invoke the wrapped function and apply its result to ``response``.

        :raises HTTPException: re-raised unchanged from the function; 500 for
                               any other exception it raises or for a tuple
                               result that has neither 2 nor 3 items
        """
        try:
            result = self.func(request, response)
        except HTTPException:
            raise
        except Exception as exc:
            raise HTTPException(500) from exc

        if result is None:
            # The function handled the response itself.
            return

        if isinstance(result, tuple):
            size = len(result)
            if size == 3:
                status, headers, content = result
                response.status = status
            elif size == 2:
                headers, content = result
            else:
                raise HTTPException(500)
            response.headers.update(headers)
        else:
            content = result

        response.content = content
        wrap_pipeline('', request, response)

# The generic name here is so that this can be used as a decorator
def handler(func):
    """Wrap ``func`` in a ``FunctionHandler`` and return the wrapper."""
    wrapped = FunctionHandler(func)
    return wrapped

class JsonHandler:
    """Handler that serialises the wrapped function's result as JSON."""

    def __init__(self, func):
        """:param func: Callable taking ``(request, response)``"""
        self.func = func

    def __call__(self, request, response):
        delegate = FunctionHandler(self.handle_request)
        return delegate(request, response)

    def handle_request(self, request, response):
        """Call the wrapped function and JSON-encode what it returns.

        For a tuple result only the last item is encoded and the preceding
        items are passed through unchanged. The ``Content-Type`` and
        ``Content-Length`` headers are set on ``response``.

        :return: The encoded string, or the tuple with its last item encoded
        """
        result = self.func(request, response)
        response.headers.set("Content-Type", "application/json")

        if isinstance(result, tuple):
            encoded = json.dumps(result[-1])
            value = tuple(result[:-1]) + (encoded,)
        else:
            encoded = json.dumps(result)
            value = encoded

        response.headers.set("Content-Length", len(encoded))
        return value

def json_handler(func):
    """Wrap ``func`` in a ``JsonHandler``; usable as a decorator."""
    wrapped = JsonHandler(func)
    return wrapped

class AsIsHandler:
    """Handler that writes a file's bytes to the connection as raw content."""

    def __init__(self, base_path=None, url_base="/"):
        """
        :param base_path: Filesystem root passed to ``filesystem_path``
        :param url_base: URL prefix passed to ``filesystem_path``
        """
        self.base_path = base_path
        self.url_base = url_base

    def __call__(self, request, response):
        """Send the file addressed by ``request`` without any processing.

        The connection is marked to be closed afterwards.

        :raises HTTPException: 500 if the path is a directory, 404 if the
                               file cannot be read
        """
        path = filesystem_path(self.base_path, request, self.url_base)

        if os.path.isdir(path):
            message = "AsIsHandler cannot process directory, %s" % path
            raise HTTPException(500, message)

        try:
            with open(path, 'rb') as source:
                raw = source.read()
                response.writer.write_raw_content(raw)
            wrap_pipeline(path, request, response)
            response.close_connection = True
        except OSError:
            raise HTTPException(404)

# Module-level AsIsHandler instance created with the default arguments
# (base_path=None, url_base="/").
as_is_handler = AsIsHandler()  # type: ignore

class BasicAuthHandler:
    """Handler that requires HTTP Basic credentials before delegating to
    another handler."""

    def __init__(self, handler, user, password):
        """
        A Basic Auth handler

        :Args:
        - handler: a secondary handler for the request after authentication is successful (example file_handler)
        - user: string of the valid user name or None if any / all credentials are allowed
        - password: string of the password required
        """
        self.user = user
        self.password = password
        self.handler = handler

    def __call__(self, request, response):
        """Authenticate ``request`` and, on success, run the wrapped handler.

        :param request: The request object
        :param response: The response object
        :return: ``response`` with status 401 and a ``WWW-Authenticate``
                 header when no ``authorization`` header was sent;
                 ``response`` with a 403 error when the credentials do not
                 match; otherwise whatever the wrapped handler returns
        """
        if "authorization" not in request.headers:
            response.status = 401
            response.headers.set("WWW-Authenticate", "Basic")
            return response

        auth = Authentication(request.headers)
        # With user=None any supplied credentials are accepted.
        if self.user is not None and (self.user != auth.username or self.password != auth.password):
            response.set_error(403, "Invalid username or password")
            return response
        return self.handler(request, response)

# Module-level BasicAuthHandler wrapping file_handler. user is None, so the
# username/password comparison is skipped; an authorization header is still
# required before the request is passed on.
basic_auth_handler = BasicAuthHandler(file_handler, None, None)  # type: ignore

class ErrorHandler:
    """Handler that answers every request with a fixed error status."""

    def __init__(self, status):
        """:param status: Status code handed to ``response.set_error``"""
        self.status = status

    def __call__(self, request, response):
        """Mark ``response`` as an error with the configured status."""
        configured_status = self.status
        response.set_error(configured_status)

class StringHandler:
    def __init__(self, data, content_type, **headers):
        """Handler that returns a fixed data string and headers

        :param data: String to use
        :param content_type: Content type header to serve the response with
        :param headers: Extra headers to send with responses, given as
                        keyword arguments; underscores in the names are
                        replaced by hyphens"""
        self.data = data

        extra_headers = [(name.replace("_", "-"), value)
                         for name, value in headers.items()]
        self.resp_headers = [("Content-Type", content_type)] + extra_headers

        self.handler = handler(self.handle_request)

    def handle_request(self, request, response):
        """Return the fixed ``(headers, data)`` pair for every request."""
        return self.resp_headers, self.data

    def __call__(self, request, response):
        return self.handler(request, response)

class StaticHandler:
    def __init__(self, path, format_args, content_type, **headers):
        """Handler that reads a file from a path and substitutes some fixed data

        Note that *.headers files have no effect in this handler.

        :param path: Path to the template file to use
        :param format_args: Dictionary of values to substitute into the template file
        :param content_type: Content type header to serve the response with
        :param headers: Extra headers to send with responses, given as
                        keyword arguments"""
        self._path = path
        self._format_args = format_args
        self._content_type = content_type
        self._headers = headers
        # Built lazily on the first request; see __call__.
        self._handler = None

    def __getnewargs_ex__(self):
        # Arguments pickle passes to __new__ when reconstructing the object.
        args = self._path, self._format_args, self._content_type
        return args, self._headers

    def __getstate__(self):
        # Do not pickle `self._handler`, which can be arbitrarily large.
        # __getnewargs_ex__ alone does not achieve that: the instance
        # __dict__ is still pickled as state, so the cache is blanked here.
        # The key is kept (as None) because __init__ is not run on unpickle
        # and __call__ reads the attribute.
        state = self.__dict__.copy()
        state["_handler"] = None
        return state

    def __call__(self, request, response):
        """Serve the (lazily loaded and formatted) file contents."""
        # Load the static file contents lazily so that this handler can be
        # pickled and sent to child processes efficiently. Transporting file
        # contents across processes can slow `wptserve` startup by several
        # seconds (crbug.com/1479850).
        if not self._handler:
            with open(self._path) as f:
                data = f.read()
            if self._format_args:
                data = data % self._format_args
            self._handler = StringHandler(data, self._content_type, **self._headers)
        return self._handler(request, response)