# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# TODO: Eventually consolidate with mozpack.pkg module. This is kept separate
# for now because of the vast difference in API, and to avoid churn for the
# users of this module (docker images, macos SDK artifacts) when changes are
# necessary in mozpack.pkg

import bz2
import concurrent.futures
import io
import lzma
import os
import struct
import zlib
from collections import deque, namedtuple
from xml.etree.ElementTree import XML


class ZlibFile(object):
    """Minimal file-like wrapper that inflates a raw zlib stream as it is read."""

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.decompressor = zlib.decompressobj()
        self.buf = b""

    def read(self, length):
        # Serve as much as possible from data already decompressed, then pull
        # more from the underlying file until `length` bytes are available or
        # the stream ends.
        cutoff = min(length, len(self.buf))
        result = self.buf[:cutoff]
        self.buf = self.buf[cutoff:]
        while len(result) < length:
            buf = self.fileobj.read(io.DEFAULT_BUFFER_SIZE)
            if not buf:
                break
            buf = self.decompressor.decompress(buf)
            cutoff = min(length - len(result), len(buf))
            result += buf[:cutoff]
            self.buf += buf[cutoff:]
        return result
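

# Illustrative sketch (not part of the original module): round-tripping a raw
# zlib stream through ZlibFile in fixed-size reads. The payload here is
# synthetic; real callers get ZlibFile objects from unxar() below.
def _example_zlibfile_roundtrip():
    data = b"hello world " * 1024
    reader = ZlibFile(io.BytesIO(zlib.compress(data)))
    out = b""
    while True:
        piece = reader.read(4096)
        if not piece:
            break
        out += piece
    assert out == data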


def unxar(fileobj):
    """Iterate over a XAR archive, yielding (name, file-like content) pairs."""
    magic = fileobj.read(4)
    if magic != b"xar!":
        raise Exception("Not a XAR?")

    header_size = fileobj.read(2)
    header_size = struct.unpack(">H", header_size)[0]
    if header_size > 64:
        raise Exception(
            f"Don't know how to handle a {header_size} bytes XAR header size"
        )
    header_size -= 6  # what we've read so far.

    header = fileobj.read(header_size)
    if len(header) != header_size:
        raise Exception("Failed to read XAR header")
    (
        version,
        compressed_toc_len,
        uncompressed_toc_len,
        checksum_type,
    ) = struct.unpack(">HQQL", header[:22])
    if version != 1:
        raise Exception(f"XAR version {version} not supported")

    toc = fileobj.read(compressed_toc_len)
    base = fileobj.tell()
    if len(toc) != compressed_toc_len:
        raise Exception("Failed to read XAR TOC")
    toc = zlib.decompress(toc)
    if len(toc) != uncompressed_toc_len:
        raise Exception("Corrupted XAR?")

    # Walk the table of contents, descending into nested <file> elements.
    toc = XML(toc).find("toc")
    queue = deque(toc.findall("file"))
    while queue:
        f = queue.pop()
        queue.extend(f.iterfind("file"))
        if f.find("type").text != "file":
            continue
        filename = f.find("name").text
        data = f.find("data")
        length = int(data.find("length").text)
        size = int(data.find("size").text)
        offset = int(data.find("offset").text)
        encoding = data.find("encoding").get("style")
        fileobj.seek(base + offset, os.SEEK_SET)
        content = Take(fileobj, length)
        if encoding == "application/octet-stream":
            if length != size:
                raise Exception(f"{length} != {size}")
        elif encoding == "application/x-bzip2":
            content = bz2.BZ2File(content)
        elif encoding == "application/x-gzip":
            # Despite the encoding saying gzip, it is, in fact, a raw zlib stream.
            content = ZlibFile(content)
        else:
            raise Exception(f"XAR encoding {encoding} not supported")
        yield filename, content
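

# Illustrative sketch (assumption: "example.pkg" is a macOS flat installer
# package, which is a XAR archive): list the archive entries and their
# decompressed sizes.
def _example_list_xar(path="example.pkg"):
    with open(path, "rb") as fh:
        for name, content in unxar(fh):
            size = 0
            while True:
                buf = content.read(4096)
                if not buf:
                    break
                size += len(buf)
            print(name, size)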


class Pbzx(object):
    """File-like reader for PBZX payloads: a sequence of (mostly) lzma-compressed
    chunks, decompressed in parallel on a thread pool."""

    def __init__(self, fileobj):
        magic = fileobj.read(4)
        if magic != b"pbzx":
            raise Exception("Not a PBZX payload?")
        # The first thing in the file looks like the size of each
        # decompressed chunk except the last one. It should match
        # decompressed_size in all cases except last, but we don't
        # check.
        chunk_size = fileobj.read(8)
        chunk_size = struct.unpack(">Q", chunk_size)[0]
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count())
        self.chunk_getter = executor.map(self._uncompress_chunk, self._chunker(fileobj))
        self._init_one_chunk()

    @staticmethod
    def _chunker(fileobj):
        while True:
            header = fileobj.read(16)
            if header == b"":
                break
            if len(header) != 16:
                raise Exception("Corrupted PBZX payload?")
            decompressed_size, compressed_size = struct.unpack(">QQ", header)
            chunk = fileobj.read(compressed_size)
            yield decompressed_size, compressed_size, chunk

    @staticmethod
    def _uncompress_chunk(data):
        decompressed_size, compressed_size, chunk = data
        # Chunks whose compressed size equals the advertised decompressed size
        # are stored verbatim; everything else is an lzma stream.
        if compressed_size != decompressed_size:
            chunk = lzma.decompress(chunk)
            if len(chunk) != decompressed_size:
                raise Exception("Corrupted PBZX payload?")
        return chunk

    def _init_one_chunk(self):
        self.offset = 0
        # Default to b"" so read() keeps returning bytes once the chunks are
        # exhausted.
        self.chunk = next(self.chunk_getter, b"")

    def read(self, length=None):
        if length == 0:
            return b""
        if length and len(self.chunk) >= self.offset + length:
            start = self.offset
            self.offset += length
            return self.chunk[start : self.offset]
        else:
            result = self.chunk[self.offset :]
            self._init_one_chunk()
            if self.chunk:
                # XXX: suboptimal if length is larger than the chunk size
                result += self.read(None if length is None else length - len(result))
            return result
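

# Illustrative sketch (synthetic payload, not part of the original module):
# build a minimal single-chunk PBZX stream by hand and read it back through
# Pbzx. Real PBZX payloads come from the "Payload" entries yielded by unxar().
def _example_pbzx_roundtrip():
    data = b"A" * 65536
    chunk = lzma.compress(data)
    payload = (
        b"pbzx"
        + struct.pack(">Q", len(data))  # nominal decompressed chunk size
        + struct.pack(">QQ", len(data), len(chunk))  # per-chunk header
        + chunk
    )
    assert Pbzx(io.BytesIO(payload)).read() == data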


class Take(object):
    """
    File object wrapper that allows reading at most a certain length.
    """

    def __init__(self, fileobj, limit):
        self.fileobj = fileobj
        self.limit = limit

    def read(self, length=None):
        if length is None:
            length = self.limit
        else:
            length = min(length, self.limit)
        result = self.fileobj.read(length)
        self.limit -= len(result)
        return result
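

# Illustrative sketch: Take caps how much of the underlying stream a consumer
# may read, which is how unxar() and uncpio() hand out per-entry views of a
# single archive file.
def _example_take():
    fh = io.BytesIO(b"0123456789")
    entry = Take(fh, 4)
    assert entry.read() == b"0123"  # reads at most the 4-byte limit
    assert entry.read() == b""  # limit exhausted
    assert fh.read() == b"456789"  # the rest of the stream is untouched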


CpioInfo = namedtuple("CpioInfo", ["mode", "nlink", "dev", "ino"])


def uncpio(fileobj):
    """Iterate over a CPIO archive, yielding (name, CpioInfo, content) triples."""
    while True:
        magic = fileobj.read(6)
        # CPIO payloads in mac pkg files use the portable ASCII format.
        if magic != b"070707":
            if magic.startswith(b"0707"):
                raise Exception("Unsupported CPIO format")
            raise Exception("Not a CPIO header")
        header = fileobj.read(70)
        (
            dev,
            ino,
            mode,
            uid,
            gid,
            nlink,
            rdev,
            mtime,
            namesize,
            filesize,
        ) = struct.unpack(">6s6s6s6s6s6s6s11s6s11s", header)
        dev = int(dev, 8)
        ino = int(ino, 8)
        mode = int(mode, 8)
        nlink = int(nlink, 8)
        namesize = int(namesize, 8)
        filesize = int(filesize, 8)

        name = fileobj.read(namesize)
        if name[-1] != 0:
            raise Exception("File name is not NUL terminated")
        name = name[:-1]
        if name == b"TRAILER!!!":
            break

        if b"/../" in name or name.startswith(b"../") or name == b"..":
            raise Exception(".. is forbidden in file name")
        if name.startswith(b"."):
            name = name[1:]
        if name.startswith(b"/"):
            name = name[1:]

        content = Take(fileobj, filesize)
        yield name, CpioInfo(mode=mode, nlink=nlink, dev=dev, ino=ino), content
        # Ensure the content is totally consumed
        while content.read(4096):
            pass
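

# Illustrative end-to-end sketch (assumptions: "example.pkg" is a macOS flat
# installer package and its payload entries are named "Payload" and are
# PBZX-wrapped CPIO archives): walk XAR -> PBZX -> CPIO to reach the
# individual files.
def _example_extract_pkg(path="example.pkg"):
    with open(path, "rb") as pkg:
        for name, content in unxar(pkg):
            if name != "Payload":
                continue
            for filename, info, fileobj in uncpio(Pbzx(content)):
                if not filename:
                    continue
                print(filename.decode(), oct(info.mode), len(fileobj.read()))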