DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Mercurial (f2644bf19c9f)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function

import re


def _tokens2re(**tokens):
    '''
    Compile a regular expression matching any of the given tokens.

    Each keyword argument is a group name mapped to a pattern. Every token
    becomes a named capture group of the form
      (?P<name>pattern)
    and the groups are combined as alternatives that only match when they
    are not preceded by a backslash:
      (?<!\\)(?:a|b|c...)
    In addition, the compiled expression matches an escaped backslash (two
    consecutive backslashes), captured in the "escape" group.
    '''
    named_groups = []
    for group_name, group_pattern in tokens.items():
        named_groups.append('(?P<%s>%s)' % (group_name, group_pattern))

    # Alternation of all the tokens, guarded by a negative lookbehind so
    # that a token directly following a backslash is not matched.
    unescaped = r'(?<!\\)(?:%s)' % '|'.join(named_groups)

    # An escaped backslash is matched on its own, whatever precedes it.
    escaped_backslash = r'(?P<escape>\\\\)'

    return re.compile('(?:%s|%s)' % (unescaped, escaped_backslash))


# Tokens that are significant while parsing outside of any quotes. Each
# keyword is the name of the match group reporting the token.
UNQUOTED_TOKENS_RE = _tokens2re(
  whitespace=r'[\t\r\n ]+',
  quote=r'[\'"]',
  comment='#',
  special=r'[<>&|`(){}$;\*\?]',
  backslashed=r'\\[^\\]',
)

# Tokens that are significant while parsing inside a doubly quoted string.
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
  quote='"',
  backslashedquote=r'\\"',
  # Raw string: '\$' in a regular string literal is an invalid escape
  # sequence, which recent Python versions warn about. The value is the same.
  special=r'\$',
  backslashed=r'\\[^\\"]',
)

# A backslash immediately followed by a newline.
ESCAPED_NEWLINES_RE = re.compile(r'\\\n')

# This regexp contains the same characters as all those listed in
# UNQUOTED_TOKENS_RE. Please keep in sync.
SHELL_QUOTE_RE = re.compile(r'[\\\t\r\n \'\"#<>&|`(){}$;\*\?]')


class MetaCharacterException(Exception):
    '''
    Raised when a command line contains an unquoted, non-escaped special
    character, which would need a shell to be interpreted.

    The offending character is stored in the `char` attribute.
    '''

    def __init__(self, char):
        # Remember which character triggered the exception.
        self.char = char


class _ClineSplitter(object):
    '''
    Parses a given command line string and creates a list of command
    and arguments.

    Parsing happens in the constructor; the resulting list is available in
    the `result` attribute. Nothing is expanded: unquoted, non-escaped
    special characters (wildcards included) raise MetaCharacterException,
    and an unterminated quoted string raises Exception.
    '''

    def __init__(self, cline):
        # Argument currently being assembled. None means that no argument
        # has been started, which is not the same as an empty argument ('').
        self.arg = None
        # Remainder of the command line that still has to be parsed.
        self.cline = cline
        # Arguments finalized so far.
        self.result = []
        self._parse_unquoted()

    def _push(self, str):
        '''
        Push the given string as part of the current argument
        '''
        if self.arg is None:
            self.arg = ''
        self.arg += str

    def _next(self):
        '''
        Finalize current argument, effectively adding it to the list.
        Does nothing when no argument has been started.
        '''
        if self.arg is None:
            return
        self.result.append(self.arg)
        self.arg = None

    def _parse_unquoted(self):
        '''
        Parse command line remainder in the context of an unquoted string.

        Raises MetaCharacterException on an unquoted, non-escaped special
        character.
        '''
        while self.cline:
            # Find the next token
            m = UNQUOTED_TOKENS_RE.search(self.cline)
            # If we find none, the remainder of the string can be pushed to
            # the current argument and the argument finalized
            if not m:
                self._push(self.cline)
                break
            # The beginning of the string, up to the found token, is part of
            # the current argument
            if m.start():
                self._push(self.cline[:m.start()])
            self.cline = self.cline[m.end():]

            # Only keep the groups that took part in the match.
            match = {name: value
                     for name, value in m.groupdict().items() if value}
            if 'quote' in match:
                # " or ' start a quoted string
                if match['quote'] == '"':
                    self._parse_doubly_quoted()
                else:
                    self._parse_quoted()
            elif 'comment' in match:
                # Comments are ignored. The current argument can be finalized,
                # and parsing stopped.
                break
            elif 'special' in match:
                # Unquoted, non-escaped special characters need to be sent to a
                # shell.
                raise MetaCharacterException(match['special'])
            elif 'whitespace' in match:
                # Whitespaces terminate current argument.
                self._next()
            elif 'escape' in match:
                # Escaped backslashes turn into a single backslash
                self._push('\\')
            elif 'backslashed' in match:
                # Backslashed characters are unbackslashed
                # e.g. echo \a -> a
                self._push(match['backslashed'][1])
            else:
                raise Exception("Shouldn't reach here")
        # Finalize whatever argument is pending. _next() ignores the "no
        # argument" case (None) by itself; testing the truthiness of the
        # argument here would wrongly drop a trailing empty argument such as
        # the one produced by '' or "".
        self._next()

    def _parse_quoted(self):
        '''
        Parse command line remainder in the context of a singly quoted
        string, up to and including the closing quote.

        Raises Exception if there is no closing quote.
        '''
        # Single quoted strings are preserved, except for the final quote
        index = self.cline.find("'")
        if index == -1:
            raise Exception('Unterminated quoted string in command')
        self._push(self.cline[:index])
        self.cline = self.cline[index+1:]

    def _parse_doubly_quoted(self):
        '''
        Parse command line remainder in the context of a doubly quoted
        string, up to and including the closing quote.

        Raises Exception if there is no closing quote, and
        MetaCharacterException on a non-escaped special character.
        '''
        while self.cline:
            m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
            if not m:
                # No token left means no closing quote either.
                break
            self._push(self.cline[:m.start()])
            self.cline = self.cline[m.end():]
            # Only keep the groups that took part in the match.
            match = {name: value
                     for name, value in m.groupdict().items() if value}
            if 'quote' in match:
                # a double quote ends the quoted string, so go back to
                # unquoted parsing
                return
            elif 'special' in match:
                # Unquoted, non-escaped special characters in a doubly quoted
                # string still have a special meaning and need to be sent to a
                # shell.
                raise MetaCharacterException(match['special'])
            elif 'escape' in match:
                # Escaped backslashes turn into a single backslash
                self._push('\\')
            elif 'backslashedquote' in match:
                # Backslashed double quotes are un-backslashed
                self._push('"')
            elif 'backslashed' in match:
                # Backslashed characters are kept backslashed
                self._push(match['backslashed'])
        # The command line ran out before a closing quote was found. This
        # covers an empty remainder, a remainder without any token, and a
        # remainder that ends right after an escape token (e.g. "abc\").
        raise Exception('Unterminated quoted string in command')


def split(cline):
    '''
    Split the given command line string into a list of arguments.

    Backslash-newline sequences are removed from the string before it is
    parsed.
    '''
    joined = ESCAPED_NEWLINES_RE.sub('', cline)
    splitter = _ClineSplitter(joined)
    return splitter.result


def _quote(s):
    '''Return a form of the given string that can be used literally on a
    shell command line, wrapped in single quotes when required.

    An int is a special case: it is returned as a string containing the
    int, never wrapped in quotes.
    '''
    if type(s) is int:
        return '%d' % s

    # Quoting is required for empty strings (they would otherwise have no
    # significance), for strings containing a character the shell treats
    # specially, and for strings starting with a tilde.
    if not s or SHELL_QUOTE_RE.search(s) or s.startswith('~'):
        # A single quote cannot appear inside a single quoted string, not
        # even escaped: close the string, add an escaped single quote, and
        # reopen the string.
        str_type = type(s)
        wrapper = str_type("'%s'")
        return wrapper % s.replace(str_type("'"), str_type("'\\''"))

    return s


def quote(*strings):
    '''Quote each of the given strings as needed and join them with single
    spaces, giving a string that can be used literally on a shell command
    line.

        >>> quote('a', 'b')
        'a b'
        >>> quote('a b', 'c')
        "'a b' c"
    '''
    quoted = [_quote(item) for item in strings]
    return ' '.join(quoted)


# Names exported by `from <module> import *`.
__all__ = ['MetaCharacterException', 'split', 'quote']