Source code

Revision control

Copy as Markdown

Other Tools

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import re
def _tokens2re(**tokens):
# Create a pattern for non-escaped tokens, in the form:
# (?<!\\)(?:a|b|c...)
# This is meant to match patterns a, b, or c, or ... if they are not
# preceded by a backslash.
# where a, b, c... are in the form
# (?P<name>pattern)
# which matches the pattern and captures it in a named match group.
# The group names and patterns are given as arguments.
all_tokens = "|".join(
"(?P<%s>%s)" % (name, value) for name, value in tokens.items()
)
nonescaped = r"(?<!\\)(?:%s)" % all_tokens
# The final pattern matches either the above pattern, or an escaped
# backslash, captured in the "escape" match group.
return re.compile("(?:%s|%s)" % (nonescaped, r"(?P<escape>\\\\)"))
UNQUOTED_TOKENS_RE = _tokens2re(
whitespace=r"[\t\r\n ]+",
quote=r'[\'"]',
comment="#",
special=r"[<>&|`(){}$;\*\?]",
backslashed=r"\\[^\\]",
)
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
quote='"',
backslashedquote=r'\\"',
special=r"\$",
backslashed=r'\\[^\\"]',
)
ESCAPED_NEWLINES_RE = re.compile(r"\\\n")
# This regexp contains the same characters as all those listed in
# UNQUOTED_TOKENS_RE. Please keep in sync.
SHELL_QUOTE_RE = re.compile(r"[\\\t\r\n \'\"#<>&|`(){}$;\*\?]")
class MetaCharacterException(Exception):
def __init__(self, char):
self.char = char
class _ClineSplitter(object):
"""
Parses a given command line string and creates a list of command
and arguments, with wildcard expansion.
"""
def __init__(self, cline):
self.arg = None
self.cline = cline
self.result = []
self._parse_unquoted()
def _push(self, str):
"""
Push the given string as part of the current argument
"""
if self.arg is None:
self.arg = ""
self.arg += str
def _next(self):
"""
Finalize current argument, effectively adding it to the list.
"""
if self.arg is None:
return
self.result.append(self.arg)
self.arg = None
def _parse_unquoted(self):
"""
Parse command line remainder in the context of an unquoted string.
"""
while self.cline:
# Find the next token
m = UNQUOTED_TOKENS_RE.search(self.cline)
# If we find none, the remainder of the string can be pushed to
# the current argument and the argument finalized
if not m:
self._push(self.cline)
break
# The beginning of the string, up to the found token, is part of
# the current argument
if m.start():
self._push(self.cline[: m.start()])
self.cline = self.cline[m.end() :]
match = {name: value for name, value in m.groupdict().items() if value}
if "quote" in match:
# " or ' start a quoted string
if match["quote"] == '"':
self._parse_doubly_quoted()
else:
self._parse_quoted()
elif "comment" in match:
# Comments are ignored. The current argument can be finalized,
# and parsing stopped.
break
elif "special" in match:
# Unquoted, non-escaped special characters need to be sent to a
# shell.
raise MetaCharacterException(match["special"])
elif "whitespace" in match:
# Whitespaces terminate current argument.
self._next()
elif "escape" in match:
# Escaped backslashes turn into a single backslash
self._push("\\")
elif "backslashed" in match:
# Backslashed characters are unbackslashed
# e.g. echo \a -> a
self._push(match["backslashed"][1])
else:
raise Exception("Shouldn't reach here")
if self.arg:
self._next()
def _parse_quoted(self):
# Single quoted strings are preserved, except for the final quote
index = self.cline.find("'")
if index == -1:
raise Exception("Unterminated quoted string in command")
self._push(self.cline[:index])
self.cline = self.cline[index + 1 :]
def _parse_doubly_quoted(self):
if not self.cline:
raise Exception("Unterminated quoted string in command")
while self.cline:
m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
if not m:
raise Exception("Unterminated quoted string in command")
self._push(self.cline[: m.start()])
self.cline = self.cline[m.end() :]
match = {name: value for name, value in m.groupdict().items() if value}
if "quote" in match:
# a double quote ends the quoted string, so go back to
# unquoted parsing
return
elif "special" in match:
# Unquoted, non-escaped special characters in a doubly quoted
# string still have a special meaning and need to be sent to a
# shell.
raise MetaCharacterException(match["special"])
elif "escape" in match:
# Escaped backslashes turn into a single backslash
self._push("\\")
elif "backslashedquote" in match:
# Backslashed double quotes are un-backslashed
self._push('"')
elif "backslashed" in match:
# Backslashed characters are kept backslashed
self._push(match["backslashed"])
def split(cline):
"""
Split the given command line string.
"""
s = ESCAPED_NEWLINES_RE.sub("", cline)
return _ClineSplitter(s).result
def _quote(s):
"""Given a string, returns a version that can be used literally on a shell
command line, enclosing it with single quotes if necessary.
As a special case, if given an int, returns a string containing the int,
not enclosed in quotes.
"""
if type(s) == int:
return f"{s}"
# Empty strings need to be quoted to have any significance
if s and not SHELL_QUOTE_RE.search(s) and s[0] != "~":
return s
# Single quoted strings can contain any characters unescaped except the
# single quote itself, which can't even be escaped, so the string needs to
# be closed, an escaped single quote added, and reopened.
t = type(s)
return t("'%s'") % s.replace(t("'"), t("'\\''"))
def quote(*strings):
"""Given one or more strings, returns a quoted string that can be used
literally on a shell command line.
>>> quote('a', 'b')
"a b"
>>> quote('a b', 'c')
"'a b' c"
"""
return " ".join(_quote(s) for s in strings)
__all__ = ["MetaCharacterException", "split", "quote"]