Source code

Revision control

Copy as Markdown

Other Tools

# -*- coding: utf-8 -*-
# Copyright JS Foundation and other contributors, https://js.foundation/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import absolute_import, unicode_literals
import re
from .objects import Object
from .compat import xrange, unicode, uchr, uord
from .character import Character, HEX_CONV, OCTAL_CONV
from .messages import Messages
from .token import Token
def hexValue(ch):
    """Return the entry stored for ``ch`` in the ``HEX_CONV`` lookup table.

    The lookup is a plain subscript, so a character missing from the table
    fails the way ``HEX_CONV`` itself fails for unknown keys.
    """
    value = HEX_CONV[ch]
    return value
def octalValue(ch):
    """Return the entry stored for ``ch`` in the ``OCTAL_CONV`` lookup table.

    The lookup is a plain subscript, so a character missing from the table
    fails the way ``OCTAL_CONV`` itself fails for unknown keys.
    """
    value = OCTAL_CONV[ch]
    return value
class RegExp(Object):
    """Record pairing a regular-expression ``pattern`` with its ``flags``."""

    def __init__(self, pattern=None, flags=None):
        # Both fields default to None and are stored unmodified.
        self.pattern, self.flags = pattern, flags
class Position(Object):
    """Record holding a ``line``, a ``column`` and an ``offset``.

    All three fields are optional and default to None.
    """

    def __init__(self, line=None, column=None, offset=None):
        self.line, self.column, self.offset = line, column, offset
class SourceLocation(Object):
    """Record holding a ``start``, an ``end`` and an optional ``source``.

    Within this file ``start`` and ``end`` are filled with ``Position``
    instances by the comment-skipping routines.
    """

    def __init__(self, start=None, end=None, source=None):
        self.start, self.end, self.source = start, end, source
class Comment(Object):
    """Record describing one comment found by the scanner.

    Attributes (as filled in by the comment-skipping routines of this file):
        multiLine: False for single-line comments, True for block comments.
        slice: two-element list of indices delimiting the comment text
            without its opening/closing markers.
        range: two-element list of indices delimiting the whole comment.
        loc: a ``SourceLocation`` for the comment.
    """

    def __init__(self, multiLine=None, slice=None, range=None, loc=None):
        self.multiLine, self.slice = multiLine, slice
        self.range, self.loc = range, loc
class RawToken(Object):
    """Record describing a single token produced by the scanner.

    Every field is optional and defaults to None; each scanning routine
    fills only the fields relevant to the token kind it produces.
    """

    def __init__(self, type=None, value=None, pattern=None, flags=None, regex=None, octal=None, cooked=None, head=None, tail=None, lineNumber=None, lineStart=None, start=None, end=None):
        # Attributes are assigned in the same order as the parameters.

        # Kind of token and its value.
        self.type, self.value = type, value
        # Regular-expression literals.
        self.pattern, self.flags, self.regex = pattern, flags, regex
        # Octal marker (numeric and string literals).
        self.octal = octal
        # Template elements.
        self.cooked, self.head, self.tail = cooked, head, tail
        # Location information.
        self.lineNumber, self.lineStart = lineNumber, lineStart
        self.start, self.end = start, end
class ScannerState(Object):
    """Snapshot of a scanner's ``index``, ``lineNumber`` and ``lineStart``.

    Produced by ``Scanner.saveState`` and consumed by
    ``Scanner.restoreState``.
    """

    def __init__(self, index=None, lineNumber=None, lineStart=None):
        self.index, self.lineNumber, self.lineStart = index, lineNumber, lineStart
class Octal(object):
    """Result of decoding an octal escape sequence.

    Attributes:
        octal: whether the sequence counts as an octal escape.
        code: the numeric value that was decoded.
    """

    def __init__(self, octal, code):
        self.octal, self.code = octal, code
class Scanner(object):
    """Character-level scanner that turns source text into ``RawToken`` records."""

    def __init__(self, code, handler):
        """Prepare to scan ``code``, reporting problems through ``handler``.

        Args:
            code: the text to scan.
            handler: object providing ``throwError`` and ``tolerateError``.
        """
        # A NUL sentinel is appended so one-character look-ahead at the very
        # end of the input never indexes past the string.
        text = unicode(code)
        self.source = text + '\x00'
        self.errorHandler = handler
        self.trackComment = False
        self.isModule = False

        # The logical length excludes the sentinel.
        self.length = len(code)
        self.index = 0
        # Empty input starts on line 0, anything else on line 1.
        if self.length > 0:
            self.lineNumber = 1
        else:
            self.lineNumber = 0
        self.lineStart = 0
        # Stack of '{' / '${' markers maintained while scanning
        # punctuators and templates.
        self.curlyStack = []
def saveState(self):
    """Return a ``ScannerState`` capturing the current position.

    The snapshot records ``index``, ``lineNumber`` and ``lineStart`` and can
    be handed back to ``restoreState`` later.
    """
    snapshot = ScannerState(
        index=self.index,
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
    )
    return snapshot
def restoreState(self, state):
    """Rewind the scanner to a position previously captured in ``state``.

    Args:
        state: object exposing ``index``, ``lineNumber`` and ``lineStart``
            (for example the value returned by ``saveState``).
    """
    self.index, self.lineNumber, self.lineStart = (
        state.index,
        state.lineNumber,
        state.lineStart,
    )
def eof(self):
    """Return True once ``index`` has reached or passed ``length``."""
    remaining = self.index < self.length
    return not remaining
def throwUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
    """Report ``message`` at the current position through ``errorHandler.throwError``.

    The handler is given the current index, the current line number and the
    1-based column (``index - lineStart + 1``). Whatever the handler returns
    is handed back to the caller.
    """
    column = self.index - self.lineStart + 1
    return self.errorHandler.throwError(self.index, self.lineNumber, column, message)
def tolerateUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
    """Report ``message`` at the current position through ``errorHandler.tolerateError``.

    The handler is given the current index, the current line number and the
    1-based column (``index - lineStart + 1``). Nothing is returned.
    """
    column = self.index - self.lineStart + 1
    self.errorHandler.tolerateError(self.index, self.lineNumber, column, message)
def skipSingleLineComment(self, offset):
    """Skip the remainder of a single-line comment.

    The caller has already consumed the comment opener; ``offset`` is the
    length of that opener, used to compute where the comment began.

    Args:
        offset: number of characters already consumed for the opener.

    Returns:
        A list holding one ``Comment`` when ``trackComment`` is set,
        otherwise an empty list.
    """
    found = []

    if self.trackComment:
        begin = self.index - offset
        where = SourceLocation(
            start=Position(
                line=self.lineNumber,
                column=self.index - self.lineStart - offset
            ),
            end=Position()
        )

    while not self.eof():
        ch = self.source[self.index]
        self.index += 1
        if not Character.isLineTerminator(ch):
            continue

        if self.trackComment:
            # The terminator that was just consumed is not part of the comment.
            stop = self.index - 1
            where.end = Position(
                line=self.lineNumber,
                column=stop - self.lineStart
            )
            found.append(Comment(
                multiLine=False,
                slice=[begin + offset, stop],
                range=[begin, stop],
                loc=where
            ))

        # A CR immediately followed by LF counts as one terminator.
        if ch == '\r' and self.source[self.index] == '\n':
            self.index += 1
        self.lineNumber += 1
        self.lineStart = self.index
        return found

    # The input ended before any line terminator was seen.
    if self.trackComment:
        stop = self.index
        where.end = Position(
            line=self.lineNumber,
            column=stop - self.lineStart
        )
        found.append(Comment(
            multiLine=False,
            slice=[begin + offset, stop],
            range=[begin, stop],
            loc=where
        ))

    return found
def skipMultiLineComment(self):
    """Skip a block comment whose two-character opener was already consumed.

    Line bookkeeping (``lineNumber`` / ``lineStart``) is updated for every
    line terminator inside the comment. If the input ends before the closing
    marker, the problem is reported through ``tolerateUnexpectedToken``.

    Returns:
        A list holding one ``Comment`` when ``trackComment`` is set,
        otherwise an empty list.
    """
    found = []

    if self.trackComment:
        # The opener is two characters long.
        begin = self.index - 2
        where = SourceLocation(
            start=Position(
                line=self.lineNumber,
                column=self.index - self.lineStart - 2
            ),
            end=Position()
        )

    while not self.eof():
        ch = self.source[self.index]

        if Character.isLineTerminator(ch):
            # A CR immediately followed by LF counts as one terminator.
            width = 2 if (ch == '\r' and self.source[self.index + 1] == '\n') else 1
            self.lineNumber += 1
            self.index += width
            self.lineStart = self.index
            continue

        # Block comment ends with '*/'.
        if ch == '*' and self.source[self.index + 1] == '/':
            self.index += 2
            if self.trackComment:
                where.end = Position(
                    line=self.lineNumber,
                    column=self.index - self.lineStart
                )
                found.append(Comment(
                    multiLine=True,
                    slice=[begin + 2, self.index - 2],
                    range=[begin, self.index],
                    loc=where
                ))
            return found

        self.index += 1

    # Ran off the end of the file - the whole thing is a comment.
    if self.trackComment:
        where.end = Position(
            line=self.lineNumber,
            column=self.index - self.lineStart
        )
        found.append(Comment(
            multiLine=True,
            slice=[begin + 2, self.index],
            range=[begin, self.index],
            loc=where
        ))

    self.tolerateUnexpectedToken()
    return found
def scanComments(self):
    """Skip white space, line terminators and comments before the next token.

    Handles ``//`` and ``/* */`` comments, ``-->`` when the line-start flag
    is set, and ``<!--`` unless ``isModule`` is set. Scanning stops at the
    first character that begins none of these.

    Returns:
        The list of ``Comment`` records collected; it stays empty unless
        ``trackComment`` is set.
    """
    found = []

    # Set at index 0, after each line terminator and after each '//' comment;
    # '-->' is only treated as a comment while it is set.
    atLineStart = self.index == 0

    while not self.eof():
        ch = self.source[self.index]

        if Character.isWhiteSpace(ch):
            self.index += 1
            continue

        if Character.isLineTerminator(ch):
            self.index += 1
            # A CR immediately followed by LF counts as one terminator.
            if ch == '\r' and self.source[self.index] == '\n':
                self.index += 1
            self.lineNumber += 1
            self.lineStart = self.index
            atLineStart = True
            continue

        if ch == '/':  # U+002F is '/'
            following = self.source[self.index + 1]
            if following == '/':
                self.index += 2
                skipped = self.skipSingleLineComment(2)
                if self.trackComment:
                    found.extend(skipped)
                atLineStart = True
            elif following == '*':  # U+002A is '*'
                self.index += 2
                skipped = self.skipMultiLineComment()
                if self.trackComment:
                    found.extend(skipped)
            else:
                break
            continue

        if atLineStart and ch == '-':  # U+002D is '-'
            # U+003E is '>'
            if self.source[self.index + 1:self.index + 3] != '->':
                break
            # '-->' is a single-line comment
            self.index += 3
            skipped = self.skipSingleLineComment(3)
            if self.trackComment:
                found.extend(skipped)
            continue

        if ch == '<' and not self.isModule:  # U+003C is '<'
            if self.source[self.index + 1:self.index + 4] != '!--':
                break
            self.index += 4  # `<!--`
            skipped = self.skipSingleLineComment(4)
            if self.trackComment:
                found.extend(skipped)
            continue

        break

    return found
def isFutureReservedWord(self, id):
    """Return True when ``id`` is listed in ``isFutureReservedWord.set``."""
    words = self.isFutureReservedWord.set
    return id in words


# The word list is stored as an attribute of the function itself.
isFutureReservedWord.set = {'enum', 'export', 'import', 'super'}
def isStrictModeReservedWord(self, id):
    """Return True when ``id`` is listed in ``isStrictModeReservedWord.set``."""
    words = self.isStrictModeReservedWord.set
    return id in words


# The word list is stored as an attribute of the function itself.
isStrictModeReservedWord.set = {
    'implements', 'interface', 'package',
    'private', 'protected', 'public',
    'static', 'yield', 'let',
}
def isRestrictedWord(self, id):
    """Return True when ``id`` is listed in ``isRestrictedWord.set``."""
    words = self.isRestrictedWord.set
    return id in words


# The word list is stored as an attribute of the function itself.
isRestrictedWord.set = {'eval', 'arguments'}
def isKeyword(self, id):
    """Return True when ``id`` is listed in ``isKeyword.set``."""
    words = self.isKeyword.set
    return id in words


# The word list is stored as an attribute of the function itself;
# entries are grouped by length.
isKeyword.set = {
    # 2 letters
    'if', 'in', 'do',
    # 3 letters
    'var', 'for', 'new', 'try', 'let',
    # 4 letters
    'this', 'else', 'case', 'void', 'with', 'enum',
    # 5 letters
    'while', 'break', 'catch', 'throw', 'const', 'yield', 'class', 'super',
    # 6 letters
    'return', 'typeof', 'delete', 'switch', 'export', 'import',
    # 7 letters
    'default', 'finally', 'extends',
    # 8 letters
    'function', 'continue', 'debugger',
    # 10 letters
    'instanceof',
}
def codePointAt(self, i):
    """Return ``uord`` applied to the (at most) two characters starting at ``i``.

    Two characters are passed so that ``uord`` can see both halves of a
    possible surrogate pair.
    """
    pair = self.source[i:i + 2]
    return uord(pair)
def scanHexEscape(self, prefix):
    """Read a fixed-width hexadecimal escape and return the decoded character.

    Args:
        prefix: ``'u'`` selects four hex digits; anything else selects two.

    Returns:
        The character produced by ``uchr`` for the decoded value, or None as
        soon as a required digit is missing. On failure the index is left
        where scanning stopped; callers that need to rewind must do so
        themselves.
    """
    if prefix == 'u':
        digits = 4
    else:
        digits = 2

    value = 0
    for _ in xrange(digits):
        if self.eof():
            return None
        ch = self.source[self.index]
        if not Character.isHexDigit(ch):
            return None
        self.index += 1
        value = value * 16 + hexValue(ch)

    return uchr(value)
def scanUnicodeCodePointEscape(self):
    """Read the hex digits and closing brace of a braced code-point escape.

    The caller has already consumed everything up to and including the
    opening brace. An empty escape, a value above 0x10FFFF, or a missing
    closing brace is reported through ``throwUnexpectedToken``.

    Returns:
        The result of ``Character.fromCodePoint`` for the decoded value.
    """
    ch = self.source[self.index]

    # At least, one hex digit is required.
    if ch == '}':
        self.throwUnexpectedToken()

    value = 0
    while not self.eof():
        ch = self.source[self.index]
        self.index += 1
        if Character.isHexDigit(ch):
            value = value * 16 + hexValue(ch)
        else:
            # Whatever stopped the digit run has been consumed; it must be
            # the closing brace.
            break

    if value > 0x10FFFF or ch != '}':
        self.throwUnexpectedToken()

    return Character.fromCodePoint(value)
def getIdentifier(self):
    """Scan an identifier on the fast path and return its text.

    The first character is accepted unconditionally. If a backslash or a
    code unit in the range tested below is met, the scanner rewinds to the
    start and delegates to ``getComplexIdentifier``.
    """
    begin = self.index
    self.index += 1

    while not self.eof():
        ch = self.source[self.index]

        # Backslash (U+005C) marks a Unicode escape sequence; code units
        # from 0xD800 up to (but not including) 0xDFFF need surrogate-pair
        # handling. Both are left to the slow path.
        if ch == '\\' or 0xD800 <= ord(ch) < 0xDFFF:
            self.index = begin
            return self.getComplexIdentifier()

        if not Character.isIdentifierPart(ch):
            break
        self.index += 1

    return self.source[begin:self.index]
def getComplexIdentifier(self):
    """Scan an identifier that may contain escapes or surrogate pairs.

    Characters are read one code point at a time through ``codePointAt`` /
    ``Character.fromCodePoint``. Each backslash must introduce a 'u' escape,
    either braced or four hex digits; problems are reported through
    ``throwUnexpectedToken``.

    Returns:
        The identifier text with escapes replaced by the characters they
        denote.
    """

    def readEscape(isAllowed):
        # Decode the escape that follows an already-consumed backslash.
        # ``isAllowed`` validates the first unit of a four-digit escape;
        # braced escapes are returned without that check.
        if self.source[self.index] != 'u':
            self.throwUnexpectedToken()
        self.index += 1

        if self.source[self.index] == '{':
            self.index += 1
            return self.scanUnicodeCodePointEscape()

        decoded = self.scanHexEscape('u')
        if not decoded or decoded == '\\' or not isAllowed(decoded[0]):
            self.throwUnexpectedToken()
        return decoded

    cp = self.codePointAt(self.index)
    name = Character.fromCodePoint(cp)
    self.index += len(name)

    # A leading backslash (U+005C) followed by 'u' (U+0075) denotes an
    # escaped first character, which replaces the backslash.
    if cp == 0x5C:
        name = readEscape(Character.isIdentifierStart)

    while not self.eof():
        cp = self.codePointAt(self.index)
        ch = Character.fromCodePoint(cp)
        if not Character.isIdentifierPart(ch):
            break

        name += ch
        self.index += len(ch)

        # A backslash that was just appended is dropped again and replaced
        # by the character its escape denotes.
        if cp == 0x5C:
            name = name[:-1]
            name += readEscape(Character.isIdentifierPart)

    return name
def octalToDecimal(self, ch):
    """Decode an octal escape whose first digit ``ch`` was already consumed.

    Up to two further octal digits are consumed; the third digit is only
    taken when ``ch`` is one of 0, 1, 2 or 3.

    Returns:
        An ``Octal`` carrying the decoded value and a flag telling whether
        the sequence counts as an octal escape. A lone 0 does not.
    """
    # \0 is not octal escape sequence
    isOctal = ch != '0'
    value = octalValue(ch)

    if self.eof() or not Character.isOctalDigit(self.source[self.index]):
        return Octal(isOctal, value)

    # Second digit.
    isOctal = True
    value = value * 8 + octalValue(self.source[self.index])
    self.index += 1

    # 3 digits are only allowed when string starts
    # with 0, 1, 2, 3
    if ch in '0123' and not self.eof() and Character.isOctalDigit(self.source[self.index]):
        value = value * 8 + octalValue(self.source[self.index])
        self.index += 1

    return Octal(isOctal, value)
def scanIdentifier(self):
    """Scan an identifier-like token and classify it.

    Returns:
        A ``RawToken`` whose type is Identifier, Keyword, NullLiteral or
        BooleanLiteral. When a non-identifier word was written using escapes
        (its decoded length differs from the number of characters consumed),
        ``Messages.InvalidEscapedReservedWord`` is reported at the token
        start through ``tolerateUnexpectedToken``.
    """
    begin = self.index

    # Backslash (U+005C) starts an escaped character.
    if self.source[begin] == '\\':
        name = self.getComplexIdentifier()
    else:
        name = self.getIdentifier()

    # There is no keyword or literal with only one character.
    # Thus, it must be an identifier.
    kind = Token.Identifier
    if len(name) != 1:
        if self.isKeyword(name):
            kind = Token.Keyword
        elif name == 'null':
            kind = Token.NullLiteral
        elif name in ('true', 'false'):
            kind = Token.BooleanLiteral

    if kind is not Token.Identifier and begin + len(name) != self.index:
        # Point the report at the start of the word, then carry on from
        # where scanning stopped.
        resume = self.index
        self.index = begin
        self.tolerateUnexpectedToken(Messages.InvalidEscapedReservedWord)
        self.index = resume

    return RawToken(
        type=kind,
        value=name,
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=begin,
        end=self.index
    )
def scanPunctuator(self):
    """Scan one punctuator and return it as a ``RawToken``.

    Opening and closing braces also push to / pop from ``curlyStack``. If no
    character could be consumed the problem is reported through
    ``throwUnexpectedToken``.
    """
    begin = self.index

    # Check for most common single-character punctuators.
    op = self.source[self.index]

    if op == '(' or op == '{':
        if op == '{':
            self.curlyStack.append('{')
        self.index += 1

    elif op == '.':
        self.index += 1
        if self.source[self.index] == '.' and self.source[self.index + 1] == '.':
            # Spread operator: ...
            self.index += 2
            op = '...'

    elif op == '}':
        self.index += 1
        if self.curlyStack:
            self.curlyStack.pop()

    elif op in (')', ';', ',', '[', ']', ':', '?', '~'):
        self.index += 1

    else:
        # Try the longest candidates first: 4, then 3, then 2 characters.
        window = self.source[self.index:self.index + 4]
        triple = window[:3]
        double = window[:2]

        if window == '>>>=':
            op = window
            self.index += 4
        elif triple in (
            '===', '!==', '>>>',
            '<<=', '>>=', '**='
        ):
            op = triple
            self.index += 3
        elif double in (
            '&&', '||', '==', '!=',
            '+=', '-=', '*=', '/=',
            '++', '--', '<<', '>>',
            '&=', '|=', '^=', '%=',
            '<=', '>=', '=>', '**',
        ):
            op = double
            self.index += 2
        else:
            # 1-character punctuators.
            op = self.source[self.index]
            if op in '<>=!+-*%&|^/':
                self.index += 1

    # Nothing was consumed: the character starts no known punctuator.
    if self.index == begin:
        self.throwUnexpectedToken()

    return RawToken(
        type=Token.Punctuator,
        value=op,
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=begin,
        end=self.index
    )
def scanHexLiteral(self, start):
    """Scan the digits of a hexadecimal literal whose prefix was consumed.

    Args:
        start: index at which the whole literal began.

    Returns:
        A NumericLiteral ``RawToken`` whose value is the integer parsed in
        base 16. A missing digit run, or an identifier-start character right
        after the digits, is reported through ``throwUnexpectedToken``.
    """
    digits = ''
    while not self.eof():
        ch = self.source[self.index]
        if not Character.isHexDigit(ch):
            break
        digits += ch
        self.index += 1

    if not digits:
        self.throwUnexpectedToken()

    if Character.isIdentifierStart(self.source[self.index]):
        self.throwUnexpectedToken()

    return RawToken(
        type=Token.NumericLiteral,
        value=int(digits, 16),
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=start,
        end=self.index
    )
def scanBinaryLiteral(self, start):
    """Scan the digits of a binary literal whose prefix was consumed.

    Args:
        start: index at which the whole literal began.

    Returns:
        A NumericLiteral ``RawToken`` whose value is the integer parsed in
        base 2. A missing digit run, or an identifier-start character or
        decimal digit right after the digits, is reported through
        ``throwUnexpectedToken``.
    """
    digits = ''
    while not self.eof():
        ch = self.source[self.index]
        if ch not in ('0', '1'):
            break
        digits += ch
        self.index += 1

    if not digits:
        # only 0b or 0B
        self.throwUnexpectedToken()

    if not self.eof():
        following = self.source[self.index]
        if Character.isIdentifierStart(following) or Character.isDecimalDigit(following):
            self.throwUnexpectedToken()

    return RawToken(
        type=Token.NumericLiteral,
        value=int(digits, 2),
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=start,
        end=self.index
    )
def scanOctalLiteral(self, prefix, start):
    """Scan an octal literal.

    Args:
        prefix: the character at the current index, i.e. the one after the
            leading zero. When it is itself an octal digit the literal is
            the legacy (implicit) form and that digit is part of the number;
            otherwise it is the o/O marker and is skipped.
        start: index at which the whole literal began.

    Returns:
        A NumericLiteral ``RawToken`` whose value is the integer parsed in
        base 8 and whose ``octal`` flag is True only for the legacy form.
        Problems are reported through ``throwUnexpectedToken``.
    """
    legacy = False
    digits = ''

    if Character.isOctalDigit(prefix[0]):
        legacy = True
        digits = '0' + self.source[self.index]
    # In both forms the character at the current index has now been handled.
    self.index += 1

    while not self.eof():
        ch = self.source[self.index]
        if not Character.isOctalDigit(ch):
            break
        digits += ch
        self.index += 1

    if not legacy and not digits:
        # only 0o or 0O
        self.throwUnexpectedToken()

    following = self.source[self.index]
    if Character.isIdentifierStart(following) or Character.isDecimalDigit(following):
        self.throwUnexpectedToken()

    return RawToken(
        type=Token.NumericLiteral,
        value=int(digits, 8),
        octal=legacy,
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=start,
        end=self.index
    )
def isImplicitOctalLiteral(self):
    """Decide whether the digits after the current index form a legacy octal.

    Looks ahead, without consuming anything, from ``index + 1`` to the end
    of the input.

    Returns:
        False if an 8 or 9 is met before the digit run ends; True otherwise.
    """
    # Implicit octal, unless there is a non-octal digit.
    # (Annex B.1.1 on Numeric Literals)
    position = self.index + 1
    while position < self.length:
        ch = self.source[position]
        if ch in '89':
            return False
        if not Character.isOctalDigit(ch):
            return True
        position += 1

    return True
def scanNumericLiteral(self):
    """Scan a numeric literal starting at the current index.

    Literals introduced by 0x, 0b, 0o or a legacy octal digit are delegated
    to the dedicated scanners. Everything else is read as a decimal number
    with optional fraction and exponent.

    Returns:
        A NumericLiteral ``RawToken``. For the decimal form the value is an
        ``int`` when the parsed float is integral and a ``float`` otherwise.
        Problems are reported through ``throwUnexpectedToken``.
    """
    begin = self.index
    ch = self.source[begin]
    assert Character.isDecimalDigit(ch) or ch == '.', 'Numeric literal must start with a decimal digit or a decimal point'

    def decimalRun():
        # Consume consecutive decimal digits and return them as text.
        mark = self.index
        while Character.isDecimalDigit(self.source[self.index]):
            self.index += 1
        return self.source[mark:self.index]

    literal = ''
    if ch != '.':
        literal = self.source[self.index]
        self.index += 1
        ch = self.source[self.index]

        # Hex number starts with '0x'.
        # Octal number starts with '0'.
        # Octal number in ES6 starts with '0o'.
        # Binary number in ES6 starts with '0b'.
        if literal == '0':
            if ch in ('x', 'X'):
                self.index += 1
                return self.scanHexLiteral(begin)
            if ch in ('b', 'B'):
                self.index += 1
                return self.scanBinaryLiteral(begin)
            if ch in ('o', 'O'):
                return self.scanOctalLiteral(ch, begin)
            if ch and Character.isOctalDigit(ch) and self.isImplicitOctalLiteral():
                return self.scanOctalLiteral(ch, begin)

        literal += decimalRun()
        ch = self.source[self.index]

    # Fractional part.
    if ch == '.':
        literal += self.source[self.index]
        self.index += 1
        literal += decimalRun()
        ch = self.source[self.index]

    # Exponent: marker, optional sign, then at least one digit.
    if ch in ('e', 'E'):
        literal += self.source[self.index]
        self.index += 1

        ch = self.source[self.index]
        if ch in ('+', '-'):
            literal += self.source[self.index]
            self.index += 1

        exponentDigits = decimalRun()
        if exponentDigits:
            literal += exponentDigits
        else:
            self.throwUnexpectedToken()

    if Character.isIdentifierStart(self.source[self.index]):
        self.throwUnexpectedToken()

    number = float(literal)
    return RawToken(
        type=Token.NumericLiteral,
        value=int(number) if number.is_integer() else number,
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=begin,
        end=self.index
    )
def scanStringLiteral(self):
    """Scan a quoted string literal and return it as a ``RawToken``.

    Escape sequences are decoded into the token value; a backslash followed
    by a line terminator continues the string on the next line and adds
    nothing to the value. The token's ``octal`` flag records whether any
    octal escape was seen. An unterminated string is reported at its
    opening quote through ``throwUnexpectedToken``.
    """
    begin = self.index
    quote = self.source[begin]
    assert quote in ('\'', '"'), 'String literal must starts with a quote'

    self.index += 1
    sawOctal = False
    text = ''

    # Single-character escapes and the character each one stands for.
    simpleEscapes = {
        'n': '\n',
        'r': '\r',
        't': '\t',
        'b': '\b',
        'f': '\f',
        'v': '\x0B',
    }

    while not self.eof():
        ch = self.source[self.index]
        self.index += 1

        if ch == quote:
            # Clearing the quote marks the literal as properly closed.
            quote = ''
            break

        if ch == '\\':
            ch = self.source[self.index]
            self.index += 1

            if ch and Character.isLineTerminator(ch):
                # Line continuation.
                self.lineNumber += 1
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1
                self.lineStart = self.index
            elif ch == 'u':
                if self.source[self.index] == '{':
                    self.index += 1
                    text += self.scanUnicodeCodePointEscape()
                else:
                    decoded = self.scanHexEscape(ch)
                    if not decoded:
                        self.throwUnexpectedToken()
                    text += decoded
            elif ch == 'x':
                decoded = self.scanHexEscape(ch)
                if not decoded:
                    self.throwUnexpectedToken(Messages.InvalidHexEscapeSequence)
                text += decoded
            elif ch in simpleEscapes:
                text += simpleEscapes[ch]
            elif ch in ('8', '9'):
                # Kept literally, but reported.
                text += ch
                self.tolerateUnexpectedToken()
            elif ch and Character.isOctalDigit(ch):
                decodedOctal = self.octalToDecimal(ch)
                sawOctal = decodedOctal.octal or sawOctal
                text += uchr(decodedOctal.code)
            else:
                # Any other escaped character stands for itself.
                text += ch
            continue

        if Character.isLineTerminator(ch):
            # A raw line break ends the scan with the quote still open.
            break

        text += ch

    if quote != '':
        self.index = begin
        self.throwUnexpectedToken()

    return RawToken(
        type=Token.StringLiteral,
        value=text,
        octal=sawOctal,
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=begin,
        end=self.index
    )
def scanTemplate(self):
    """Scan one template element and return it as a ``RawToken``.

    The element starts at a backtick (template head) or at the closing
    brace of a substitution, and ends at a backtick (template tail) or at
    the opening of the next substitution, which pushes a marker on
    ``curlyStack``. The token carries the raw source slice as ``value`` and
    the escape-decoded text as ``cooked``. An unterminated element is
    reported through ``throwUnexpectedToken``.
    """
    cookedText = ''
    closed = False
    begin = self.index

    isHead = self.source[begin] == '`'
    isTail = False
    # Number of trailing characters excluded from the raw value: two for
    # the substitution opener, one for a closing backtick.
    trailing = 2

    # Single-character escapes and the character each one stands for.
    simpleEscapes = {
        'n': '\n',
        'r': '\r',
        't': '\t',
        'b': '\b',
        'f': '\f',
        'v': '\v',
    }

    self.index += 1

    while not self.eof():
        ch = self.source[self.index]
        self.index += 1

        if ch == '`':
            trailing = 1
            isTail = True
            closed = True
            break

        if ch == '$':
            if self.source[self.index] == '{':
                self.curlyStack.append('${')
                self.index += 1
                closed = True
                break
            cookedText += ch
            continue

        if ch == '\\':
            ch = self.source[self.index]
            self.index += 1

            if Character.isLineTerminator(ch):
                # Line continuation: nothing is added to the cooked text.
                self.lineNumber += 1
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1
                self.lineStart = self.index
            elif ch in simpleEscapes:
                cookedText += simpleEscapes[ch]
            elif ch == 'u':
                if self.source[self.index] == '{':
                    self.index += 1
                    cookedText += self.scanUnicodeCodePointEscape()
                else:
                    resume = self.index
                    decoded = self.scanHexEscape(ch)
                    if decoded:
                        cookedText += decoded
                    else:
                        # Not a valid escape: rewind and keep the 'u'.
                        self.index = resume
                        cookedText += ch
            elif ch == 'x':
                decoded = self.scanHexEscape(ch)
                if not decoded:
                    self.throwUnexpectedToken(Messages.InvalidHexEscapeSequence)
                cookedText += decoded
            elif ch == '0':
                if Character.isDecimalDigit(self.source[self.index]):
                    # Illegal: \01 \02 and so on
                    self.throwUnexpectedToken(Messages.TemplateOctalLiteral)
                cookedText += '\0'
            elif Character.isOctalDigit(ch):
                # Illegal: \1 \2
                self.throwUnexpectedToken(Messages.TemplateOctalLiteral)
            else:
                cookedText += ch
            continue

        if Character.isLineTerminator(ch):
            # Raw line breaks are allowed and are cooked as a single LF.
            self.lineNumber += 1
            if ch == '\r' and self.source[self.index] == '\n':
                self.index += 1
            self.lineStart = self.index
            cookedText += '\n'
            continue

        cookedText += ch

    if not closed:
        self.throwUnexpectedToken()

    # An element that began at a closing brace ends that substitution.
    if not isHead and self.curlyStack:
        self.curlyStack.pop()

    return RawToken(
        type=Token.Template,
        value=self.source[begin + 1:self.index - trailing],
        cooked=cookedText,
        head=isHead,
        tail=isTail,
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=begin,
        end=self.index
    )
def testRegExp(self, pattern, flags):
# The BMP character to use as a replacement for astral symbols when
# translating an ES6 "u"-flagged pattern to an ES5-compatible
# approximation.
# Note: replacing with '\uFFFF' enables false positives in unlikely
# scenarios. For example, `[\u{1044f}-\u{10440}]` is an invalid
# pattern that would not be detected by this substitution.
astralSubstitute = '\uFFFF'
# Replace every Unicode escape sequence with the equivalent
# BMP character or a constant ASCII code point in the case of
# astral symbols. (See the above note on `astralSubstitute`
# for more information.)
def astralSub(m):
codePoint = int(m.group(1) or m.group(2), 16)
if codePoint > 0x10FFFF:
self.tolerateUnexpectedToken(Messages.InvalidRegExp)
elif codePoint <= 0xFFFF:
return uchr(codePoint)
return astralSubstitute
pattern = re.sub(r'\\u\{([0-9a-fA-F]+)\}|\\u([a-fA-F0-9]{4})', astralSub, pattern)
# Replace each paired surrogate with a single ASCII symbol to
# avoid throwing on regular expressions that are only valid in
# combination with the "u" flag.
pattern = re.sub(r'[\uD800-\uDBFF][\uDC00-\uDFFF]', astralSubstitute, pattern)
# Return a regular expression object for this pattern-flag pair, or
# `null` in case the current environment doesn't support the flags it
# uses.
pyflags = 0 | re.M if 'm' in flags else 0 | re.I if 'i' in flags else 0
try:
return re.compile(pattern, pyflags)
except Exception:
self.tolerateUnexpectedToken(Messages.InvalidRegExp)
def scanRegExpBody(self):
    """Scan the body of a regular-expression literal.

    Scanning starts at the opening slash and stops at the first slash that
    is neither escaped nor inside a character class. A line terminator
    inside the body, or running out of input, is reported through
    ``throwUnexpectedToken`` with ``Messages.UnterminatedRegExp``.

    Returns:
        The body text without the leading and trailing slash.
    """
    ch = self.source[self.index]
    assert ch == '/', 'Regular expression literal must start with a slash'

    body = self.source[self.index]
    self.index += 1
    inClass = False
    closed = False

    while not self.eof():
        ch = self.source[self.index]
        self.index += 1
        body += ch

        if ch == '\\':
            # The escaped character is copied verbatim, unless it is a line
            # terminator, which is never allowed.
            ch = self.source[self.index]
            self.index += 1
            if Character.isLineTerminator(ch):
                self.throwUnexpectedToken(Messages.UnterminatedRegExp)
            body += ch
            continue

        if Character.isLineTerminator(ch):
            self.throwUnexpectedToken(Messages.UnterminatedRegExp)
            continue

        if inClass:
            # Inside [...] a slash does not end the literal.
            if ch == ']':
                inClass = False
            continue

        if ch == '/':
            closed = True
            break
        if ch == '[':
            inClass = True

    if not closed:
        self.throwUnexpectedToken(Messages.UnterminatedRegExp)

    # Exclude leading and trailing slash.
    return body[1:-1]
def scanRegExpFlags(self):
    """Scan the flags that follow a regular-expression body.

    Characters are consumed for as long as ``Character.isIdentifierPart``
    accepts them. A backslash in the flags is always reported through
    ``tolerateUnexpectedToken``; if it introduces a valid four-digit 'u'
    escape the decoded character is added to the flags, and if the 'u' is
    not followed by four hex digits a literal 'u' is added instead.

    Returns:
        The flags as a string.
    """
    # The original also accumulated the raw flag text in a local that was
    # never read or returned; that dead bookkeeping has been removed.
    flags = ''

    while not self.eof():
        ch = self.source[self.index]
        if not Character.isIdentifierPart(ch):
            break

        self.index += 1
        if ch == '\\' and not self.eof():
            ch = self.source[self.index]
            if ch == 'u':
                self.index += 1
                restore = self.index
                char = self.scanHexEscape('u')
                if char:
                    flags += char
                else:
                    # Not a valid escape: rewind to just after the 'u' and
                    # keep the 'u' itself as a flag character.
                    self.index = restore
                    flags += 'u'
            # Escapes are reported whether or not they could be decoded.
            self.tolerateUnexpectedToken()
        else:
            flags += ch

    return flags
def scanRegExp(self):
    """Scan a complete regular-expression literal: body, then flags.

    Returns:
        A RegularExpression ``RawToken`` carrying the body as ``pattern``,
        the flag string as ``flags`` and the result of ``testRegExp`` as
        ``regex``. Its ``value`` is the empty string.
    """
    begin = self.index

    body = self.scanRegExpBody()
    flagText = self.scanRegExpFlags()
    compiled = self.testRegExp(body, flagText)

    return RawToken(
        type=Token.RegularExpression,
        value='',
        pattern=body,
        flags=flagText,
        regex=compiled,
        lineNumber=self.lineNumber,
        lineStart=self.lineStart,
        start=begin,
        end=self.index
    )
def lex(self):
    """Scan and return the next token as a ``RawToken``.

    At the end of the input an EOF token is returned. Otherwise the first
    character selects the specialised scanner; anything not recognised here
    is handed to ``scanPunctuator``.
    """
    if self.eof():
        return RawToken(
            type=Token.EOF,
            value='',
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=self.index,
            end=self.index
        )

    first = self.source[self.index]

    if Character.isIdentifierStart(first):
        return self.scanIdentifier()

    # Very common: ( and ) and ;
    if first in ('(', ')', ';'):
        return self.scanPunctuator()

    # String literal starts with single quote (U+0027) or double quote (U+0022).
    if first in ('\'', '"'):
        return self.scanStringLiteral()

    # Dot (.) U+002E can also start a floating-point number, hence the need
    # to check the next character.
    if first == '.':
        following = self.source[self.index + 1]
        if Character.isDecimalDigit(following):
            return self.scanNumericLiteral()
        return self.scanPunctuator()

    if Character.isDecimalDigit(first):
        return self.scanNumericLiteral()

    # Template literals start with ` (U+0060) for template head
    # or } (U+007D) for template middle or template tail.
    resumesTemplate = first == '}' and self.curlyStack and self.curlyStack[-1] == '${'
    if first == '`' or resumesTemplate:
        return self.scanTemplate()

    # Possible identifier start in a surrogate pair.
    unit = ord(first)
    if unit >= 0xD800 and unit < 0xDFFF:
        combined = Character.fromCodePoint(self.codePointAt(self.index))
        if Character.isIdentifierStart(combined):
            return self.scanIdentifier()

    return self.scanPunctuator()