scanner.py - mozsearch

mozilla-central/third_party/python/esprima/esprima/scanner.py

Enable keyboard shortcuts

Source code

File a bug in Firefox Build System :: General

Revision control

Copy as Markdown

Other Tools

# -*- coding: utf-8 -*-

# Copyright JS Foundation and other contributors, https://js.foundation/

# Redistribution and use in source and binary forms, with or without

# modification, are permitted provided that the following conditions are met:

#   * Redistributions of source code must retain the above copyright

#     notice, this list of conditions and the following disclaimer.

#   * Redistributions in binary form must reproduce the above copyright

#     notice, this list of conditions and the following disclaimer in the

#     documentation and/or other materials provided with the distribution.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY

# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import, unicode_literals

import re

from .objects import Object

from .compat import xrange, unicode, uchr, uord

from .character import Character, HEX_CONV, OCTAL_CONV

from .messages import Messages

from .token import Token

def hexValue(ch):
    """Return the entry stored for *ch* in the ``HEX_CONV`` table.

    ``HEX_CONV`` is imported from ``.character``; it is presumably keyed by
    single hexadecimal digit characters (confirm against that module).  A
    character that is absent from the table makes the lookup itself fail.
    """
    digit = HEX_CONV[ch]
    return digit

def octalValue(ch):
    """Return the entry stored for *ch* in the ``OCTAL_CONV`` table.

    ``OCTAL_CONV`` is imported from ``.character``; it is presumably keyed by
    single octal digit characters (confirm against that module).  A character
    that is absent from the table makes the lookup itself fail.
    """
    digit = OCTAL_CONV[ch]
    return digit

class RegExp(Object):
    """Record pairing a regular-expression pattern with its flags."""

    def __init__(self, pattern=None, flags=None):
        """Store *pattern* and *flags* unchanged; both default to ``None``."""
        self.pattern, self.flags = pattern, flags

class Position(Object):
    """Record describing one point in the source: line, column and offset."""

    def __init__(self, line=None, column=None, offset=None):
        """Store the three coordinates unchanged; each defaults to ``None``."""
        self.line, self.column, self.offset = line, column, offset

class SourceLocation(Object):
    """Record describing a span of source text.

    The scanner in this file fills ``start`` and ``end`` with ``Position``
    instances; ``source`` is stored as given.
    """

    def __init__(self, start=None, end=None, source=None):
        """Store the three values unchanged; each defaults to ``None``."""
        self.start, self.end, self.source = start, end, source

class Comment(Object):
    """Record describing one comment found by the scanner.

    The scanner in this file passes ``multiLine`` as a boolean, ``slice`` and
    ``range`` as two-element ``[start, end]`` index lists (``slice`` excludes
    the comment delimiters, ``range`` includes them) and ``loc`` as a
    ``SourceLocation``.
    """

    def __init__(self, multiLine=None, slice=None, range=None, loc=None):
        """Store the four values unchanged; each defaults to ``None``."""
        self.multiLine, self.slice = multiLine, slice
        self.range, self.loc = range, loc

class RawToken(Object):
    """Record describing one token produced by the scanner.

    Every field is optional and defaults to ``None``.  The scanner methods in
    this file always fill ``type``, ``value``, ``lineNumber``, ``lineStart``,
    ``start`` and ``end``; the remaining fields are set only by the token
    kinds that need them (for example ``octal`` for numeric and string
    literals, ``cooked``/``head``/``tail`` for templates).
    """

    def __init__(self, type=None, value=None, pattern=None, flags=None, regex=None, octal=None, cooked=None, head=None, tail=None, lineNumber=None, lineStart=None, start=None, end=None):
        """Store every argument on the instance under the same name."""
        # Assigned one by one, in declaration order, exactly as a sequence of
        # plain attribute assignments would.
        fields = (
            ('type', type),
            ('value', value),
            ('pattern', pattern),
            ('flags', flags),
            ('regex', regex),
            ('octal', octal),
            ('cooked', cooked),
            ('head', head),
            ('tail', tail),
            ('lineNumber', lineNumber),
            ('lineStart', lineStart),
            ('start', start),
            ('end', end),
        )
        for fieldName, fieldValue in fields:
            setattr(self, fieldName, fieldValue)

class ScannerState(Object):
    """Snapshot of the scanner cursor: ``index``, ``lineNumber``, ``lineStart``."""

    def __init__(self, index=None, lineNumber=None, lineStart=None):
        """Store the three values unchanged; each defaults to ``None``."""
        self.index, self.lineNumber, self.lineStart = index, lineNumber, lineStart

class Octal(object):
    """Result of decoding an octal escape.

    ``octal`` tells whether the escape counts as an octal escape sequence and
    ``code`` is the decoded numeric value.
    """

    def __init__(self, octal, code):
        """Store *octal* and *code* unchanged."""
        self.octal, self.code = octal, code

class Scanner(object):

    def __init__(self, code, handler):
        """Prepare a scanner positioned at the start of *code*.

        *handler* is kept as ``self.errorHandler``; the error helpers of this
        class call its ``throwError`` and ``tolerateError`` methods.
        """
        # One NUL character is appended after the real text, so reading
        # ``self.source[self.length]`` is still a valid index.
        self.source = unicode(code) + '\x00'
        self.errorHandler = handler
        self.trackComment = False
        self.isModule = False

        size = len(code)
        self.length = size
        self.index = 0
        # Line numbering starts at 1, except for empty input where it is 0.
        self.lineNumber = 1 if size > 0 else 0
        self.lineStart = 0
        self.curlyStack = []

    def saveState(self):
        """Return a ``ScannerState`` holding the current cursor.

        The snapshot records ``index``, ``lineNumber`` and ``lineStart`` and
        can be handed back to ``restoreState`` to rewind the scanner.
        """
        return ScannerState(
            index=self.index,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart
        )

    def restoreState(self, state):
        """Rewind the cursor to the values recorded in *state*.

        *state* only needs ``index``, ``lineNumber`` and ``lineStart``
        attributes, such as the object returned by ``saveState``.
        """
        for field in ('index', 'lineNumber', 'lineStart'):
            setattr(self, field, getattr(state, field))

    def eof(self):
        """Return True once ``self.index`` has reached ``self.length``."""
        return not self.index < self.length

    def throwUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
        """Report *message* at the current position via ``errorHandler.throwError``.

        The handler receives the current index, the line number and a 1-based
        column; whatever it returns is returned to the caller.
        """
        column = self.index - self.lineStart + 1
        return self.errorHandler.throwError(self.index, self.lineNumber, column, message)

    def tolerateUnexpectedToken(self, message=Messages.UnexpectedTokenIllegal):
        """Report *message* at the current position via ``errorHandler.tolerateError``.

        The handler receives the current index, the line number and a 1-based
        column.  Nothing is returned.
        """
        column = self.index - self.lineStart + 1
        self.errorHandler.tolerateError(self.index, self.lineNumber, column, message)

    # https://tc39.github.io/ecma262/#sec-comments

    def skipSingleLineComment(self, offset):
        """Consume a single-line comment up to and including its line break.

        *offset* is the length of the opening marker the caller has already
        stepped over (``scanComments`` passes 2 for ``//``, 3 for ``-->`` and
        4 for ``<!--``), so the comment begins at ``self.index - offset``.

        Returns a list holding one ``Comment`` when ``self.trackComment`` is
        set, and an empty list otherwise.
        """
        comments = []

        if self.trackComment:
            start = self.index - offset
            loc = SourceLocation(
                start=Position(
                    line=self.lineNumber,
                    column=self.index - self.lineStart - offset
                ),
                end=Position()
            )

        while not self.eof():
            ch = self.source[self.index]
            self.index += 1
            if Character.isLineTerminator(ch):
                if self.trackComment:
                    # The terminator has already been consumed, hence the -1:
                    # the comment ends just before it.
                    loc.end = Position(
                        line=self.lineNumber,
                        column=self.index - self.lineStart - 1
                    )
                    entry = Comment(
                        multiLine=False,
                        slice=[start + offset, self.index - 1],
                        range=[start, self.index - 1],
                        loc=loc
                    )
                    comments.append(entry)

                # Treat CR LF as a single line break.
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1

                self.lineNumber += 1
                self.lineStart = self.index
                return comments

        # The input ended before any line terminator: the comment runs to the
        # end of the source.
        if self.trackComment:
            loc.end = Position(
                line=self.lineNumber,
                column=self.index - self.lineStart
            )
            entry = Comment(
                multiLine=False,
                slice=[start + offset, self.index],
                range=[start, self.index],
                loc=loc
            )
            comments.append(entry)

        return comments

    def skipMultiLineComment(self):
        """Consume a block comment whose opening ``/*`` was already skipped.

        Line counters are updated for every line break inside the comment.
        Returns a list holding one ``Comment`` when ``self.trackComment`` is
        set, and an empty list otherwise.  When the input ends before ``*/``
        the problem is reported through ``tolerateUnexpectedToken``.
        """
        comments = []

        if self.trackComment:
            # The caller has stepped over the two-character opener.
            start = self.index - 2
            loc = SourceLocation(
                start=Position(
                    line=self.lineNumber,
                    column=self.index - self.lineStart - 2
                ),
                end=Position()
            )

        while not self.eof():
            ch = self.source[self.index]
            if Character.isLineTerminator(ch):
                # Treat CR LF as a single line break.
                if ch == '\r' and self.source[self.index + 1] == '\n':
                    self.index += 1

                self.lineNumber += 1
                self.index += 1
                self.lineStart = self.index
            elif ch == '*':
                # Block comment ends with '*/'.
                if self.source[self.index + 1] == '/':
                    self.index += 2
                    if self.trackComment:
                        loc.end = Position(
                            line=self.lineNumber,
                            column=self.index - self.lineStart
                        )
                        entry = Comment(
                            multiLine=True,
                            slice=[start + 2, self.index - 2],
                            range=[start, self.index],
                            loc=loc
                        )
                        comments.append(entry)

                    return comments

                self.index += 1
            else:
                self.index += 1

        # Ran off the end of the file - the whole thing is a comment
        if self.trackComment:
            loc.end = Position(
                line=self.lineNumber,
                column=self.index - self.lineStart
            )
            entry = Comment(
                multiLine=True,
                slice=[start + 2, self.index],
                range=[start, self.index],
                loc=loc
            )
            comments.append(entry)

        self.tolerateUnexpectedToken()
        return comments

    def scanComments(self):
        """Skip white space, line breaks and comments at the cursor.

        Handles ``//`` and ``/* */`` comments, ``-->`` when it appears at the
        start of a line, and ``<!--`` unless ``self.isModule`` is set.  Stops
        at the first character that belongs to none of these.

        Returns the list of ``Comment`` records gathered; it stays empty
        unless ``self.trackComment`` is set.
        """
        collected = []
        # True while only white space or comments have been seen on this line.
        atLineStart = self.index == 0

        while not self.eof():
            ch = self.source[self.index]

            if Character.isWhiteSpace(ch):
                self.index += 1
                continue

            if Character.isLineTerminator(ch):
                self.index += 1
                # Treat CR LF as a single line break.
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1
                self.lineNumber += 1
                self.lineStart = self.index
                atLineStart = True
                continue

            if ch == '/':  # U+002F is '/'
                following = self.source[self.index + 1]
                if following == '/':
                    self.index += 2
                    found = self.skipSingleLineComment(2)
                    if self.trackComment:
                        collected.extend(found)
                    atLineStart = True
                elif following == '*':  # U+002A is '*'
                    self.index += 2
                    found = self.skipMultiLineComment()
                    if self.trackComment:
                        collected.extend(found)
                else:
                    break
                continue

            if atLineStart and ch == '-':  # U+002D is '-'
                # U+003E is '>'
                if self.source[self.index + 1:self.index + 3] != '->':
                    break
                # '-->' is a single-line comment
                self.index += 3
                found = self.skipSingleLineComment(3)
                if self.trackComment:
                    collected.extend(found)
                continue

            if ch == '<' and not self.isModule:  # U+003C is '<'
                if self.source[self.index + 1:self.index + 4] != '!--':
                    break
                self.index += 4  # `<!--`
                found = self.skipSingleLineComment(4)
                if self.trackComment:
                    collected.extend(found)
                continue

            break

        return collected

    # https://tc39.github.io/ecma262/#sec-future-reserved-words

    def isFutureReservedWord(self, id):
        """Return True when *id* is one of the future reserved words below."""
        words = self.isFutureReservedWord.set
        return id in words

    # The word list lives on the function object itself.
    isFutureReservedWord.set = set(['enum', 'export', 'import', 'super'])

    def isStrictModeReservedWord(self, id):
        """Return True when *id* is one of the strict-mode reserved words below."""
        words = self.isStrictModeReservedWord.set
        return id in words

    # The word list lives on the function object itself.
    isStrictModeReservedWord.set = set([
        'implements', 'interface', 'package',
        'private', 'protected', 'public',
        'static', 'yield', 'let',
    ])

    def isRestrictedWord(self, id):
        """Return True when *id* is ``eval`` or ``arguments``."""
        words = self.isRestrictedWord.set
        return id in words

    # The word list lives on the function object itself.
    isRestrictedWord.set = set(['eval', 'arguments'])

    # https://tc39.github.io/ecma262/#sec-keywords

    def isKeyword(self, id):
        """Return True when *id* is one of the keywords listed below."""
        words = self.isKeyword.set
        return id in words

    # The word list lives on the function object itself; entries are grouped
    # by length.
    isKeyword.set = set([
        'if', 'in', 'do',
        'var', 'for', 'new', 'try', 'let',
        'this', 'else', 'case', 'void', 'with', 'enum',
        'while', 'break', 'catch', 'throw', 'const', 'yield', 'class', 'super',
        'return', 'typeof', 'delete', 'switch', 'export', 'import',
        'default', 'finally', 'extends',
        'function', 'continue', 'debugger',
        'instanceof',
    ])

    def codePointAt(self, i):
        """Return ``uord`` of the (at most) two characters starting at *i*.

        Two characters are passed, presumably so that ``uord`` (from
        ``.compat``) can combine a surrogate pair - confirm against that
        helper.
        """
        pair = self.source[i:i + 2]
        return uord(pair)

    def scanHexEscape(self, prefix):
        """Read a fixed-width hexadecimal escape at the cursor.

        Four digits are expected when *prefix* is ``'u'`` and two otherwise.
        Returns ``uchr`` of the decoded value, or ``None`` as soon as a
        non-hex character or the end of input is met; digits consumed before
        that point are not given back.
        """
        width = 4 if prefix == 'u' else 2
        value = 0

        for _ in xrange(width):
            if self.eof() or not Character.isHexDigit(self.source[self.index]):
                return None
            digit = self.source[self.index]
            self.index += 1
            value = value * 16 + hexValue(digit)

        return uchr(value)

    def scanUnicodeCodePointEscape(self):
        """Read the digits and closing brace of a ``\\u{...}`` escape.

        The cursor must sit just after the opening ``{``.  Returns
        ``Character.fromCodePoint`` of the decoded value.  An empty escape, a
        value above 0x10FFFF or a missing ``}`` is reported through
        ``throwUnexpectedToken``.
        """
        last = self.source[self.index]
        value = 0

        # At least, one hex digit is required.
        if last == '}':
            self.throwUnexpectedToken()

        while not self.eof():
            last = self.source[self.index]
            self.index += 1
            if Character.isHexDigit(last):
                value = value * 16 + hexValue(last)
                continue
            break

        # ``last`` is the character that stopped the loop; it has to be '}'.
        if not (value <= 0x10FFFF and last == '}'):
            self.throwUnexpectedToken()

        return Character.fromCodePoint(value)

    def getIdentifier(self):
        """Read an identifier made of plain characters and return its text.

        The first character is accepted without inspection.  If a backslash
        or a code unit in the range 0xD800 to 0xDFFE is met, the cursor is
        reset to the start and the work is handed to
        ``getComplexIdentifier``.
        """
        begin = self.index
        self.index += 1

        while not self.eof():
            ch = self.source[self.index]
            # Backslash (U+005C) marks Unicode escape sequence; surrogate
            # code units need pair handling.  Both take the slow path.
            if ch == '\\' or 0xD800 <= ord(ch) < 0xDFFF:
                self.index = begin
                return self.getComplexIdentifier()

            if not Character.isIdentifierPart(ch):
                break
            self.index += 1

        return self.source[begin:self.index]

    def getComplexIdentifier(self):
        """Read an identifier that may contain escapes or surrogate pairs.

        Works on code points obtained through ``codePointAt`` and decodes
        ``\\uXXXX`` and ``\\u{...}`` escapes in place.  Returns the decoded
        identifier text, which can therefore be shorter than the source span
        that was consumed.  Malformed escapes are reported through
        ``throwUnexpectedToken``.
        """
        cp = self.codePointAt(self.index)
        id = Character.fromCodePoint(cp)
        # Advance by the length of the decoded text (more than one code unit
        # when fromCodePoint returns several).
        self.index += len(id)

        # '\u' (U+005C, U+0075) denotes an escaped character.
        if cp == 0x5C:
            if self.source[self.index] != 'u':
                self.throwUnexpectedToken()
            self.index += 1
            if self.source[self.index] == '{':
                self.index += 1
                ch = self.scanUnicodeCodePointEscape()
            else:
                ch = self.scanHexEscape('u')
                # The decoded character must be able to start an identifier.
                if not ch or ch == '\\' or not Character.isIdentifierStart(ch[0]):
                    self.throwUnexpectedToken()
            # The identifier starts with the decoded character, not the
            # backslash.
            id = ch

        while not self.eof():
            cp = self.codePointAt(self.index)
            ch = Character.fromCodePoint(cp)
            if not Character.isIdentifierPart(ch):
                break
            id += ch
            self.index += len(ch)

            # '\u' (U+005C, U+0075) denotes an escaped character.
            if cp == 0x5C:
                # Drop the backslash that was just appended; the decoded
                # character replaces it below.
                id = id[:-1]
                if self.source[self.index] != 'u':
                    self.throwUnexpectedToken()
                self.index += 1
                if self.source[self.index] == '{':
                    self.index += 1
                    ch = self.scanUnicodeCodePointEscape()
                else:
                    ch = self.scanHexEscape('u')
                    # The decoded character must be able to continue an
                    # identifier.
                    if not ch or ch == '\\' or not Character.isIdentifierPart(ch[0]):
                        self.throwUnexpectedToken()
                id += ch

        return id

    def octalToDecimal(self, ch):
        """Decode an octal escape whose first digit *ch* was already consumed.

        Up to two further octal digits are read from the cursor; the third
        digit is only taken when *ch* is one of ``0``-``3``.  Returns an
        ``Octal`` carrying the decoded value and whether the escape counts as
        octal (a lone ``0`` does not).
        """
        # \0 is not octal escape sequence
        isOctal = ch != '0'
        value = octalValue(ch)

        hasSecond = not self.eof() and Character.isOctalDigit(self.source[self.index])
        if not hasSecond:
            return Octal(isOctal, value)

        isOctal = True
        value = value * 8 + octalValue(self.source[self.index])
        self.index += 1

        # 3 digits are only allowed when string starts
        # with 0, 1, 2, 3
        if ch in '0123' and not self.eof() and Character.isOctalDigit(self.source[self.index]):
            value = value * 8 + octalValue(self.source[self.index])
            self.index += 1

        return Octal(isOctal, value)

    # https://tc39.github.io/ecma262/#sec-names-and-keywords

    def scanIdentifier(self):
        """Scan an identifier-like word and classify it.

        Returns a ``RawToken`` whose type is ``Token.Keyword``,
        ``Token.NullLiteral``, ``Token.BooleanLiteral`` or
        ``Token.Identifier`` and whose value is the decoded word.  A keyword
        or literal that was written with escapes is reported through
        ``tolerateUnexpectedToken``.
        """
        start = self.index

        # Backslash (U+005C) starts an escaped character.
        id = self.getComplexIdentifier() if self.source[start] == '\\' else self.getIdentifier()

        # There is no keyword or literal with only one character.
        # Thus, it must be an identifier.
        if len(id) == 1:
            type = Token.Identifier
        elif self.isKeyword(id):
            type = Token.Keyword
        elif id == 'null':
            type = Token.NullLiteral
        elif id == 'true' or id == 'false':
            type = Token.BooleanLiteral
        else:
            type = Token.Identifier

        # When the consumed span is not as long as the decoded word, the word
        # contained escapes, which is not allowed for reserved words.
        if type is not Token.Identifier and start + len(id) != self.index:
            restore = self.index
            # Report the error at the start of the word, then resume.
            self.index = start
            self.tolerateUnexpectedToken(Messages.InvalidEscapedReservedWord)
            self.index = restore

        return RawToken(
            type=type,
            value=id,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-punctuators

    def scanPunctuator(self):
        """Scan one punctuator at the cursor and return it as a ``RawToken``.

        The longest matching punctuator wins (four characters down to one).
        ``{`` pushes onto ``self.curlyStack`` and ``}`` pops from it when it
        is not empty.  If nothing matches, the cursor does not move and the
        problem is reported through ``throwUnexpectedToken``.
        """
        start = self.index

        # Check for most common single-character punctuators.
        punct = self.source[self.index]

        if punct in ('(', '{'):
            if punct == '{':
                self.curlyStack.append('{')
            self.index += 1

        elif punct == '.':
            self.index += 1
            if self.source[self.index] == '.' and self.source[self.index + 1] == '.':
                # Spread operator: ...
                self.index += 2
                punct = '...'

        elif punct == '}':
            self.index += 1
            if self.curlyStack:
                self.curlyStack.pop()

        elif punct in (')', ';', ',', '[', ']', ':', '?', '~'):
            self.index += 1

        else:
            # Try the longest candidates first.
            four = self.source[self.index:self.index + 4]
            three = four[:3]
            two = four[:2]

            if four == '>>>=':
                # 4-character punctuator.
                punct = four
                self.index += 4
            elif three in (
                '===', '!==', '>>>',
                '<<=', '>>=', '**=',
            ):
                # 3-character punctuators.
                punct = three
                self.index += 3
            elif two in (
                '&&', '||', '==', '!=',
                '+=', '-=', '*=', '/=',
                '++', '--', '<<', '>>',
                '&=', '|=', '^=', '%=',
                '<=', '>=', '=>', '**',
            ):
                # 2-character punctuators.
                punct = two
                self.index += 2
            else:
                # 1-character punctuators.
                punct = self.source[self.index]
                if punct in '<>=!+-*%&|^/':
                    self.index += 1

        # Nothing was consumed: the character is not a punctuator.
        if self.index == start:
            self.throwUnexpectedToken()

        return RawToken(
            type=Token.Punctuator,
            value=punct,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-literals-numeric-literals

    def scanHexLiteral(self, start):
        """Scan the digits of a hexadecimal literal.

        The cursor must sit just after the ``0x``/``0X`` prefix; *start* is
        the index where the whole literal began.  Returns a numeric
        ``RawToken`` holding the integer value.  A missing digit, or an
        identifier-start character right after the digits, is reported
        through ``throwUnexpectedToken``.
        """
        num = ''

        while not self.eof():
            if not Character.isHexDigit(self.source[self.index]):
                break
            num += self.source[self.index]
            self.index += 1

        if len(num) == 0:
            self.throwUnexpectedToken()

        if Character.isIdentifierStart(self.source[self.index]):
            self.throwUnexpectedToken()

        return RawToken(
            type=Token.NumericLiteral,
            value=int(num, 16),
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    def scanBinaryLiteral(self, start):
        """Scan the digits of a binary literal.

        The cursor must sit just after the ``0b``/``0B`` prefix; *start* is
        the index where the whole literal began.  Returns a numeric
        ``RawToken`` holding the integer value.  A missing digit, or an
        identifier-start character or decimal digit right after the digits,
        is reported through ``throwUnexpectedToken``.
        """
        num = ''

        while not self.eof():
            ch = self.source[self.index]
            if ch != '0' and ch != '1':
                break
            num += self.source[self.index]
            self.index += 1

        if len(num) == 0:
            # only 0b or 0B
            self.throwUnexpectedToken()

        if not self.eof():
            ch = self.source[self.index]
            if Character.isIdentifierStart(ch) or Character.isDecimalDigit(ch):
                self.throwUnexpectedToken()

        return RawToken(
            type=Token.NumericLiteral,
            value=int(num, 2),
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    def scanOctalLiteral(self, prefix, start):
        """Scan an octal literal, either ``0o...`` or the legacy ``0...`` form.

        *prefix* is the character following the leading ``0`` and the cursor
        still points at it: an octal digit means the legacy form, otherwise
        it is the ``o``/``O`` marker.  *start* is the index where the whole
        literal began.  Returns a numeric ``RawToken`` whose ``octal`` field
        is True for the legacy form only.  A missing digit, or an
        identifier-start character or decimal digit right after the digits,
        is reported through ``throwUnexpectedToken``.
        """
        num = ''
        octal = False

        if Character.isOctalDigit(prefix[0]):
            # Legacy form: the character at the cursor is already a digit.
            octal = True
            num = '0' + self.source[self.index]

        # Step over the first digit (legacy form) or the 'o'/'O' marker.
        self.index += 1

        while not self.eof():
            if not Character.isOctalDigit(self.source[self.index]):
                break
            num += self.source[self.index]
            self.index += 1

        if not octal and len(num) == 0:
            # only 0o or 0O
            self.throwUnexpectedToken()

        if Character.isIdentifierStart(self.source[self.index]) or Character.isDecimalDigit(self.source[self.index]):
            self.throwUnexpectedToken()

        return RawToken(
            type=Token.NumericLiteral,
            value=int(num, 8),
            octal=octal,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    def isImplicitOctalLiteral(self):
        """Tell whether the digits after the cursor form a legacy octal literal.

        Looks ahead from ``self.index + 1`` without moving the cursor.
        Returns False when an ``8`` or ``9`` is found before the digit run
        ends, and True otherwise.
        """
        # Implicit octal, unless there is a non-octal digit.
        # (Annex B.1.1 on Numeric Literals)
        pos = self.index + 1
        limit = self.length
        while pos < limit:
            ch = self.source[pos]
            if ch in '89':
                return False
            if not Character.isOctalDigit(ch):
                break
            pos += 1

        return True

    def scanNumericLiteral(self):
        """Scan a numeric literal starting at the cursor.

        Literals with a ``0x``, ``0b`` or ``0o`` prefix, and legacy octal
        literals, are delegated to the dedicated scanners.  Otherwise a
        decimal literal with optional fraction and exponent is read and
        returned as a numeric ``RawToken``; its value is an ``int`` when the
        parsed float is integral and a ``float`` otherwise.  A malformed
        exponent, or an identifier-start character right after the literal,
        is reported through ``throwUnexpectedToken``.
        """
        start = self.index
        ch = self.source[start]
        assert Character.isDecimalDigit(ch) or ch == '.', 'Numeric literal must start with a decimal digit or a decimal point'

        num = ''
        if ch != '.':
            num = self.source[self.index]
            self.index += 1
            ch = self.source[self.index]

            # Hex number starts with '0x'.
            # Octal number starts with '0'.
            # Octal number in ES6 starts with '0o'.
            # Binary number in ES6 starts with '0b'.
            if num == '0':
                if ch in ('x', 'X'):
                    self.index += 1
                    return self.scanHexLiteral(start)

                if ch in ('b', 'B'):
                    self.index += 1
                    return self.scanBinaryLiteral(start)

                # The octal scanner steps over the marker/first digit itself,
                # so the cursor is not advanced here.
                if ch in ('o', 'O'):
                    return self.scanOctalLiteral(ch, start)

                if ch and Character.isOctalDigit(ch):
                    if self.isImplicitOctalLiteral():
                        return self.scanOctalLiteral(ch, start)

            # Integer part.
            while Character.isDecimalDigit(self.source[self.index]):
                num += self.source[self.index]
                self.index += 1

            ch = self.source[self.index]

        # Fraction part.
        if ch == '.':
            num += self.source[self.index]
            self.index += 1
            while Character.isDecimalDigit(self.source[self.index]):
                num += self.source[self.index]
                self.index += 1

            ch = self.source[self.index]

        # Exponent part: 'e' or 'E', optional sign, at least one digit.
        if ch in ('e', 'E'):
            num += self.source[self.index]
            self.index += 1

            ch = self.source[self.index]
            if ch in ('+', '-'):
                num += self.source[self.index]
                self.index += 1

            if Character.isDecimalDigit(self.source[self.index]):
                while Character.isDecimalDigit(self.source[self.index]):
                    num += self.source[self.index]
                    self.index += 1
            else:
                self.throwUnexpectedToken()

        if Character.isIdentifierStart(self.source[self.index]):
            self.throwUnexpectedToken()

        value = float(num)
        return RawToken(
            type=Token.NumericLiteral,
            value=int(value) if value.is_integer() else value,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-literals-string-literals

    def scanStringLiteral(self):
        """Scan a quoted string literal starting at the cursor.

        Returns a string ``RawToken`` whose value is the text with escape
        sequences decoded and whose ``octal`` field tells whether an octal
        escape was used.  Line continuations (backslash followed by a line
        break) add nothing to the value but update the line counters.  An
        unterminated string is reported at its opening quote through
        ``throwUnexpectedToken``.
        """
        start = self.index
        quote = self.source[start]
        assert quote in ('\'', '"'), 'String literal must starts with a quote'

        self.index += 1
        octal = False
        text = ''

        # Escapes that map one letter to one fixed character.
        simpleEscapes = {
            'n': '\n',
            'r': '\r',
            't': '\t',
            'b': '\b',
            'f': '\f',
            'v': '\x0B',
        }

        while not self.eof():
            ch = self.source[self.index]
            self.index += 1

            if ch == quote:
                # Clearing ``quote`` marks the literal as properly closed.
                quote = ''
                break
            elif ch == '\\':
                ch = self.source[self.index]
                self.index += 1
                if not ch or not Character.isLineTerminator(ch):
                    if ch == 'u':
                        if self.source[self.index] == '{':
                            self.index += 1
                            text += self.scanUnicodeCodePointEscape()
                        else:
                            unescapedChar = self.scanHexEscape(ch)
                            if not unescapedChar:
                                self.throwUnexpectedToken()
                            text += unescapedChar

                    elif ch == 'x':
                        unescaped = self.scanHexEscape(ch)
                        if not unescaped:
                            self.throwUnexpectedToken(Messages.InvalidHexEscapeSequence)
                        text += unescaped

                    elif ch in simpleEscapes:
                        text += simpleEscapes[ch]

                    elif ch in ('8', '9'):
                        # Kept literally, but reported as a tolerated error.
                        text += ch
                        self.tolerateUnexpectedToken()

                    else:
                        if ch and Character.isOctalDigit(ch):
                            octToDec = self.octalToDecimal(ch)

                            octal = octToDec.octal or octal
                            text += uchr(octToDec.code)
                        else:
                            # Any other escaped character stands for itself.
                            text += ch

                else:
                    # Line continuation: treat CR LF as a single line break.
                    self.lineNumber += 1
                    if ch == '\r' and self.source[self.index] == '\n':
                        self.index += 1

                    self.lineStart = self.index

            elif Character.isLineTerminator(ch):
                # A bare line break ends the scan; the literal stays unclosed.
                break
            else:
                text += ch

        if quote != '':
            self.index = start
            self.throwUnexpectedToken()

        return RawToken(
            type=Token.StringLiteral,
            value=text,
            octal=octal,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-template-literal-lexical-components

    def scanTemplate(self):
        """Scan one chunk of a template literal starting at the cursor.

        A chunk runs from the character at the cursor (a backquote for the
        head chunk) up to either the closing backquote or the next ``${``.
        Returns a template ``RawToken`` whose ``value`` is the raw source
        text of the chunk, ``cooked`` the text with escapes decoded, and
        ``head``/``tail`` tell whether it is the first/last chunk.  ``${``
        pushes onto ``self.curlyStack``; a non-head chunk pops from it.
        An unterminated chunk and octal escapes are reported through
        ``throwUnexpectedToken``.
        """
        cooked = ''
        terminated = False
        start = self.index

        head = self.source[start] == '`'
        tail = False
        # Number of trailing delimiter characters to leave out of the raw
        # value: 2 for '${', 1 for the closing backquote.
        rawOffset = 2

        self.index += 1

        while not self.eof():
            ch = self.source[self.index]
            self.index += 1
            if ch == '`':
                rawOffset = 1
                tail = True
                terminated = True
                break

            elif ch == '$':
                if self.source[self.index] == '{':
                    self.curlyStack.append('${')
                    self.index += 1
                    terminated = True
                    break

                cooked += ch

            elif ch == '\\':
                ch = self.source[self.index]
                self.index += 1
                if not Character.isLineTerminator(ch):
                    if ch == 'n':
                        cooked += '\n'
                    elif ch == 'r':
                        cooked += '\r'
                    elif ch == 't':
                        cooked += '\t'
                    elif ch == 'u':
                        if self.source[self.index] == '{':
                            self.index += 1
                            cooked += self.scanUnicodeCodePointEscape()
                        else:
                            restore = self.index
                            unescapedChar = self.scanHexEscape(ch)
                            if unescapedChar:
                                cooked += unescapedChar
                            else:
                                # Not a valid escape: rewind and keep the 'u'.
                                self.index = restore
                                cooked += ch

                    elif ch == 'x':
                        unescaped = self.scanHexEscape(ch)
                        if not unescaped:
                            self.throwUnexpectedToken(Messages.InvalidHexEscapeSequence)
                        cooked += unescaped

                    elif ch == 'b':
                        cooked += '\b'
                    elif ch == 'f':
                        cooked += '\f'
                    elif ch == 'v':
                        cooked += '\v'
                    else:
                        if ch == '0':
                            if Character.isDecimalDigit(self.source[self.index]):
                                # Illegal: \01 \02 and so on
                                self.throwUnexpectedToken(Messages.TemplateOctalLiteral)

                            cooked += '\0'

                        elif Character.isOctalDigit(ch):
                            # Illegal: \1 \2
                            self.throwUnexpectedToken(Messages.TemplateOctalLiteral)
                        else:
                            cooked += ch

                else:
                    # Line continuation: treat CR LF as a single line break.
                    self.lineNumber += 1
                    if ch == '\r' and self.source[self.index] == '\n':
                        self.index += 1

                    self.lineStart = self.index

            elif Character.isLineTerminator(ch):
                self.lineNumber += 1
                if ch == '\r' and self.source[self.index] == '\n':
                    self.index += 1

                self.lineStart = self.index
                # Every line break is cooked as a single '\n'.
                cooked += '\n'

            else:
                cooked += ch

        if not terminated:
            self.throwUnexpectedToken()

        if not head:
            if self.curlyStack:
                self.curlyStack.pop()

        return RawToken(
            type=Token.Template,
            value=self.source[start + 1:self.index - rawOffset],
            cooked=cooked,
            head=head,
            tail=tail,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    # https://tc39.github.io/ecma262/#sec-literals-regular-expression-literals

    def testRegExp(self, pattern, flags):

        # The BMP character to use as a replacement for astral symbols when

        # translating an ES6 "u"-flagged pattern to an ES5-compatible

        # approximation.

        # Note: replacing with '\uFFFF' enables false positives in unlikely

        # scenarios. For example, `[\u{1044f}-\u{10440}]` is an invalid

        # pattern that would not be detected by this substitution.

        astralSubstitute = '\uFFFF'

        # Replace every Unicode escape sequence with the equivalent

        # BMP character or a constant ASCII code point in the case of

        # astral symbols. (See the above note on `astralSubstitute`

        # for more information.)

        def astralSub(m):

            codePoint = int(m.group(1) or m.group(2), 16)

            if codePoint > 0x10FFFF:

                self.tolerateUnexpectedToken(Messages.InvalidRegExp)

            elif codePoint <= 0xFFFF:

                return uchr(codePoint)

            return astralSubstitute

        pattern = re.sub(r'\\u\{([0-9a-fA-F]+)\}|\\u([a-fA-F0-9]{4})', astralSub, pattern)

        # Replace each paired surrogate with a single ASCII symbol to

        # avoid throwing on regular expressions that are only valid in

        # combination with the "u" flag.

        pattern = re.sub(r'[\uD800-\uDBFF][\uDC00-\uDFFF]', astralSubstitute, pattern)

        # Return a regular expression object for this pattern-flag pair, or

        # `null` in case the current environment doesn't support the flags it

        # uses.

        pyflags = 0 | re.M if 'm' in flags else 0 | re.I if 'i' in flags else 0

        try:

            return re.compile(pattern, pyflags)

        except Exception:

            self.tolerateUnexpectedToken(Messages.InvalidRegExp)

    def scanRegExpBody(self):
        """Consume a regular expression body and return its inner text.

        Scanning starts on the opening slash at ``self.index`` and advances
        ``self.index`` past the closing slash. The returned string excludes
        both slashes. ``throwUnexpectedToken`` is invoked with
        ``Messages.UnterminatedRegExp`` when a line terminator is met or the
        input ends before the closing slash.
        """
        opening = self.source[self.index]
        assert opening == '/', 'Regular expression literal must start with a slash'
        self.index += 1

        body = opening
        # True while inside a `[...]` character class, where an unescaped
        # slash does not end the literal.
        inClass = False
        terminated = False

        while not self.eof():
            ch = self.source[self.index]
            self.index += 1
            body += ch

            if ch == '\\':
                # An escape consumes the following character verbatim, so it
                # can never open/close a class or terminate the literal.
                escaped = self.source[self.index]
                self.index += 1
                # https://tc39.github.io/ecma262/#sec-literals-regular-expression-literals
                if Character.isLineTerminator(escaped):
                    self.throwUnexpectedToken(Messages.UnterminatedRegExp)
                body += escaped
                continue

            if Character.isLineTerminator(ch):
                self.throwUnexpectedToken(Messages.UnterminatedRegExp)
            elif inClass:
                if ch == ']':
                    inClass = False
            elif ch == '/':
                terminated = True
                break
            elif ch == '[':
                inClass = True

        if not terminated:
            self.throwUnexpectedToken(Messages.UnterminatedRegExp)

        # Exclude leading and trailing slash.
        return body[1:-1]

    def scanRegExpFlags(self):
        """Consume the flag characters that follow a regular expression body.

        Characters are read from ``self.index`` for as long as
        ``Character.isIdentifierPart`` accepts them. A backslash in the flags
        is always reported through ``tolerateUnexpectedToken``; when it
        introduces a ``u`` escape that ``scanHexEscape`` can decode, the
        decoded character is added to the flags, otherwise a plain ``u`` is.

        :returns: the collected flag characters as a string.
        """
        flags = ''

        while not self.eof():
            ch = self.source[self.index]
            if not Character.isIdentifierPart(ch):
                break

            self.index += 1
            if ch == '\\' and not self.eof():
                ch = self.source[self.index]
                if ch == 'u':
                    self.index += 1
                    # Remember where the hex digits start so the scan can be
                    # rewound if they do not form a valid escape.
                    restore = self.index
                    char = self.scanHexEscape('u')
                    if char:
                        flags += char
                    else:
                        self.index = restore
                        flags += 'u'
                    self.tolerateUnexpectedToken()
                else:
                    # The character after the backslash is left unconsumed
                    # and is examined by the next loop iteration.
                    self.tolerateUnexpectedToken()
            else:
                flags += ch

        return flags

    def scanRegExp(self):
        """Scan a complete regular expression literal starting at ``self.index``.

        Reads the body and then the flags, and passes both to
        ``testRegExp``.

        :returns: a ``RawToken`` of type ``Token.RegularExpression`` carrying
            the pattern text, the flags, the value returned by
            ``testRegExp`` (as ``regex``) and the start/end offsets.
        """
        start = self.index

        # Order matters: the flags immediately follow the closing slash that
        # scanRegExpBody() consumes.
        pattern = self.scanRegExpBody()
        flags = self.scanRegExpFlags()
        value = self.testRegExp(pattern, flags)

        return RawToken(
            type=Token.RegularExpression,
            value='',
            pattern=pattern,
            flags=flags,
            regex=value,
            lineNumber=self.lineNumber,
            lineStart=self.lineStart,
            start=start,
            end=self.index
        )

    def lex(self):
        """Scan and return the next token starting at ``self.index``.

        At end of input an EOF ``RawToken`` is returned. Otherwise the first
        character selects which specialised scanner is called; anything not
        recognised here is handed to ``scanPunctuator``.
        """
        if self.eof():
            return RawToken(
                type=Token.EOF,
                value='',
                lineNumber=self.lineNumber,
                lineStart=self.lineStart,
                start=self.index,
                end=self.index
            )

        ch = self.source[self.index]

        if Character.isIdentifierStart(ch):
            return self.scanIdentifier()

        # Very common: ( and ) and ;
        if ch in ('(', ')', ';'):
            return self.scanPunctuator()

        # String literal starts with single quote (U+0027) or double quote (U+0022).
        if ch in ('\'', '"'):
            return self.scanStringLiteral()

        # Dot (.) U+002E can also start a floating-point number, hence the need
        # to check the next character.
        if ch == '.':
            if Character.isDecimalDigit(self.source[self.index + 1]):
                return self.scanNumericLiteral()
            return self.scanPunctuator()

        if Character.isDecimalDigit(ch):
            return self.scanNumericLiteral()

        # Template literals start with ` (U+0060) for template head
        # or } (U+007D) for template middle or template tail.
        # A `}` only resumes a template when the innermost open curly on the
        # stack was opened by `${`.
        if ch == '`' or (ch == '}' and self.curlyStack and self.curlyStack[-1] == '${'):
            return self.scanTemplate()

        # Possible identifier start in a surrogate pair.
        # NOTE(review): the upper bound is exclusive, so U+DFFF itself is not
        # covered by this test; kept exactly as in the original.
        cp = ord(ch)
        if 0xD800 <= cp < 0xDFFF:
            cp = self.codePointAt(self.index)
            ch = Character.fromCodePoint(cp)
            if Character.isIdentifierStart(ch):
                return self.scanIdentifier()

        return self.scanPunctuator()