Source code

Revision control

Copy as Markdown

Other Tools

# -*- coding: utf-8 -*-
# Copyright JS Foundation and other contributors, https://js.foundation/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import absolute_import, unicode_literals
import sys
import unicodedata
from collections import defaultdict
from .compat import uchr, xrange
U_CATEGORIES = defaultdict(list)
for c in map(uchr, xrange(sys.maxunicode + 1)):
U_CATEGORIES[unicodedata.category(c)].append(c)
UNICODE_LETTER = set(
U_CATEGORIES['Lu'] + U_CATEGORIES['Ll'] +
U_CATEGORIES['Lt'] + U_CATEGORIES['Lm'] +
U_CATEGORIES['Lo'] + U_CATEGORIES['Nl']
)
UNICODE_OTHER_ID_START = set((
# Other_ID_Start
'\u1885', '\u1886', '\u2118', '\u212E', '\u309B', '\u309C',
# New in Unicode 8.0
'\u08B3', '\u0AF9', '\u13F8', '\u9FCD', '\uAB60', '\U00010CC0', '\U000108E0', '\U0002B820',
# New in Unicode 9.0
'\u1C80', '\U000104DB', '\U0001E922',
'\U0001EE00', '\U0001EE06', '\U0001EE0A',
))
UNICODE_OTHER_ID_CONTINUE = set((
# Other_ID_Continue
'\xB7', '\u0387', '\u1369', '\u136A', '\u136B', '\u136C',
'\u136D', '\u136E', '\u136F', '\u1370', '\u1371', '\u19DA',
# New in Unicode 8.0
'\u08E3', '\uA69E', '\U00011730',
# New in Unicode 9.0
'\u08D4', '\u1DFB', '\uA8C5', '\U00011450',
'\U0001EE03', '\U0001EE0B',
))
UNICODE_COMBINING_MARK = set(U_CATEGORIES['Mn'] + U_CATEGORIES['Mc'])
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])
IDENTIFIER_START = UNICODE_LETTER.union(UNICODE_OTHER_ID_START).union(set(('$', '_', '\\')))
IDENTIFIER_PART = IDENTIFIER_START.union(UNICODE_COMBINING_MARK).union(UNICODE_DIGIT).union(UNICODE_CONNECTOR_PUNCTUATION).union(set(('\u200D', '\u200C'))).union(UNICODE_OTHER_ID_CONTINUE)
WHITE_SPACE = set((
'\x09', '\x0B', '\x0C', '\x20', '\xA0',
'\u1680', '\u180E', '\u2000', '\u2001', '\u2002',
'\u2003', '\u2004', '\u2005', '\u2006', '\u2007',
'\u2008', '\u2009', '\u200A', '\u202F', '\u205F',
'\u3000', '\uFEFF',
))
LINE_TERMINATOR = set(('\x0A', '\x0D', '\u2028', '\u2029'))
DECIMAL_CONV = dict((c, n) for n, c in enumerate('0123456789'))
OCTAL_CONV = dict((c, n) for n, c in enumerate('01234567'))
HEX_CONV = dict((c, n) for n, c in enumerate('0123456789abcdef'))
for n, c in enumerate('ABCDEF', 10):
HEX_CONV[c] = n
DECIMAL_DIGIT = set(DECIMAL_CONV.keys())
OCTAL_DIGIT = set(OCTAL_CONV.keys())
HEX_DIGIT = set(HEX_CONV.keys())
class Character:
@staticmethod
def fromCodePoint(code):
return uchr(code)
@staticmethod
def isWhiteSpace(ch):
return ch in WHITE_SPACE
@staticmethod
def isLineTerminator(ch):
return ch in LINE_TERMINATOR
@staticmethod
def isIdentifierStart(ch):
return ch in IDENTIFIER_START
@staticmethod
def isIdentifierPart(ch):
return ch in IDENTIFIER_PART
@staticmethod
def isDecimalDigit(ch):
return ch in DECIMAL_DIGIT
@staticmethod
def isHexDigit(ch):
return ch in HEX_DIGIT
@staticmethod
def isOctalDigit(ch):
return ch in OCTAL_DIGIT