DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Mercurial (33b7b8e81b4b)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import os.path
import re
import sys

# Generates a C++ source file containing fallback mappings from
# (unified ideograph, variation selector) pairs to CJK Compatibility
# Ideographs, laid out like an OpenType format 14 cmap subtable.
#
# Usage: <this script> [StandardizedVariants.txt [CJKCompatSVS.cpp]]

# First line of the input, e.g. "# StandardizedVariants-6.3.0.txt"; group 1
# is the file name (with optional version) echoed into the generated file.
_HEADER_RE = re.compile(r'^# (StandardizedVariants(-\d+(\.\d+)*)?\.txt)')

# A mapping line: "<unified> <selector>; CJK COMPATIBILITY IDEOGRAPH-<compat>;"
_MAPPING_RE = re.compile(r'^([0-9A-F]{4,6}) (FE0[0-9A-F]); CJK COMPATIBILITY IDEOGRAPH-([0-9A-F]{4,6});')

_HEADER_TEMPLATE = """// Generated by %s. Do not edit.

#include <stdint.h>

#define U16(v) (((v) >> 8) & 0xFF), ((v) & 0xFF)
#define U24(v) (((v) >> 16) & 0xFF), (((v) >> 8) & 0xFF), ((v) & 0xFF)
#define U32(v) (((v) >> 24) & 0xFF), (((v) >> 16) & 0xFF), (((v) >> 8) & 0xFF), ((v) & 0xFF)
#define GLYPH(v) U16(v >= 0x2F800 ? (v) - (0x2F800 - 0xFB00) : (v))

// Fallback mappings for CJK Compatibility Ideographs Standardized Variants
// taken from %s.
// Using OpenType format 14 cmap subtable structure to reuse the lookup code
// for fonts. The glyphID field is used to store the corresponding codepoints
// CJK Compatibility Ideographs. To fit codepoints into the 16-bit glyphID
// field, CJK Compatibility Ideographs Supplement (U+2F800..U+2FA1F) will be
// mapped to 0xFB00..0xFD1F.
extern const uint8_t sCJKCompatSVSTable[] = {
"""

_FOOTER_TEMPLATE = """};

#undef U16
#undef U24
#undef U32
#undef GLYPH

static_assert(sizeof sCJKCompatSVSTable == %d, "Table generator has a bug.");
"""


def _parse_variants(lines):
    """Parse the lines of a StandardizedVariants file.

    `lines` is any iterable of text lines (an open file works). The first
    line must be the "# StandardizedVariants...txt" header.

    Returns a tuple `(fileversion, vsdict)` where `fileversion` is the file
    name captured from the header and `vsdict` maps each variation selector
    code point to a dict of {unified code point: compatibility code point}.

    Raises ValueError if the header or a CJK compatibility mapping line
    does not have the expected form.
    """
    lines = iter(lines)
    header = next(lines, '')
    m = _HEADER_RE.search(header)
    if m is None:
        raise ValueError('Unrecognized StandardizedVariants header: %r' % header)
    fileversion = m.group(1)

    vsdict = {}
    for line in lines:
        # Only the CJK compatibility ideograph entries are of interest.
        if 'CJK COMPATIBILITY IDEOGRAPH-' not in line:
            continue
        m = _MAPPING_RE.search(line)
        if m is None:
            raise ValueError('Unrecognized CJK compatibility mapping: %r' % line)
        unified = int(m.group(1), 16)
        vs = int(m.group(2), 16)
        compat = int(m.group(3), 16)
        vsdict.setdefault(vs, {})[unified] = compat
    return fileversion, vsdict


def _render_table(generator, fileversion, vsdict):
    """Return the full text of the generated C++ file.

    `generator` is the script name written into the "Generated by" comment,
    `fileversion` and `vsdict` are the values returned by _parse_variants.
    """
    selectors = sorted(vsdict)

    # Table header is 2 (format) + 4 (length) + 4 (record count) bytes and
    # each variation selector record is 3 + 4 + 4 bytes.
    length = 10 + 11 * len(selectors)
    offsets = []
    for vs in selectors:
        offsets.append(length)
        # Each mapping table is a 4-byte count plus 3 + 2 bytes per mapping.
        length += 4 + 5 * len(vsdict[vs])

    parts = [_HEADER_TEMPLATE % (generator, fileversion)]
    parts.append('  U16(14), // format\n')
    parts.append('  U32(%d), // length\n' % length)
    parts.append('  U32(%d), // numVarSelectorRecords\n' % len(selectors))
    for i, vs in enumerate(selectors):
        parts.append('    U24(0x%04X), U32(0), U32(%d), // varSelectorRecord[%d]\n'
                     % (vs, offsets[i], i))
    for vs in selectors:
        mappings = vsdict[vs]
        parts.append('  // 0x%04X\n' % vs)
        parts.append('  U32(%d), // numUVSMappings\n' % len(mappings))
        for (unified, compat) in sorted(mappings.items()):
            parts.append('    U24(0x%04X), GLYPH(0x%04X),\n' % (unified, compat))
    parts.append(_FOOTER_TEMPLATE % length)
    return ''.join(parts)


def main(argv=None):
    """Read the variants file and write the generated C++ table.

    `argv` defaults to sys.argv. argv[1] is the input path (default
    'StandardizedVariants.txt') and argv[2] is the output path (default
    'CJKCompatSVS.cpp').

    Raises ValueError if the input is malformed; in that case the output
    file is not touched.
    """
    if argv is None:
        argv = sys.argv
    in_path = argv[1] if len(argv) > 1 else 'StandardizedVariants.txt'
    out_path = argv[2] if len(argv) > 2 else 'CJKCompatSVS.cpp'

    with open(in_path) as infile:
        fileversion, vsdict = _parse_variants(infile)

    text = _render_table(os.path.basename(argv[0]), fileversion, vsdict)
    # The output is opened in binary mode so that line endings stay '\n' on
    # every platform. Where str is not bytes the text must be encoded first.
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    with open(out_path, 'wb') as outfile:
        outfile.write(text)


if __name__ == '__main__':
    main()