DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Header

Mercurial (31ec81b5d7bb)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
/*
**********************************************************************
*   Copyright (c) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/19/2001  aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/utf16.h"
#include "esctrn.h"
#include "util.h"

U_NAMESPACE_BEGIN

static const UChar UNIPRE[] = {85,43,0}; // "U+"
static const UChar BS_u[] = {92,117,0}; // "\\u"
static const UChar BS_U[] = {92,85,0}; // "\\U"
static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
static const UChar XML10PRE[] = {38,35,0}; // "&#"
static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
static const UChar SEMI[] = {59,0}; // ";"
static const UChar RBRACE[] = {125,0}; // "}"

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)

/**
 * Factory methods
 */
static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
    // Unicode: "U+10FFFF" hex, min=4, max=6
    return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL);
}
static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
    // Java: "\\uFFFF" hex, min=4, max=4
    return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL);
}
static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
    return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE,
             new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL));
}
static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
    // XML: "" hex, min=1, max=6
    return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL);
}
static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
    // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
    return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL);
}
static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
    // Perl: "\\x{263A}" hex, min=1, max=6
    return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL);
}

/**
 * Registers standard variants with the system.  Called by
 * Transliterator during initialization.
 */
void EscapeTransliterator::registerIDs() {
    Token t = integerToken(0);

    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);

    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);

    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);

    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);

    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);

    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);

    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
}

/**
 * Constructs an escape transliterator with the given ID and
 * parameters.  See the class member documentation for details.
 */
EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
                         const UnicodeString& _prefix, const UnicodeString& _suffix,
                         int32_t _radix, int32_t _minDigits,
                         UBool _grokSupplementals,
                         EscapeTransliterator* adoptedSupplementalHandler) :
    Transliterator(newID, NULL)
{
    this->prefix = _prefix;
    this->suffix = _suffix;
    this->radix = _radix;
    this->minDigits = _minDigits;
    this->grokSupplementals = _grokSupplementals;
    this->supplementalHandler = adoptedSupplementalHandler;
}

/**
 * Copy constructor.
 */
EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
    Transliterator(o),
    prefix(o.prefix),
    suffix(o.suffix),
    radix(o.radix),
    minDigits(o.minDigits),
    grokSupplementals(o.grokSupplementals) {
    supplementalHandler = (o.supplementalHandler != 0) ?
        new EscapeTransliterator(*o.supplementalHandler) : NULL;
}

EscapeTransliterator::~EscapeTransliterator() {
    delete supplementalHandler;
}

/**
 * Transliterator API.
 */
Transliterator* EscapeTransliterator::clone() const {
    return new EscapeTransliterator(*this);
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void EscapeTransliterator::handleTransliterate(Replaceable& text,
                                               UTransPosition& pos,
                                               UBool /*isIncremental*/) const
{
    /* TODO: Verify that isIncremental can be ignored */
    int32_t start = pos.start;
    int32_t limit = pos.limit;

    UnicodeString buf(prefix);
    int32_t prefixLen = prefix.length();
    UBool redoPrefix = FALSE;

    while (start < limit) {
        int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
        int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;

        if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
            buf.truncate(0);
            buf.append(supplementalHandler->prefix);
            ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
                                  supplementalHandler->minDigits);
            buf.append(supplementalHandler->suffix);
            redoPrefix = TRUE;
        } else {
            if (redoPrefix) {
                buf.truncate(0);
                buf.append(prefix);
                redoPrefix = FALSE;
            } else {
                buf.truncate(prefixLen);
            }
            ICU_Utility::appendNumber(buf, c, radix, minDigits);
            buf.append(suffix);
        }

        text.handleReplaceBetween(start, start + charLen, buf);
        start += buf.length();
        limit += buf.length() - charLen;
    }

    pos.contextLimit += limit - pos.limit;
    pos.limit = limit;
    pos.start = start;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

//eof