DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Header

Mercurial (31ec81b5d7bb)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
/*
**********************************************************************
*   Copyright (C) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   05/24/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "titletrn.h"
#include "umutex.h"
#include "ucase.h"
#include "cpputils.h"

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)

TitlecaseTransliterator::TitlecaseTransliterator() :
    CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), NULL)
{
    // Need to look back 2 characters in the case of "can't"
    setMaximumContextLength(2);
}

/**
 * Destructor.
 */
TitlecaseTransliterator::~TitlecaseTransliterator() {
}

/**
 * Copy constructor.
 */
TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
    CaseMapTransliterator(o)
{
}

/**
 * Assignment operator.
 */
/*TitlecaseTransliterator& TitlecaseTransliterator::operator=(
                             const TitlecaseTransliterator& o) {
    CaseMapTransliterator::operator=(o);
    return *this;
}*/

/**
 * Transliterator API.
 */
Transliterator* TitlecaseTransliterator::clone(void) const {
    return new TitlecaseTransliterator(*this);
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void TitlecaseTransliterator::handleTransliterate(
                                  Replaceable& text, UTransPosition& offsets,
                                  UBool isIncremental) const
{
    // TODO reimplement, see ustrcase.c
    // using a real word break iterator
    //   instead of just looking for a transition between cased and uncased characters
    // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
    // needs to take isIncremental into account because case mappings are context-sensitive
    //   also detect when lowercasing function did not finish because of context

    if (offsets.start >= offsets.limit) {
        return;
    }

    // case type: >0 cased (UCASE_LOWER etc.)  ==0 uncased  <0 case-ignorable
    int32_t type;

    // Our mode; we are either converting letter toTitle or
    // toLower.
    UBool doTitle = TRUE;
    
    // Determine if there is a preceding context of cased case-ignorable*,
    // in which case we want to start in toLower mode.  If the
    // prior context is anything else (including empty) then start
    // in toTitle mode.
    UChar32 c;
    int32_t start;
    for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
        c = text.char32At(start);
        type=ucase_getTypeOrIgnorable(fCsp, c);
        if(type>0) { // cased
            doTitle=FALSE;
            break;
        } else if(type==0) { // uncased but not ignorable
            break;
        }
        // else (type<0) case-ignorable: continue
    }
    
    // Convert things after a cased character toLower; things
    // after an uncased, non-case-ignorable character toTitle.  Case-ignorable
    // characters are copied directly and do not change the mode.
    UCaseContext csc;
    uprv_memset(&csc, 0, sizeof(csc));
    csc.p = &text;
    csc.start = offsets.contextStart;
    csc.limit = offsets.contextLimit;

    UnicodeString tmp;
    const UChar *s;
    int32_t textPos, delta, result, locCache=0;

    for(textPos=offsets.start; textPos<offsets.limit;) {
        csc.cpStart=textPos;
        c=text.char32At(textPos);
        csc.cpLimit=textPos+=U16_LENGTH(c);

        type=ucase_getTypeOrIgnorable(fCsp, c);
        if(type>=0) { // not case-ignorable
            if(doTitle) {
                result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
            } else {
                result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
            }
            doTitle = (UBool)(type==0); // doTitle=isUncased

            if(csc.b1 && isIncremental) {
                // fMap() tried to look beyond the context limit
                // wait for more input
                offsets.start=csc.cpStart;
                return;
            }

            if(result>=0) {
                // replace the current code point with its full case mapping result
                // see UCASE_MAX_STRING_LENGTH
                if(result<=UCASE_MAX_STRING_LENGTH) {
                    // string s[result]
                    tmp.setTo(FALSE, s, result);
                    delta=result-U16_LENGTH(c);
                } else {
                    // single code point
                    tmp.setTo(result);
                    delta=tmp.length()-U16_LENGTH(c);
                }
                text.handleReplaceBetween(csc.cpStart, textPos, tmp);
                if(delta!=0) {
                    textPos+=delta;
                    csc.limit=offsets.contextLimit+=delta;
                    offsets.limit+=delta;
                }
            }
        }
    }
    offsets.start=textPos;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */