DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Mercurial (b6d82b1a6b02)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ustr_titlecase_brkiter.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2011may30
*   created by: Markus W. Scherer
*
*   Titlecasing functions that are based on BreakIterator
*   were moved here to break dependency cycles among parts of the common library.
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_BREAK_ITERATION

#include "unicode/brkiter.h"
#include "unicode/casemap.h"
#include "unicode/chariter.h"
#include "unicode/localpointer.h"
#include "unicode/ubrk.h"
#include "unicode/ucasemap.h"
#include "unicode/utext.h"
#include "cmemory.h"
#include "uassert.h"
#include "ucase.h"
#include "ucasemap_imp.h"

U_NAMESPACE_BEGIN

/**
 * Whole-string BreakIterator.
 * Titlecasing only calls setText(), first(), and next().
 * We implement the rest only to satisfy the abstract interface.
 */
class WholeStringBreakIterator : public BreakIterator {
public:
    WholeStringBreakIterator() : BreakIterator(), length(0) {}
    ~WholeStringBreakIterator() U_OVERRIDE;
    UBool operator==(const BreakIterator&) const U_OVERRIDE;
    BreakIterator *clone() const U_OVERRIDE;
    static UClassID U_EXPORT2 getStaticClassID();
    UClassID getDynamicClassID() const U_OVERRIDE;
    CharacterIterator &getText() const U_OVERRIDE;
    UText *getUText(UText *fillIn, UErrorCode &errorCode) const U_OVERRIDE;
    void  setText(const UnicodeString &text) U_OVERRIDE;
    void  setText(UText *text, UErrorCode &errorCode) U_OVERRIDE;
    void  adoptText(CharacterIterator* it) U_OVERRIDE;
    int32_t first() U_OVERRIDE;
    int32_t last() U_OVERRIDE;
    int32_t previous() U_OVERRIDE;
    int32_t next() U_OVERRIDE;
    int32_t current() const U_OVERRIDE;
    int32_t following(int32_t offset) U_OVERRIDE;
    int32_t preceding(int32_t offset) U_OVERRIDE;
    UBool isBoundary(int32_t offset) U_OVERRIDE;
    int32_t next(int32_t n) U_OVERRIDE;
    BreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize,
                                     UErrorCode &errorCode) U_OVERRIDE;
    BreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE;

private:
    int32_t length;
};

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator)

WholeStringBreakIterator::~WholeStringBreakIterator() {}
UBool WholeStringBreakIterator::operator==(const BreakIterator&) const { return FALSE; }
BreakIterator *WholeStringBreakIterator::clone() const { return nullptr; }

CharacterIterator &WholeStringBreakIterator::getText() const {
    UPRV_UNREACHABLE;  // really should not be called
}
UText *WholeStringBreakIterator::getUText(UText * /*fillIn*/, UErrorCode &errorCode) const {
    if (U_SUCCESS(errorCode)) {
        errorCode = U_UNSUPPORTED_ERROR;
    }
    return nullptr;
}

void  WholeStringBreakIterator::setText(const UnicodeString &text) {
    length = text.length();
}
void  WholeStringBreakIterator::setText(UText *text, UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        int64_t length64 = utext_nativeLength(text);
        if (length64 <= INT32_MAX) {
            length = (int32_t)length64;
        } else {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        }
    }
}
void  WholeStringBreakIterator::adoptText(CharacterIterator*) {
    UPRV_UNREACHABLE;  // should not be called
}

int32_t WholeStringBreakIterator::first() { return 0; }
int32_t WholeStringBreakIterator::last() { return length; }
int32_t WholeStringBreakIterator::previous() { return 0; }
int32_t WholeStringBreakIterator::next() { return length; }
int32_t WholeStringBreakIterator::current() const { return 0; }
int32_t WholeStringBreakIterator::following(int32_t /*offset*/) { return length; }
int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) { return 0; }
UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) { return FALSE; }
int32_t WholeStringBreakIterator::next(int32_t /*n*/) { return length; }

BreakIterator *WholeStringBreakIterator::createBufferClone(
        void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        errorCode = U_UNSUPPORTED_ERROR;
    }
    return nullptr;
}
BreakIterator &WholeStringBreakIterator::refreshInputText(
        UText * /*input*/, UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        errorCode = U_UNSUPPORTED_ERROR;
    }
    return *this;
}

U_CFUNC
BreakIterator *ustrcase_getTitleBreakIterator(
        const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
        LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    options &= U_TITLECASE_ITERATOR_MASK;
    if (options != 0 && iter != nullptr) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    if (iter == nullptr) {
        switch (options) {
        case 0:
            iter = BreakIterator::createWordInstance(
                locale != nullptr ? *locale : Locale(locID), errorCode);
            break;
        case U_TITLECASE_WHOLE_STRING:
            iter = new WholeStringBreakIterator();
            if (iter == nullptr) {
                errorCode = U_MEMORY_ALLOCATION_ERROR;
            }
            break;
        case U_TITLECASE_SENTENCES:
            iter = BreakIterator::createSentenceInstance(
                locale != nullptr ? *locale : Locale(locID), errorCode);
            break;
        default:
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            break;
        }
        ownedIter.adoptInstead(iter);
    }
    return iter;
}

int32_t CaseMap::toTitle(
        const char *locale, uint32_t options, BreakIterator *iter,
        const UChar *src, int32_t srcLength,
        UChar *dest, int32_t destCapacity, Edits *edits,
        UErrorCode &errorCode) {
    LocalPointer<BreakIterator> ownedIter;
    iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
    if(iter==NULL) {
        return 0;
    }
    UnicodeString s(srcLength<0, src, srcLength);
    iter->setText(s);
    return ustrcase_map(
        ustrcase_getCaseLocale(locale), options, iter,
        dest, destCapacity,
        src, srcLength,
        ustrcase_internalToTitle, edits, errorCode);
}

U_NAMESPACE_END

U_NAMESPACE_USE

U_CAPI int32_t U_EXPORT2
u_strToTitle(UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UBreakIterator *titleIter,
             const char *locale,
             UErrorCode *pErrorCode) {
    LocalPointer<BreakIterator> ownedIter;
    BreakIterator *iter = ustrcase_getTitleBreakIterator(
        nullptr, locale, 0, reinterpret_cast<BreakIterator *>(titleIter),
        ownedIter, *pErrorCode);
    if (iter == nullptr) {
        return 0;
    }
    UnicodeString s(srcLength<0, src, srcLength);
    iter->setText(s);
    return ustrcase_mapWithOverlap(
        ustrcase_getCaseLocale(locale), 0, iter,
        dest, destCapacity,
        src, srcLength,
        ustrcase_internalToTitle, *pErrorCode);
}

U_CAPI int32_t U_EXPORT2
ucasemap_toTitle(UCaseMap *csm,
                 UChar *dest, int32_t destCapacity,
                 const UChar *src, int32_t srcLength,
                 UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if (csm->iter == NULL) {
        LocalPointer<BreakIterator> ownedIter;
        BreakIterator *iter = ustrcase_getTitleBreakIterator(
            nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
        if (iter == nullptr) {
            return 0;
        }
        csm->iter = ownedIter.orphan();
    }
    UnicodeString s(srcLength<0, src, srcLength);
    csm->iter->setText(s);
    return ustrcase_map(
        csm->caseLocale, csm->options, csm->iter,
        dest, destCapacity,
        src, srcLength,
        ustrcase_internalToTitle, NULL, *pErrorCode);
}

#endif  // !UCONFIG_NO_BREAK_ITERATION