loclikely.cpp - mozsearch

mozilla-central/intl/icu/source/common/loclikely.cpp (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Core :: JavaScript: Internationalization API

Revision control

Copy as Markdown

Other Tools

// © 2016 and later: Unicode, Inc. and others.

// License & terms of use: http://www.unicode.org/copyright.html

/*

*******************************************************************************

*   Copyright (C) 1997-2016, International Business Machines

*   Corporation and others.  All Rights Reserved.

*******************************************************************************

*   file name:  loclikely.cpp

*   encoding:   UTF-8

*   tab size:   8 (not used)

*   indentation:4

*   created on: 2010feb25

*   created by: Markus W. Scherer

*   Code for likely and minimized locale subtags, separated out from other .cpp files

*   that then do not depend on resource bundle code and likely-subtags data.

*/

#include "unicode/bytestream.h"

#include "unicode/utypes.h"

#include "unicode/locid.h"

#include "unicode/putil.h"

#include "unicode/uchar.h"

#include "unicode/uloc.h"

#include "unicode/ures.h"

#include "unicode/uscript.h"

#include "bytesinkutil.h"

#include "charstr.h"

#include "cmemory.h"

#include "cstring.h"

#include "ulocimp.h"

#include "ustr_imp.h"

/**

 * These are the canonical strings for unknown languages, scripts and regions.

**/

static const char* const unknownLanguage = "und";

static const char* const unknownScript = "Zzzz";

static const char* const unknownRegion = "ZZ";

/**
 * Looks up localeID in the "likelySubtags" resource bundle.
 *
 * An empty localeID is looked up as "und"; a localeID that starts with '_'
 * is looked up with "und" prepended.  If the entry that is found starts with
 * the language "und" (alone, or followed by '_'), that prefix is removed from
 * the returned string.
 *
 * @param localeID The tag to find.
 * @param buffer A buffer to hold the matching entry.
 * @param bufferLength The capacity of the output buffer, in bytes.
 * @param err A pointer to a UErrorCode for error reporting.  A missing
 *            resource entry is not reported as an error; an entry that does
 *            not fit in the buffer sets U_INTERNAL_PROGRAM_ERROR.
 * @return A pointer to "buffer" if found, or a null pointer if not.
 */
static const char*  U_CALLCONV
findLikelySubtags(const char* localeID,
                  char* buffer,
                  int32_t bufferLength,
                  UErrorCode* err) {
    const char* result = nullptr;

    if (!U_FAILURE(*err)) {
        int32_t resLen = 0;
        const char16_t* s = nullptr;
        UErrorCode tmpErr = U_ZERO_ERROR;
        icu::LocalUResourceBundlePointer subtags(ures_openDirect(nullptr, "likelySubtags", &tmpErr));
        if (U_SUCCESS(tmpErr)) {
            /* Owns the "und"-prefixed key, when one has to be built. */
            icu::CharString und;
            if (localeID != nullptr) {
                if (*localeID == '\0') {
                    localeID = unknownLanguage;
                } else if (*localeID == '_') {
                    und.append(unknownLanguage, *err);
                    und.append(localeID, *err);
                    if (U_FAILURE(*err)) {
                        return nullptr;
                    }
                    localeID = und.data();
                }
            }
            s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);

            if (U_FAILURE(tmpErr)) {
                /*
                 * If a resource is missing, it's not really an error, it's
                 * just that we don't have any data for that particular locale ID.
                 */
                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
                    *err = tmpErr;
                }
            }
            else if (resLen >= bufferLength) {
                /* The buffer should never overflow. */
                *err = U_INTERNAL_PROGRAM_ERROR;
            }
            else {
                /* resLen + 1 also copies the terminating NUL. */
                u_UCharsToChars(s, buffer, resLen + 1);
                if (resLen >= 3 &&
                    uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
                    (resLen == 3 || buffer[3] == '_')) {
                    /* Drop the leading "und", keeping the rest and the NUL. */
                    uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
                }
                result = buffer;
            }
        } else {
            *err = tmpErr;
        }
    }

    return result;
}

/**
 * Append a tag to a buffer, adding the separator if requested.  The buffer
 * must be large enough to contain the resulting tag plus any separator
 * necessary; no bounds check is performed here. The tag must not be a
 * zero-length string.
 *
 * @param tag The tag to add.
 * @param tagLength The length of the tag.
 * @param buffer The output buffer.
 * @param bufferLength The number of bytes already used in the output buffer.
 *                     This is an input/output parameter; on return it is
 *                     advanced past the separator (if any) and the tag.
 * @param withSeparator If true, a '_' is written before the tag.
 **/
static void U_CALLCONV
appendTag(
    const char* tag,
    int32_t tagLength,
    char* buffer,
    int32_t* bufferLength,
    UBool withSeparator) {

    if (withSeparator) {
        buffer[*bufferLength] = '_';
        ++(*bufferLength);
    }

    uprv_memmove(
        &buffer[*bufferLength],
        tag,
        tagLength);

    *bufferLength += tagLength;
}

/**
 * Create a tag string from the supplied parameters.  The lang, script and region
 * parameters may be nullptr pointers. If they are, their corresponding length parameters
 * must be less than or equal to 0.
 *
 * If any of the language, script or region parameters are empty, and the alternateTags
 * parameter is not nullptr, it will be parsed for potential language, script and region tags
 * to be used when constructing the new tag.  If the alternateTags parameter is nullptr, or
 * it contains no language tag, the language is left empty (nothing is written for it).
 *
 * The result is appended to the sink.  If a subtag length is not smaller than
 * the corresponding ULOC_*_CAPACITY, or parsing alternateTags fails or
 * overflows, nothing is appended and the error is reported as
 * U_ILLEGAL_ARGUMENT_ERROR (any other pre-existing failure code is kept).
 *
 * @param lang The language tag to use.
 * @param langLength The length of the language tag.
 * @param script The script tag to use.
 * @param scriptLength The length of the script tag.
 * @param region The region tag to use.
 * @param regionLength The length of the region tag.
 * @param trailing Any trailing data to append to the new tag.
 * @param trailingLength The length of the trailing data.
 * @param alternateTags A string containing any alternate tags.
 * @param sink The output sink receiving the tag string.
 * @param err A pointer to a UErrorCode for error reporting.
 **/
static void U_CALLCONV
createTagStringWithAlternates(
    const char* lang,
    int32_t langLength,
    const char* script,
    int32_t scriptLength,
    const char* region,
    int32_t regionLength,
    const char* trailing,
    int32_t trailingLength,
    const char* alternateTags,
    icu::ByteSink& sink,
    UErrorCode* err) {

    if (U_FAILURE(*err)) {
        goto error;
    }
    else if (langLength >= ULOC_LANG_CAPACITY ||
             scriptLength >= ULOC_SCRIPT_CAPACITY ||
             regionLength >= ULOC_COUNTRY_CAPACITY) {
        goto error;
    }
    else {
        /**
         * ULOC_FULLNAME_CAPACITY will provide enough capacity
         * that we can build a string that contains the language,
         * script and region code without worrying about overrunning
         * the buffer.
         **/
        char tagBuffer[ULOC_FULLNAME_CAPACITY];
        int32_t tagLength = 0;
        UBool regionAppended = false;

        if (langLength > 0) {
            appendTag(
                lang,
                langLength,
                tagBuffer,
                &tagLength,
                /*withSeparator=*/false);
        }
        else if (alternateTags == nullptr) {
            /*
             * Use the empty string for an unknown language, if
             * we found no language.
             */
        }
        else {
            /*
             * Parse the alternateTags string for the language.
             */
            char alternateLang[ULOC_LANG_CAPACITY];
            int32_t alternateLangLength = sizeof(alternateLang);

            alternateLangLength =
                uloc_getLanguage(
                    alternateTags,
                    alternateLang,
                    alternateLangLength,
                    err);
            if(U_FAILURE(*err) ||
                alternateLangLength >= ULOC_LANG_CAPACITY) {
                goto error;
            }
            else if (alternateLangLength == 0) {
                /*
                 * Use the empty string for an unknown language, if
                 * we found no language.
                 */
            }
            else {
                appendTag(
                    alternateLang,
                    alternateLangLength,
                    tagBuffer,
                    &tagLength,
                    /*withSeparator=*/false);
            }
        }

        if (scriptLength > 0) {
            appendTag(
                script,
                scriptLength,
                tagBuffer,
                &tagLength,
                /*withSeparator=*/true);
        }
        else if (alternateTags != nullptr) {
            /*
             * Parse the alternateTags string for the script.
             */
            char alternateScript[ULOC_SCRIPT_CAPACITY];

            const int32_t alternateScriptLength =
                uloc_getScript(
                    alternateTags,
                    alternateScript,
                    sizeof(alternateScript),
                    err);

            if (U_FAILURE(*err) ||
                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
                goto error;
            }
            else if (alternateScriptLength > 0) {
                appendTag(
                    alternateScript,
                    alternateScriptLength,
                    tagBuffer,
                    &tagLength,
                    /*withSeparator=*/true);
            }
        }

        if (regionLength > 0) {
            appendTag(
                region,
                regionLength,
                tagBuffer,
                &tagLength,
                /*withSeparator=*/true);

            regionAppended = true;
        }
        else if (alternateTags != nullptr) {
            /*
             * Parse the alternateTags string for the region.
             */
            char alternateRegion[ULOC_COUNTRY_CAPACITY];

            const int32_t alternateRegionLength =
                uloc_getCountry(
                    alternateTags,
                    alternateRegion,
                    sizeof(alternateRegion),
                    err);
            if (U_FAILURE(*err) ||
                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
                goto error;
            }
            else if (alternateRegionLength > 0) {
                appendTag(
                    alternateRegion,
                    alternateRegionLength,
                    tagBuffer,
                    &tagLength,
                    /*withSeparator=*/true);

                regionAppended = true;
            }
        }

        /**
         * Copy the partial tag from our internal buffer to the supplied
         * target.
         **/
        sink.Append(tagBuffer, tagLength);

        if (trailingLength > 0) {
            /* Keywords ('@...') attach directly; variants need separators. */
            if (*trailing != '@') {
                sink.Append("_", 1);
                if (!regionAppended) {
                    /* extra separator is required */
                    sink.Append("_", 1);
                }
            }

            /*
             * Copy the trailing data into the supplied target.
             */
            sink.Append(trailing, trailingLength);
        }

        return;
    }

error:

    /**
     * An overflow indicates the locale ID passed in
     * is ill-formed.  If we got here, and there was
     * no previous error, it's an implicit overflow.
     **/
    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
        U_SUCCESS(*err)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }
}

/**
 * Create a tag string from the supplied parameters.  The lang, script and region
 * parameters may be nullptr pointers. If they are, their corresponding length parameters
 * must be less than or equal to 0.
 *
 * This is createTagStringWithAlternates() with no alternate tags: a subtag
 * that is empty is simply left out of the result.
 *
 * The result is appended to the sink.  If an illegal argument is provided,
 * the function reports the error U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param lang The language tag to use.
 * @param langLength The length of the language tag.
 * @param script The script tag to use.
 * @param scriptLength The length of the script tag.
 * @param region The region tag to use.
 * @param regionLength The length of the region tag.
 * @param trailing Any trailing data to append to the new tag.
 * @param trailingLength The length of the trailing data.
 * @param sink The output sink receiving the tag string.
 * @param err A pointer to a UErrorCode for error reporting.
 **/
static void U_CALLCONV
createTagString(
    const char* lang,
    int32_t langLength,
    const char* script,
    int32_t scriptLength,
    const char* region,
    int32_t regionLength,
    const char* trailing,
    int32_t trailingLength,
    icu::ByteSink& sink,
    UErrorCode* err)
{
    createTagStringWithAlternates(
                lang,
                langLength,
                script,
                scriptLength,
                region,
                regionLength,
                trailing,
                trailingLength,
                /*alternateTags=*/nullptr,
                sink,
                err);
}

/**
 * Parse the language, script, and region subtags from a tag string, and copy the
 * results into the corresponding output parameters. The buffers are null-terminated,
 * unless overflow occurs.
 *
 * The langLength, scriptLength, and regionLength parameters are input/output
 * parameters, and must contain the capacity of their corresponding buffers on
 * input.  On output, they will contain the actual length of the buffers, not
 * including the null terminator.  A script equal to "Zzzz" or a region equal
 * to "ZZ" is reported with length 0.
 *
 * If extracting a subtag fails, the function stops parsing and leaves the
 * error code set by the extraction.
 *
 * If an illegal argument is provided (any pointer is nullptr), the function
 * reports the error U_ILLEGAL_ARGUMENT_ERROR.
 *
 * @param localeID The locale ID to parse.
 * @param lang The language tag buffer.
 * @param langLength In: capacity of lang. Out: length of the language tag.
 * @param script The script tag buffer.
 * @param scriptLength In: capacity of script. Out: length of the script tag.
 * @param region The region tag buffer.
 * @param regionLength In: capacity of region. Out: length of the region tag.
 * @param err A pointer to a UErrorCode for error reporting.
 * @return The number of chars of the localeID parameter consumed.
 **/
static int32_t U_CALLCONV
parseTagString(
    const char* localeID,
    char* lang,
    int32_t* langLength,
    char* script,
    int32_t* scriptLength,
    char* region,
    int32_t* regionLength,
    UErrorCode* err)
{
    const char* position = localeID;
    int32_t subtagLength = 0;

    if(U_FAILURE(*err) ||
       localeID == nullptr ||
       lang == nullptr ||
       langLength == nullptr ||
       script == nullptr ||
       scriptLength == nullptr ||
       region == nullptr ||
       regionLength == nullptr) {
        goto error;
    }

    subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);

    /*
     * Note that we explicitly consider U_STRING_NOT_TERMINATED_WARNING
     * to be an error, because it indicates the user-supplied tag is
     * not well-formed.
     */
    if(U_FAILURE(*err)) {
        goto error;
    }

    *langLength = subtagLength;

    /*
     * If no language was present, use the empty string instead.
     * Otherwise, move past any separator.
     */
    if (_isIDSeparator(*position)) {
        ++position;
    }

    subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err);

    if(U_FAILURE(*err)) {
        goto error;
    }

    *scriptLength = subtagLength;

    if (*scriptLength > 0) {
        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
            /**
             * If the script part is the "unknown" script, then don't return it.
             **/
            *scriptLength = 0;
        }

        /*
         * Move past any separator.
         */
        if (_isIDSeparator(*position)) {
            ++position;
        }
    }

    subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err);

    if(U_FAILURE(*err)) {
        goto error;
    }

    *regionLength = subtagLength;

    if (*regionLength > 0) {
        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
            /**
             * If the region part is the "unknown" region, then don't return it.
             **/
            *regionLength = 0;
        }
    } else if (*position != 0 && *position != '@') {
        /* back up over consumed trailing separator */
        --position;
    }

exit:

    return (int32_t)(position - localeID);

error:

    /**
     * If we get here, we have no explicit error, it's the result of an
     * illegal argument.
     **/
    if (!U_FAILURE(*err)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }

    goto exit;
}

static UBool U_CALLCONV

createLikelySubtagsString(

    const char* lang,

    int32_t langLength,

    const char* script,

    int32_t scriptLength,

    const char* region,

    int32_t regionLength,

    const char* variants,

    int32_t variantsLength,

    icu::ByteSink& sink,

    UErrorCode* err) {

/**

     * ULOC_FULLNAME_CAPACITY will provide enough capacity

     * that we can build a string that contains the language,

     * script and region code without worrying about overrunning

     * the user-supplied buffer.

**/

    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];

    if(U_FAILURE(*err)) {

        goto error;

/**

     * Try the language with the script and region first.

**/

    if (scriptLength > 0 && regionLength > 0) {

        const char* likelySubtags = nullptr;

        icu::CharString tagBuffer;

            icu::CharStringByteSink sink(&tagBuffer);

            createTagString(

                lang,

                langLength,

                script,

                scriptLength,

                region,

                regionLength,

                nullptr,

0,

                sink,

                err);

        if(U_FAILURE(*err)) {

            goto error;

        likelySubtags =

            findLikelySubtags(

                tagBuffer.data(),

                likelySubtagsBuffer,

                sizeof(likelySubtagsBuffer),

                err);

        if(U_FAILURE(*err)) {

            goto error;

        if (likelySubtags != nullptr) {

            /* Always use the language tag from the

               maximal string, since it may be more

               specific than the one provided. */

            createTagStringWithAlternates(

                        nullptr,

0,

                        nullptr,

0,

                        nullptr,

0,

                        variants,

                        variantsLength,

                        likelySubtags,

                        sink,

                        err);

            return true;

/**

     * Try the language with just the script.

**/

    if (scriptLength > 0) {

        const char* likelySubtags = nullptr;

        icu::CharString tagBuffer;

            icu::CharStringByteSink sink(&tagBuffer);

            createTagString(

                lang,

                langLength,

                script,

                scriptLength,

                nullptr,

0,

                nullptr,

0,

                sink,

                err);

        if(U_FAILURE(*err)) {

            goto error;

        likelySubtags =

            findLikelySubtags(

                tagBuffer.data(),

                likelySubtagsBuffer,

                sizeof(likelySubtagsBuffer),

                err);

        if(U_FAILURE(*err)) {

            goto error;

        if (likelySubtags != nullptr) {

            /* Always use the language tag from the

               maximal string, since it may be more

               specific than the one provided. */

            createTagStringWithAlternates(

                        nullptr,

0,

                        nullptr,

0,

                        region,

                        regionLength,

                        variants,

                        variantsLength,

                        likelySubtags,

                        sink,

                        err);

            return true;

/**

     * Try the language with just the region.

**/

    if (regionLength > 0) {

        const char* likelySubtags = nullptr;

        icu::CharString tagBuffer;

            icu::CharStringByteSink sink(&tagBuffer);

            createTagString(

                lang,

                langLength,

                nullptr,

0,

                region,

                regionLength,

                nullptr,

0,

                sink,

                err);

        if(U_FAILURE(*err)) {

            goto error;

        likelySubtags =

            findLikelySubtags(

                tagBuffer.data(),

                likelySubtagsBuffer,

                sizeof(likelySubtagsBuffer),

                err);

        if(U_FAILURE(*err)) {

            goto error;

        if (likelySubtags != nullptr) {

            /* Always use the language tag from the

               maximal string, since it may be more

               specific than the one provided. */

            createTagStringWithAlternates(

                        nullptr,

0,

                        script,

                        scriptLength,

                        nullptr,

0,

                        variants,

                        variantsLength,

                        likelySubtags,

                        sink,

                        err);

            return true;

/**

     * Finally, try just the language.

**/

        const char* likelySubtags = nullptr;

        icu::CharString tagBuffer;

            icu::CharStringByteSink sink(&tagBuffer);

            createTagString(

                lang,

                langLength,

                nullptr,

0,

                nullptr,

0,

                nullptr,

0,

                sink,

                err);

        if(U_FAILURE(*err)) {

            goto error;

        likelySubtags =

            findLikelySubtags(

                tagBuffer.data(),

                likelySubtagsBuffer,

                sizeof(likelySubtagsBuffer),

                err);

        if(U_FAILURE(*err)) {

            goto error;

        if (likelySubtags != nullptr) {

            /* Always use the language tag from the

               maximal string, since it may be more

               specific than the one provided. */

            createTagStringWithAlternates(

                        nullptr,

0,

                        script,

                        scriptLength,

                        region,

                        regionLength,

                        variants,

                        variantsLength,

                        likelySubtags,

                        sink,

                        err);

            return true;

    return false;

error:

    if (!U_FAILURE(*err)) {

        *err = U_ILLEGAL_ARGUMENT_ERROR;

    return false;

/**
 * Scans the trailing part of a locale ID and jumps to the enclosing
 * function's "error" label if a run of characters between separators
 * ('-' or '_') is too long.  Scanning stops at '@' (start of keywords) or
 * after trailingLength characters.
 *
 * The enclosing function must define a label named "error".
 *
 * NOTE(review): the check fires on the 10th character of a run, so a run of
 * 9 characters is accepted; confirm whether a limit of 8 was intended.
 *
 * @param trailing The characters to scan.
 * @param trailingLength The number of characters in trailing.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count > 8) { \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Adds likely subtags to an already canonicalized locale ID and appends the
 * result to the sink.
 *
 * @param localeID The locale ID to maximize; must not be nullptr.
 * @param sink The output sink receiving the result.
 * @param err A pointer to a UErrorCode for error reporting.  A nullptr
 *            localeID, an overflowing subtag, or an overlong trailing
 *            variant is reported as U_ILLEGAL_ARGUMENT_ERROR.
 * @return true if the sink received a maximized tag produced by a lookup
 *         match; false if localeID was appended unchanged, or on error
 *         (nothing is appended when the error is detected before the lookup).
 */
static UBool
_uloc_addLikelySubtags(const char* localeID,
                       icu::ByteSink& sink,
                       UErrorCode* err) {
    char lang[ULOC_LANG_CAPACITY];
    int32_t langLength = sizeof(lang);
    char script[ULOC_SCRIPT_CAPACITY];
    int32_t scriptLength = sizeof(script);
    char region[ULOC_COUNTRY_CAPACITY];
    int32_t regionLength = sizeof(region);
    const char* trailing = "";
    int32_t trailingLength = 0;
    int32_t trailingIndex = 0;
    UBool success = false;

    if(U_FAILURE(*err)) {
        goto error;
    }
    if (localeID == nullptr) {
        goto error;
    }

    trailingIndex = parseTagString(
        localeID,
        lang,
        &langLength,
        script,
        &scriptLength,
        region,
        &regionLength,
        err);
    if(U_FAILURE(*err)) {
        /* Overflow indicates an illegal argument error */
        if (*err == U_BUFFER_OVERFLOW_ERROR) {
            *err = U_ILLEGAL_ARGUMENT_ERROR;
        }

        goto error;
    }

    /* Find the length of the trailing portion. */
    while (_isIDSeparator(localeID[trailingIndex])) {
        trailingIndex++;
    }
    trailing = &localeID[trailingIndex];
    trailingLength = (int32_t)uprv_strlen(trailing);

    /* Jumps to "error" below if a trailing variant is too long. */
    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);

    success =
        createLikelySubtagsString(
            lang,
            langLength,
            script,
            scriptLength,
            region,
            regionLength,
            trailing,
            trailingLength,
            sink,
            err);

    if (!success) {
        const int32_t localIDLength = (int32_t)uprv_strlen(localeID);

        /*
         * If we get here, we need to return localeID.
         */
        sink.Append(localeID, localIDLength);
    }

    return success;

error:

    if (!U_FAILURE(*err)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return false;
}

// Add likely subtags to the locale ID and write the result to the sink.
// Returns true if the value in the sink was produced by a match during the
// lookup. Returns false if the value in the sink is the same as the input
// because the lookup found no match (false is also returned on failure).
// Declared here because _uloc_minimizeSubtags() calls it before its definition.

static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);

static void

_uloc_minimizeSubtags(const char* localeID,

                      icu::ByteSink& sink,

                      UErrorCode* err) {

    icu::CharString maximizedTagBuffer;

    char lang[ULOC_LANG_CAPACITY];

    int32_t langLength = sizeof(lang);

    char script[ULOC_SCRIPT_CAPACITY];

    int32_t scriptLength = sizeof(script);

    char region[ULOC_COUNTRY_CAPACITY];

    int32_t regionLength = sizeof(region);

    const char* trailing = "";

    int32_t trailingLength = 0;

    int32_t trailingIndex = 0;

    UBool successGetMax = false;

    if(U_FAILURE(*err)) {

        goto error;

    else if (localeID == nullptr) {

        goto error;

    trailingIndex =

        parseTagString(

            localeID,

            lang,

            &langLength,

            script,

            &scriptLength,

            region,

            &regionLength,

            err);

    if(U_FAILURE(*err)) {

        /* Overflow indicates an illegal argument error */

        if (*err == U_BUFFER_OVERFLOW_ERROR) {

            *err = U_ILLEGAL_ARGUMENT_ERROR;

        goto error;

    /* Find the spot where the variants or the keywords begin, if any. */

    while (_isIDSeparator(localeID[trailingIndex])) {

        trailingIndex++;

    trailing = &localeID[trailingIndex];

    trailingLength = (int32_t)uprv_strlen(trailing);

    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);

        icu::CharString base;

            icu::CharStringByteSink baseSink(&base);

            createTagString(

                lang,

                langLength,

                script,

                scriptLength,

                region,

                regionLength,

                nullptr,

0,

                baseSink,

                err);

/**

         * First, we need to first get the maximization

         * from AddLikelySubtags.

**/

            icu::CharStringByteSink maxSink(&maximizedTagBuffer);

            successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);

    if(U_FAILURE(*err)) {

        goto error;

    if (!successGetMax) {

/**

         * If we got here, return the locale ID parameter unchanged.

**/

        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);

        sink.Append(localeID, localeIDLength);

        return;

    // In the following, the lang, script, region are referring to those in

    // the maximizedTagBuffer, not the one in the localeID.

    langLength = sizeof(lang);

    scriptLength = sizeof(script);

    regionLength = sizeof(region);

    parseTagString(

        maximizedTagBuffer.data(),

        lang,

        &langLength,

        script,

        &scriptLength,

        region,

        &regionLength,

        err);

    if(U_FAILURE(*err)) {

        goto error;

/**

     * Start first with just the language.

**/

        icu::CharString tagBuffer;

            icu::CharStringByteSink tagSink(&tagBuffer);

            createLikelySubtagsString(

                lang,

                langLength,

                nullptr,

0,

                nullptr,

0,

                nullptr,

0,

                tagSink,

                err);

        if(U_FAILURE(*err)) {

            goto error;

        else if (!tagBuffer.isEmpty() &&

                 uprv_strnicmp(

                    maximizedTagBuffer.data(),

                    tagBuffer.data(),

                    tagBuffer.length()) == 0) {

            createTagString(

                        lang,

                        langLength,

                        nullptr,

0,

                        nullptr,

0,

                        trailing,

                        trailingLength,

                        sink,

                        err);

            return;

/**

     * Next, try the language and region.

**/

    if (regionLength > 0) {

        icu::CharString tagBuffer;

            icu::CharStringByteSink tagSink(&tagBuffer);

            createLikelySubtagsString(

                lang,

                langLength,

                nullptr,

0,

                region,

                regionLength,

                nullptr,

0,

                tagSink,

                err);

        if(U_FAILURE(*err)) {

            goto error;

        else if (!tagBuffer.isEmpty() &&

                 uprv_strnicmp(

                    maximizedTagBuffer.data(),

                    tagBuffer.data(),

                    tagBuffer.length()) == 0) {

            createTagString(

                        lang,

                        langLength,

                        nullptr,

0,

                        region,

                        regionLength,

                        trailing,

                        trailingLength,

                        sink,

                        err);

            return;

/**

     * Finally, try the language and script.  This is our last chance,

     * since trying with all three subtags would only yield the

     * maximal version that we already have.

**/

    if (scriptLength > 0) {

        icu::CharString tagBuffer;

            icu::CharStringByteSink tagSink(&tagBuffer);

            createLikelySubtagsString(

                lang,

                langLength,

                script,

                scriptLength,

                nullptr,

0,

                nullptr,

0,

                tagSink,

                err);

        if(U_FAILURE(*err)) {

            goto error;

        else if (!tagBuffer.isEmpty() &&

                 uprv_strnicmp(

                    maximizedTagBuffer.data(),

                    tagBuffer.data(),

                    tagBuffer.length()) == 0) {

            createTagString(

                        lang,

                        langLength,

                        script,

                        scriptLength,

                        nullptr,

0,

                        trailing,

                        trailingLength,

                        sink,

                        err);

            return;

/**

         * If we got here, return the max + trail.

**/

        createTagString(

                    lang,

                    langLength,

                    script,

                    scriptLength,

                    region,

                    regionLength,

                    trailing,

                    trailingLength,

                    sink,

                    err);

        return;

error:

    if (!U_FAILURE(*err)) {

        *err = U_ILLEGAL_ARGUMENT_ERROR;

/**
 * Canonicalizes a locale ID into the supplied buffer via uloc_canonicalize().
 *
 * @param localeID The locale ID to canonicalize.
 * @param buffer The output buffer handed to uloc_canonicalize().
 * @param bufferCapacity The capacity of the output buffer.
 * @param err In/out error code; set by uloc_canonicalize().
 * @return The length reported by uloc_canonicalize() on success, and also when
 *         *err is U_STRING_NOT_TERMINATED_WARNING or U_BUFFER_OVERFLOW_ERROR
 *         (the callers in this file store it as the requested capacity and
 *         retry); -1 for any other failure.
 */
static int32_t
do_canonicalize(const char*    localeID,
         char* buffer,
         int32_t bufferCapacity,
         UErrorCode* err)
{
    int32_t canonicalizedSize = uloc_canonicalize(
        localeID,
        buffer,
        bufferCapacity,
        err);

    // A too-small buffer is not fatal here: the reported length is still
    // returned so that the caller can size a bigger buffer.
    if (*err == U_STRING_NOT_TERMINATED_WARNING ||
        *err == U_BUFFER_OVERFLOW_ERROR) {
        return canonicalizedSize;
    }
    else if (U_FAILURE(*err)) {
        return -1;
    }
    else {
        return canonicalizedSize;
    }
}

/**
 * Char-buffer front end for ulocimp_addLikelySubtags(): the result is written
 * through a CheckedArrayByteSink into maximizedLocaleID.
 *
 * @param localeID The locale ID to maximize.
 * @param maximizedLocaleID The output buffer.
 * @param maximizedLocaleIDCapacity The capacity of the output buffer.
 * @param status In/out error code. Set to U_BUFFER_OVERFLOW_ERROR when the
 *               sink overflowed without any other failure.
 * @return 0 if *status was already a failure on entry. Otherwise the number
 *         of bytes appended to the sink, except that -1 is returned when the
 *         operation failed and the sink did not overflow.
 */
U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char* localeID,
                      char* maximizedLocaleID,
                      int32_t maximizedLocaleIDCapacity,
                      UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(
            maximizedLocaleID, maximizedLocaleIDCapacity);

    ulocimp_addLikelySubtags(localeID, sink, status);
    int32_t reslen = sink.NumberOfBytesAppended();

    if (U_FAILURE(*status)) {
        // On overflow the appended byte count is still reported to the caller.
        return sink.Overflowed() ? reslen : -1;
    }

    if (sink.Overflowed()) {
        *status = U_BUFFER_OVERFLOW_ERROR;
    } else {
        u_terminateChars(
                maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
    }

    return reslen;
}

/**
 * Canonicalizes localeID into a PreflightingLocaleIDBuffer, retrying for as
 * long as the buffer reports that another attempt is needed, and then passes
 * the canonical form to _uloc_addLikelySubtags().
 *
 * @param localeID The locale ID to maximize.
 * @param sink The sink that receives the result.
 * @param status In/out error code.
 * @return The result of _uloc_addLikelySubtags() when canonicalization
 *         succeeded; false otherwise.
 */
static UBool
_ulocimp_addLikelySubtags(const char* localeID,
                          icu::ByteSink& sink,
                          UErrorCode* status) {
    PreflightingLocaleIDBuffer localeBuffer;
    do {
        localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
            localeBuffer.getCapacity(), status);
    } while (localeBuffer.needToTryAgain(status));

    if (U_SUCCESS(*status)) {
        return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
    } else {
        return false;
    }
}

/**
 * Exported entry point that forwards to _ulocimp_addLikelySubtags() and
 * discards its boolean result; the outcome is reported through status only.
 *
 * @param localeID The locale ID to maximize.
 * @param sink The sink that receives the result.
 * @param status In/out error code.
 */
U_CAPI void U_EXPORT2
ulocimp_addLikelySubtags(const char* localeID,
                         icu::ByteSink& sink,
                         UErrorCode* status) {
    _ulocimp_addLikelySubtags(localeID, sink, status);
}

/**
 * Char-buffer front end for ulocimp_minimizeSubtags(): the result is written
 * through a CheckedArrayByteSink into minimizedLocaleID.
 *
 * @param localeID The locale ID to minimize.
 * @param minimizedLocaleID The output buffer.
 * @param minimizedLocaleIDCapacity The capacity of the output buffer.
 * @param status In/out error code. Set to U_BUFFER_OVERFLOW_ERROR when the
 *               sink overflowed without any other failure.
 * @return 0 if *status was already a failure on entry. Otherwise the number
 *         of bytes appended to the sink, except that -1 is returned when the
 *         operation failed and the sink did not overflow.
 */
U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char* localeID,
                     char* minimizedLocaleID,
                     int32_t minimizedLocaleIDCapacity,
                     UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    icu::CheckedArrayByteSink sink(
            minimizedLocaleID, minimizedLocaleIDCapacity);

    ulocimp_minimizeSubtags(localeID, sink, status);
    int32_t reslen = sink.NumberOfBytesAppended();

    if (U_FAILURE(*status)) {
        // On overflow the appended byte count is still reported to the caller.
        return sink.Overflowed() ? reslen : -1;
    }

    if (sink.Overflowed()) {
        *status = U_BUFFER_OVERFLOW_ERROR;
    } else {
        u_terminateChars(
                minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
    }

    return reslen;
}

/**
 * Canonicalizes localeID into a PreflightingLocaleIDBuffer, retrying for as
 * long as the buffer reports that another attempt is needed, and then passes
 * the canonical form to _uloc_minimizeSubtags().
 *
 * @param localeID The locale ID to minimize.
 * @param sink The sink that receives the result.
 * @param status In/out error code. If canonicalization fails, the function
 *               returns without writing to sink.
 */
U_CAPI void U_EXPORT2
ulocimp_minimizeSubtags(const char* localeID,
                        icu::ByteSink& sink,
                        UErrorCode* status) {
    PreflightingLocaleIDBuffer localeBuffer;
    do {
        localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
            localeBuffer.getCapacity(), status);
    } while (localeBuffer.needToTryAgain(status));

    // Same guard as the add-likely-subtags path: do not hand the buffer on
    // when canonicalization did not succeed.
    if (U_FAILURE(*status)) {
        return;
    }

    _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
}

// Pairs of (language subtag, + or -) for finding out fast if common languages
// are LTR (minus) or RTL (plus).
// uloc_isRightToLeft() looks a language code up in this string with
// uprv_strstr() and inspects the character right after the match, so every
// entry must be immediately followed by its '+' or '-' marker.
static const char LANG_DIR_STRING[] =
        "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";

// Implemented here because this calls ulocimp_addLikelySubtags().
/**
 * Reports whether the script of a locale is written right-to-left.
 *
 * If the locale ID carries an explicit script, that script is used. Otherwise
 * the language is first looked up in LANG_DIR_STRING; if that gives no
 * answer, the script is taken from the result of ulocimp_addLikelySubtags().
 *
 * @param locale The locale ID to examine.
 * @return The value of uscript_isRightToLeft() for the determined script;
 *         false when the language cannot be read or no script can be
 *         determined.
 */
U_CAPI UBool U_EXPORT2
uloc_isRightToLeft(const char *locale) {
    UErrorCode errorCode = U_ZERO_ERROR;
    char script[8];
    int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
    if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
            scriptLength == 0) {
        // Fastpath: We know the likely scripts and their writing direction
        // for some common languages.
        errorCode = U_ZERO_ERROR;
        char lang[8];
        int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
            return false;
        }
        if (langLength > 0) {
            const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
            if (langPtr != nullptr) {
                // The character after the matched code is its direction marker.
                switch (langPtr[langLength]) {
                case '-': return false;
                case '+': return true;
                default: break;  // partial match of a longer code
                }
            }
        }
        // Otherwise, find the likely script.
        errorCode = U_ZERO_ERROR;
        icu::CharString likely;
        {
            icu::CharStringByteSink sink(&likely);
            ulocimp_addLikelySubtags(locale, sink, &errorCode);
        }
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
            return false;
        }
        scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
                scriptLength == 0) {
            return false;
        }
    }
    UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
    return uscript_isRightToLeft(scriptCode);
}

U_NAMESPACE_BEGIN

UBool

Locale::isRightToLeft() const {

    return uloc_isRightToLeft(getBaseName());

U_NAMESPACE_END

// Size of the scratch buffer that receives the "rg" keyword value and the
// region code in ulocimp_getRegionForSupplementalData().
// It must at least allow for the rg key value (6) plus terminator (1).
#define ULOC_RG_BUFLEN 8

/**
 * Determines the region code of a locale ID and copies it into region.
 *
 * The sources are tried in this order:
 *   1. the "rg" keyword value, accepted only if it is exactly 6 characters
 *      long and, once uppercased, its last four characters are "ZZZZ"; the
 *      first two characters are then the region;
 *   2. the country returned by uloc_getCountry() for localeID;
 *   3. if inferRegion is true and no country was found, the country of the
 *      result of ulocimp_addLikelySubtags().
 *
 * @param localeID The locale ID to examine.
 * @param inferRegion Whether step 3 may be used.
 * @param region The output buffer for the region code.
 * @param regionCapacity The capacity of region.
 * @param status In/out error code.
 * @return 0 if *status was already a failure on entry; otherwise the value
 *         returned by u_terminateChars() for the region length.
 */
U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
                                     char *region, int32_t regionCapacity, UErrorCode* status) {
    if (U_FAILURE(*status)) {
        return 0;
    }
    char rgBuf[ULOC_RG_BUFLEN];
    UErrorCode rgStatus = U_ZERO_ERROR;

    // First check for rg keyword value
    int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
    if (U_FAILURE(rgStatus) || rgLen != 6) {
        rgLen = 0;
    } else {
        // rgBuf guaranteed to be zero terminated here, with text len 6
        char *rgPtr = rgBuf;
        for (; *rgPtr!= 0; rgPtr++) {
            *rgPtr = uprv_toupper(*rgPtr);
        }
        // Only the two-letter prefix is kept, and only with the "ZZZZ" suffix.
        rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
    }

    if (rgLen == 0) {
        // No valid rg keyword value, try for unicode_region_subtag
        rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
        if (U_FAILURE(*status)) {
            rgLen = 0;
        } else if (rgLen == 0 && inferRegion) {
            // no unicode_region_subtag but inferRegion true, try likely subtags
            rgStatus = U_ZERO_ERROR;
            icu::CharString locBuf;
            {
                icu::CharStringByteSink sink(&locBuf);
                ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
            }
            if (U_SUCCESS(rgStatus)) {
                rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
                if (U_FAILURE(*status)) {
                    rgLen = 0;
                }
            }
        }
    }

    rgBuf[rgLen] = 0;
    // Copy only into a real, positively sized buffer: a negative capacity
    // would otherwise be converted to a huge unsigned count.
    if (region != nullptr && regionCapacity > 0) {
        uprv_strncpy(region, rgBuf, regionCapacity);
    }
    return u_terminateChars(region, regionCapacity, rgLen, status);
}