DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Mercurial (b6d82b1a6b02)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 2014-2016, International Business Machines
* Corporation and others.  All Rights Reserved.
******************************************************************************
* simpleformatter.cpp
*/

#include "unicode/utypes.h"
#include "unicode/simpleformatter.h"
#include "unicode/unistr.h"
#include "uassert.h"

U_NAMESPACE_BEGIN

namespace {

/**
 * Argument numbers must be smaller than this limit.
 * Text segment lengths are offset by this much.
 * This is currently the only unused char value in compiled patterns,
 * except it is the maximum value of the first unit (max arg +1).
 */
const int32_t ARG_NUM_LIMIT = 0x100;
/**
 * Initial and maximum char/UChar value set for a text segment.
 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
 */
const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
/**
 * Maximum length of a text segment. Longer segments are split into shorter ones.
 */
const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;

enum {
    APOS = 0x27,
    DIGIT_ZERO = 0x30,
    DIGIT_ONE = 0x31,
    DIGIT_NINE = 0x39,
    OPEN_BRACE = 0x7b,
    CLOSE_BRACE = 0x7d
};

inline UBool isInvalidArray(const void *array, int32_t length) {
   return (length < 0 || (array == NULL && length != 0));
}

}  // namespace

SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) {
    if (this == &other) {
        return *this;
    }
    compiledPattern = other.compiledPattern;
    return *this;
}

SimpleFormatter::~SimpleFormatter() {}

UBool SimpleFormatter::applyPatternMinMaxArguments(
        const UnicodeString &pattern,
        int32_t min, int32_t max,
        UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    // Parse consistent with MessagePattern, but
    // - support only simple numbered arguments
    // - build a simple binary structure into the result string
    const UChar *patternBuffer = pattern.getBuffer();
    int32_t patternLength = pattern.length();
    // Reserve the first char for the number of arguments.
    compiledPattern.setTo((UChar)0);
    int32_t textLength = 0;
    int32_t maxArg = -1;
    UBool inQuote = FALSE;
    for (int32_t i = 0; i < patternLength;) {
        UChar c = patternBuffer[i++];
        if (c == APOS) {
            if (i < patternLength && (c = patternBuffer[i]) == APOS) {
                // double apostrophe, skip the second one
                ++i;
            } else if (inQuote) {
                // skip the quote-ending apostrophe
                inQuote = FALSE;
                continue;
            } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
                // Skip the quote-starting apostrophe, find the end of the quoted literal text.
                ++i;
                inQuote = TRUE;
            } else {
                // The apostrophe is part of literal text.
                c = APOS;
            }
        } else if (!inQuote && c == OPEN_BRACE) {
            if (textLength > 0) {
                compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
                                          (UChar)(ARG_NUM_LIMIT + textLength));
                textLength = 0;
            }
            int32_t argNumber;
            if ((i + 1) < patternLength &&
                    0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
                    patternBuffer[i + 1] == CLOSE_BRACE) {
                i += 2;
            } else {
                // Multi-digit argument number (no leading zero) or syntax error.
                // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
                // around the number, but this class does not.
                argNumber = -1;
                if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
                    argNumber = c - DIGIT_ZERO;
                    while (i < patternLength &&
                            DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
                        argNumber = argNumber * 10 + (c - DIGIT_ZERO);
                        if (argNumber >= ARG_NUM_LIMIT) {
                            break;
                        }
                    }
                }
                if (argNumber < 0 || c != CLOSE_BRACE) {
                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                    return FALSE;
                }
            }
            if (argNumber > maxArg) {
                maxArg = argNumber;
            }
            compiledPattern.append((UChar)argNumber);
            continue;
        }  // else: c is part of literal text
        // Append c and track the literal-text segment length.
        if (textLength == 0) {
            // Reserve a char for the length of a new text segment, preset the maximum length.
            compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
        }
        compiledPattern.append(c);
        if (++textLength == MAX_SEGMENT_LENGTH) {
            textLength = 0;
        }
    }
    if (textLength > 0) {
        compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
                                  (UChar)(ARG_NUM_LIMIT + textLength));
    }
    int32_t argCount = maxArg + 1;
    if (argCount < min || max < argCount) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }
    compiledPattern.setCharAt(0, (UChar)argCount);
    return TRUE;
}

UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *values[] = { &value0 };
    return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode);
}

UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *values[] = { &value0, &value1 };
    return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode);
}

UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        const UnicodeString &value2,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    const UnicodeString *values[] = { &value0, &value1, &value2 };
    return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode);
}

UnicodeString& SimpleFormatter::formatAndAppend(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &appendTo,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return appendTo;
    }
    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) ||
            valuesLength < getArgumentLimit()) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }
    return format(compiledPattern.getBuffer(), compiledPattern.length(), values,
                  appendTo, NULL, TRUE,
                  offsets, offsetsLength, errorCode);
}

UnicodeString &SimpleFormatter::formatAndReplace(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &result,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }
    const UChar *cp = compiledPattern.getBuffer();
    int32_t cpLength = compiledPattern.length();
    if (valuesLength < getArgumentLimit(cp, cpLength)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    // If the pattern starts with an argument whose value is the same object
    // as the result, then we keep the result contents and append to it.
    // Otherwise we replace its contents.
    int32_t firstArg = -1;
    // If any non-initial argument value is the same object as the result,
    // then we first copy its contents and use that instead while formatting.
    UnicodeString resultCopy;
    if (getArgumentLimit(cp, cpLength) > 0) {
        for (int32_t i = 1; i < cpLength;) {
            int32_t n = cp[i++];
            if (n < ARG_NUM_LIMIT) {
                if (values[n] == &result) {
                    if (i == 2) {
                        firstArg = n;
                    } else if (resultCopy.isEmpty() && !result.isEmpty()) {
                        resultCopy = result;
                    }
                }
            } else {
                i += n - ARG_NUM_LIMIT;
            }
        }
    }
    if (firstArg < 0) {
        result.remove();
    }
    return format(cp, cpLength, values,
                  result, &resultCopy, FALSE,
                  offsets, offsetsLength, errorCode);
}

UnicodeString SimpleFormatter::getTextWithNoArguments(
        const UChar *compiledPattern,
        int32_t compiledPatternLength,
        int32_t* offsets,
        int32_t offsetsLength) {
    for (int32_t i = 0; i < offsetsLength; i++) {
        offsets[i] = -1;
    }
    int32_t capacity = compiledPatternLength - 1 -
            getArgumentLimit(compiledPattern, compiledPatternLength);
    UnicodeString sb(capacity, 0, 0);  // Java: StringBuilder
    for (int32_t i = 1; i < compiledPatternLength;) {
        int32_t n = compiledPattern[i++];
        if (n > ARG_NUM_LIMIT) {
            n -= ARG_NUM_LIMIT;
            sb.append(compiledPattern + i, n);
            i += n;
        } else if (n < offsetsLength) {
            offsets[n] = sb.length();
        }
    }
    return sb;
}

UnicodeString &SimpleFormatter::format(
        const UChar *compiledPattern, int32_t compiledPatternLength,
        const UnicodeString *const *values,
        UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
        int32_t *offsets, int32_t offsetsLength,
        UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    for (int32_t i = 0; i < offsetsLength; i++) {
        offsets[i] = -1;
    }
    for (int32_t i = 1; i < compiledPatternLength;) {
        int32_t n = compiledPattern[i++];
        if (n < ARG_NUM_LIMIT) {
            const UnicodeString *value = values[n];
            if (value == NULL) {
                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                return result;
            }
            if (value == &result) {
                if (forbidResultAsValue) {
                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
                    return result;
                }
                if (i == 2) {
                    // We are appending to result which is also the first value object.
                    if (n < offsetsLength) {
                        offsets[n] = 0;
                    }
                } else {
                    if (n < offsetsLength) {
                        offsets[n] = result.length();
                    }
                    result.append(*resultCopy);
                }
            } else {
                if (n < offsetsLength) {
                    offsets[n] = result.length();
                }
                result.append(*value);
            }
        } else {
            int32_t length = n - ARG_NUM_LIMIT;
            result.append(compiledPattern + i, length);
            i += length;
        }
    }
    return result;
}

U_NAMESPACE_END