DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Mercurial (5350524bb654)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2002-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
*/
#ifndef USETITER_H
#define USETITER_H

#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/unistr.h"

/**
 * \file 
 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
 */

U_NAMESPACE_BEGIN

class UnicodeSet;
class UnicodeString;

/**
 *
 * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
 * iterates over either code points or code point ranges.  After all
 * code points or ranges have been returned, it returns the
 * multicharacter strings of the UnicodeSet, if any.
 *
 * This class is not intended to be subclassed.  Consider any fields
 *  or methods declared as "protected" to be private.  The use of
 *  protected in this class is an artifact of history.
 *
 * <p>To iterate over code points and strings, use a loop like this:
 * <pre>
 * UnicodeSetIterator it(set);
 * while (it.next()) {
 *     processItem(it.getString());
 * }
 * </pre>
 * <p>Each item in the set is accessed as a string.  Set elements
 *    consisting of single code points are returned as strings containing
 *    just the one code point.
 *
 * <p>To iterate over code point ranges, instead of individual code points,
 *    use a loop like this:
 * <pre>
 * UnicodeSetIterator it(set);
 * while (it.nextRange()) {
 *   if (it.isString()) {
 *     processString(it.getString());
 *   } else {
 *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
 *   }
 * }
 * </pre>
 * @author M. Davis
 * @stable ICU 2.4
 */
class U_COMMON_API UnicodeSetIterator : public UObject {

 protected:

    /**
     * Sentinel stored in <tt>codepoint</tt> when the iterator points to a
     * string rather than a code point.
     * If <tt>codepoint == IS_STRING</tt>, then examine
     * <tt>string</tt> for the current iteration result.
     * @stable ICU 2.4
     */
    enum { IS_STRING = -1 };

    /**
     * Current code point, or the special value <tt>IS_STRING</tt>, if
     * the iterator points to a string.
     * @stable ICU 2.4
     */
    UChar32 codepoint;

    /**
     * When iterating over ranges using <tt>nextRange()</tt>,
     * <tt>codepointEnd</tt> contains the inclusive end of the
     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
     * iterating over code points using <tt>next()</tt>, or if
     * <tt>codepoint == IS_STRING</tt>, then the value of
     * <tt>codepointEnd</tt> is undefined.
     * @stable ICU 2.4
     */
    UChar32 codepointEnd;

    /**
     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
     * value of <tt>string</tt> is undefined.
     * @stable ICU 2.4
     */
    const UnicodeString* string;

 public:

    /**
     * Create an iterator over the given set.  The iterator is valid
     * only so long as <tt>set</tt> is valid.
     * @param set set to iterate over
     * @stable ICU 2.4
     */
    UnicodeSetIterator(const UnicodeSet& set);

    /**
     * Create an iterator over nothing.  <tt>next()</tt> and
     * <tt>nextRange()</tt> return false. This is a convenience
     * constructor allowing the target to be set later with
     * <tt>reset(const UnicodeSet&)</tt>.
     * @stable ICU 2.4
     */
    UnicodeSetIterator();

    /**
     * Destructor.
     * @stable ICU 2.4
     */
    virtual ~UnicodeSetIterator();

    /**
     * Returns true if the current element is a string.  If so, the
     * caller can retrieve it with <tt>getString()</tt>.  If this
     * method returns false, the current element is a code point or
     * code point range, depending on whether <tt>next()</tt> or
     * <tt>nextRange()</tt> was called.
     * Elements of types string and codepoint can both be retrieved
     * with the function <tt>getString()</tt>.
     * Elements of type codepoint can also be retrieved with
     * <tt>getCodepoint()</tt>.
     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
     * of the range, and <tt>getCodepointEnd()</tt> returns the end
     * of the range.
     * @return true if the current element is a string.
     * @stable ICU 2.4
     */
    inline UBool isString() const;

    /**
     * Returns the current code point, if <tt>isString()</tt> returned
     * false.  Otherwise returns an undefined result.
     * @return the current code point, or the start of the current range.
     * @stable ICU 2.4
     */
    inline UChar32 getCodepoint() const;

    /**
     * Returns the end of the current code point range, if
     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
     * called.  Otherwise returns an undefined result.
     * @return the inclusive end of the current code point range.
     * @stable ICU 2.4
     */
    inline UChar32 getCodepointEnd() const;

    /**
     * Returns the current string, if <tt>isString()</tt> returned
     * true.  If the current iteration item is a code point, a UnicodeString
     * containing that single code point is returned.
     *
     * Ownership of the returned string remains with the iterator.
     * The string is guaranteed to remain valid only until the iterator is
     * advanced to the next item, or until the iterator is deleted.
     *
     * Note that this method is not const, unlike the other accessors.
     *
     * @return a reference to the current element as a string.
     * @stable ICU 2.4
     */
    const UnicodeString& getString();

    /**
     * Advances the iteration position to the next element in the set,
     * which can be either a single code point or a string.
     * If there are no more elements in the set, return false.
     *
     * <p>
     * If <tt>isString() == TRUE</tt>, the value is a
     * string, otherwise the value is a
     * single code point.  Elements of either type can be retrieved
     * with the function <tt>getString()</tt>, while elements
     * consisting of a single code point can also be retrieved with
     * <tt>getCodepoint()</tt>.
     *
     * <p>The order of iteration is all code points in sorted order,
     * followed by all strings in sorted order.  Do not mix
     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
     * calling <tt>reset()</tt> between them.  The results of doing so
     * are undefined.
     *
     * @return true if there was another element in the set.
     * @stable ICU 2.4
     */
    UBool next();

    /**
     * Returns the next element in the set, either a code point range
     * or a string.  If there are no more elements in the set, return
     * false.  If <tt>isString() == TRUE</tt>, the value is a
     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
     * range of one or more code points from <tt>getCodepoint()</tt> to
     * <tt>getCodepointEnd()</tt> inclusive.
     *
     * <p>The order of iteration is all code point ranges in sorted
     * order, followed by all strings in sorted order.  Ranges are
     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
     * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
     * <tt>reset()</tt> between them.  The results of doing so are
     * undefined.
     *
     * @return true if there was another element in the set.
     * @stable ICU 2.4
     */
    UBool nextRange();

    /**
     * Sets this iterator to visit the elements of the given set and
     * resets it to the start of that set.  The iterator is valid only
     * so long as <tt>set</tt> is valid.
     * @param set the set to iterate over.
     * @stable ICU 2.4
     */
    void reset(const UnicodeSet& set);

    /**
     * Resets this iterator to the start of the set.
     * @stable ICU 2.4
     */
    void reset();

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @return the class ID shared by all UnicodeSetIterator objects.
     * @stable ICU 2.4
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @return the class ID of this object's actual class.
     * @stable ICU 2.4
     */
    virtual UClassID getDynamicClassID() const;

    // ======================= PRIVATES ===========================
    // Everything below is declared "protected" for historical reasons
    // only; treat it as private implementation detail.

 protected:

    // endElement and nextElement are really UChar32's, but we keep
    // them as signed int32_t's so we can do comparisons with
    // endElement set to -1.  Leave them as int32_t's.
    /** The set being iterated over.  Held by pointer, not owned:
     * the constructor and reset() document that the iterator is valid
     * only so long as the set is valid.
     * @stable ICU 2.4
     */
    const UnicodeSet* set;
    /** End range.
     * NOTE(review): presumably the index of the last range in the set;
     * confirm against the implementation file.
     * @stable ICU 2.4
     */
    int32_t endRange;
    /** Range.
     * NOTE(review): presumably the index of the range currently being
     * iterated; confirm against the implementation file.
     * @stable ICU 2.4
     */
    int32_t range;
    /** End element.  Logically a UChar32, stored as int32_t so that it
     * can be compared while set to -1 (see the note above <tt>set</tt>).
     * @stable ICU 2.4
     */
    int32_t endElement;
    /** Next element.  Logically a UChar32, stored as a signed int32_t
     * so that it can be compared against <tt>endElement</tt>.
     * @stable ICU 2.4
     */
    int32_t nextElement;
    //UBool abbreviated;
    /** Next string.
     * NOTE(review): presumably the index of the next string to return
     * once all code points have been visited; confirm against the
     * implementation file.
     * @stable ICU 2.4
     */
    int32_t nextString;
    /** String count.
     * NOTE(review): presumably the number of multicharacter strings in
     * the set; confirm against the implementation file.
     * @stable ICU 2.4
     */
    int32_t stringCount;

    /**
     *  Points to the string to use when the caller asks for a
     *  string and the current iteration item is a code point, not a string.
     *  @internal
     */
    UnicodeString *cpString;

    /** Copy constructor. Disallowed: declared in the protected section
     * so that client code cannot copy an iterator.
     * @stable ICU 2.4
     */
    UnicodeSetIterator(const UnicodeSetIterator&); // disallow

    /** Assignment operator. Disallowed: declared in the protected section
     * so that client code cannot assign one iterator to another.
     * @stable ICU 2.4
     */
    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow

    /** Load range.
     * @param range the range to load.
     *        NOTE(review): presumably an index into the set's list of
     *        code point ranges; confirm against the implementation file.
     * @stable ICU 2.4
     */
    virtual void loadRange(int32_t range);

};

// The iterator is positioned on a string exactly when the codepoint field
// holds the IS_STRING sentinel instead of a real code point.
inline UBool UnicodeSetIterator::isString() const {
    const UChar32 stringMarker = static_cast<UChar32>(IS_STRING);
    return stringMarker == codepoint;
}

// Plain accessor for the codepoint field: the current code point, or the
// first code point of the current range when iterating with nextRange().
// Holds the IS_STRING sentinel when the current element is a string.
inline UChar32 UnicodeSetIterator::getCodepoint() const {
    return this->codepoint;
}

// Plain accessor for the codepointEnd field: the inclusive end of the
// current range.  Only meaningful after nextRange() when the current
// element is not a string; otherwise the value is undefined.
inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
    return this->codepointEnd;
}


U_NAMESPACE_END

#endif