DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (31ec81b5d7bb)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
/*
**********************************************************************
* Copyright (C) 1999-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#ifndef RBT_SET_H
#define RBT_SET_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "unicode/utrans.h"
#include "uvector.h"

U_NAMESPACE_BEGIN

class Replaceable;
class TransliterationRule;
class TransliterationRuleData;
class UnicodeFilter;
class UnicodeString;
class UnicodeSet;

/**
 * A set of rules for a <code>RuleBasedTransliterator</code>.
 * @author Alan Liu
 */
class TransliterationRuleSet : public UMemory {
    /**
     * Vector of rules, in the order added.  This is used while the
     * rule set is getting built.  After that, freeze() reorders and
     * indexes the rules into rules[].  Any given rule is stored once
     * in ruleVector, and one or more times in rules[].  ruleVector
     * owns and deletes the rules.
     */
    UVector* ruleVector;

    /**
     * Sorted and indexed table of rules.  This is created by freeze()
     * from the rules in ruleVector.  It contains alias pointers to
     * the rules in ruleVector.  It is zero before freeze() is called
     * and non-zero thereafter.
     */
    TransliterationRule** rules;

    /**
     * Index table.  For text having a first character c, compute x = c&0xFF.
     * Now use rules[index[x]..index[x+1]-1].  This index table is created by
     * freeze().  Before freeze() is called it contains garbage.
     */
    int32_t index[257];

    /**
     * Length of the longest preceding context
     */
    int32_t maxContextLength;

public:

    /**
     * Construct a new empty rule set.
     * @param status    Output parameter filled in with success or failure status.
     */
    TransliterationRuleSet(UErrorCode& status);

    /**
     * Copy constructor.
     */
    TransliterationRuleSet(const TransliterationRuleSet&);

    /**
     * Destructor.
     */
    virtual ~TransliterationRuleSet();

    /**
     * Change the data object that this rule belongs to.  Used
     * internally by the TransliterationRuleData copy constructor.
     * @param data    the new data value to be set.
     */
    void setData(const TransliterationRuleData* data);

    /**
     * Return the maximum context length.
     * @return the length of the longest preceding context.
     */
    virtual int32_t getMaximumContextLength(void) const;

    /**
     * Add a rule to this set.  Rules are added in order, and order is
     * significant.  The last call to this method must be followed by
     * a call to <code>freeze()</code> before the rule set is used.
     * This method must <em>not</em> be called after freeze() has been
     * called.
     *
     * @param adoptedRule the rule to add
     */
    virtual void addRule(TransliterationRule* adoptedRule,
                         UErrorCode& status);

    /**
     * Check this for masked rules and index it to optimize performance.
     * The sequence of operations is: (1) add rules to a set using
     * <code>addRule()</code>; (2) freeze the set using
     * <code>freeze()</code>; (3) use the rule set.  If
     * <code>addRule()</code> is called after calling this method, it
     * invalidates this object, and this method must be called again.
     * That is, <code>freeze()</code> may be called multiple times,
     * although for optimal performance it shouldn't be.
     * @param parseError A pointer to UParseError to receive information about errors
     *                   occurred.
     * @param status     Output parameter filled in with success or failure status.
     */
    virtual void freeze(UParseError& parseError, UErrorCode& status);
    
    /**
     * Transliterate the given text with the given UTransPosition
     * indices.  Return TRUE if the transliteration should continue
     * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
     * Note that FALSE is only ever returned if isIncremental is TRUE.
     * @param text the text to be transliterated
     * @param index the position indices, which will be updated
     * @param isIncremental if TRUE, assume new text may be inserted
     * at index.limit, and return FALSE if thre is a partial match.
     * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
     * indicating that transliteration should stop until more text
     * arrives.
     */
    UBool transliterate(Replaceable& text,
                        UTransPosition& index,
                        UBool isIncremental);

    /**
     * Create rule strings that represents this rule set.
     * @param result string to receive the rule strings.  Current
     * contents will be deleted.
     * @param escapeUnprintable  True, will escape the unprintable characters
     * @return    A reference to 'result'.
     */
    virtual UnicodeString& toRules(UnicodeString& result,
                                   UBool escapeUnprintable) const;

    /**
     * Return the set of all characters that may be modified
     * (getTarget=false) or emitted (getTarget=true) by this set.
     */
    UnicodeSet& getSourceTargetSet(UnicodeSet& result,
                   UBool getTarget) const;

private:

    TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class
};

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif