DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (31ec81b5d7bb)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
/*
**********************************************************************
* Copyright (C) 1999-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#ifndef RBT_DATA_H
#define RBT_DATA_H

#include "unicode/utypes.h"
#include "unicode/uclean.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "rbt_set.h"
#include "hash.h"

U_NAMESPACE_BEGIN

class UnicodeFunctor;
class UnicodeMatcher;
class UnicodeReplacer;

/**
 * The rule data for a RuleBasedTransliterators.  RBT objects hold
 * a const pointer to a TRD object that they do not own.  TRD objects
 * are essentially the parsed rules in compact, usable form.  The
 * TRD objects themselves are held for the life of the process in
 * a static cache owned by Transliterator.
 *
 * This class' API is a little asymmetric.  There is a method to
 * define a variable, but no way to define a set.  This is because the
 * sets are defined by the parser in a UVector, and the vector is
 * copied into a fixed-size array here.  Once this is done, no new
 * sets may be defined.  In practice, there is no need to do so, since
 * generating the data and using it are discrete phases.  When there
 * is a need to access the set data during the parse phase, another
 * data structure handles this.  See the parsing code for more
 * details.
 */
class TransliterationRuleData : public UMemory {

public:

    // PUBLIC DATA MEMBERS

    /**
     * Rule table.  May be empty.
     */
    TransliterationRuleSet ruleSet;

    /**
     * Map variable name (String) to variable (UnicodeString).  A variable name
     * corresponds to zero or more characters, stored in a UnicodeString in
     * this hash.  One or more of these chars may also correspond to a
     * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
     * a stand-in: it is an index for a secondary lookup in
     * data.variables.  The stand-in also represents the UnicodeMatcher in
     * the stored rules.
     */
    Hashtable variableNames;

    /**
     * Map category variable (UChar) to set (UnicodeFunctor).
     * Variables that correspond to a set of characters are mapped
     * from variable name to a stand-in character in data.variableNames.
     * The stand-in then serves as a key in this hash to lookup the
     * actual UnicodeFunctor object.  In addition, the stand-in is
     * stored in the rule text to represent the set of characters.
     * variables[i] represents character (variablesBase + i).
     */
    UnicodeFunctor** variables;

    /**
     * Flag that indicates whether the variables are owned (if a single
     * call to Transliterator::createFromRules() produces a CompoundTransliterator
     * with more than one RuleBasedTransliterator as children, they all share
     * the same variables list, so only the first one is considered to own
     * the variables)
     */
    UBool variablesAreOwned;

    /**
     * The character that represents variables[0].  Characters
     * variablesBase through variablesBase +
     * variablesLength - 1 represent UnicodeFunctor objects.
     */
    UChar variablesBase;

    /**
     * The length of variables.
     */
    int32_t variablesLength;

public:

    /**
     * Constructor
     * @param status Output param set to success/failure code on exit.
     */
    TransliterationRuleData(UErrorCode& status);

    /**
     * Copy Constructor
     */
    TransliterationRuleData(const TransliterationRuleData&);

    /**
     * destructor
     */
    ~TransliterationRuleData();

    /**
     * Given a stand-in character, return the UnicodeFunctor that it
     * represents, or NULL if it doesn't represent anything.
     * @param standIn    the given stand-in character.
     * @return           the UnicodeFunctor that 'standIn' represents
     */
    UnicodeFunctor* lookup(UChar32 standIn) const;

    /**
     * Given a stand-in character, return the UnicodeMatcher that it
     * represents, or NULL if it doesn't represent anything or if it
     * represents something that is not a matcher.
     * @param standIn    the given stand-in character.
     * @return           return the UnicodeMatcher that 'standIn' represents
     */
    UnicodeMatcher* lookupMatcher(UChar32 standIn) const;

    /**
     * Given a stand-in character, return the UnicodeReplacer that it
     * represents, or NULL if it doesn't represent anything or if it
     * represents something that is not a replacer.
     * @param standIn    the given stand-in character.
     * @return           return the UnicodeReplacer that 'standIn' represents
     */
    UnicodeReplacer* lookupReplacer(UChar32 standIn) const;


private:
    TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class
};

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif