DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (b6d82b1a6b02)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
//  rbbitblb.h
//

/*
**********************************************************************
*   Copyright (c) 2002-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

#ifndef RBBITBLB_H
#define RBBITBLB_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_BREAK_ITERATION

#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "rbbirb.h"
#include "rbbinode.h"


U_NAMESPACE_BEGIN

class RBBIRuleScanner;
class RBBIRuleBuilder;
class UVector32;

//
//  class RBBITableBuilder is part of the RBBI rule compiler.
//                         It builds the state transition table used by the RBBI runtime
//                         from the expression syntax tree generated by the rule scanner.
//
//                         This class is part of the RBBI implementation only.
//                         There is no user-visible public API here.
//

class RBBITableBuilder : public UMemory {
public:
    RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status);
    ~RBBITableBuilder();

    void     buildForwardTable();

    /** Return the runtime size in bytes of the built state table.  */
    int32_t  getTableSize() const;

    /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
     */
    void     exportTable(void *where);

    /**
     *  Find duplicate (redundant) character classes. Begin looking with categories.first.
     *  Duplicate, if found are returned in the categories parameter.
     *  This is an iterator-like function, used to identify character classes
     *  (state table columns) that can be eliminated.
     *  @param categories in/out parameter, specifies where to start looking for duplicates,
     *                and returns the first pair of duplicates found, if any.
     *  @return true if duplicate char classes were found, false otherwise.
     */
    bool     findDuplCharClassFrom(IntPair *categories);

    /** Remove a column from the state table. Used when two character categories
     *  have been found equivalent, and merged together, to eliminate the uneeded table column.
     */
    void     removeColumn(int32_t column);

    /**
     * Check for, and remove dupicate states (table rows).
     * @return the number of states removed.
     */
    int32_t  removeDuplicateStates();

    /** Build the safe reverse table from the already-constructed forward table. */
    void     buildSafeReverseTable(UErrorCode &status);

    /** Return the runtime size in bytes of the built safe reverse state table. */
    int32_t  getSafeTableSize() const;

    /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
     */
    void     exportSafeTable(void *where);


private:
    void     calcNullable(RBBINode *n);
    void     calcFirstPos(RBBINode *n);
    void     calcLastPos(RBBINode  *n);
    void     calcFollowPos(RBBINode *n);
    void     calcChainedFollowPos(RBBINode *n);
    void     bofFixup();
    void     buildStateTable();
    void     flagAcceptingStates();
    void     flagLookAheadStates();
    void     flagTaggedStates();
    void     mergeRuleStatusVals();

    /**
     * Merge redundant state table columns, eliminating character classes with identical behavior.
     * Done after the state tables are generated, just before converting to their run-time format.
     */
    int32_t  mergeColumns();

    void     addRuleRootNodes(UVector *dest, RBBINode *node);

    /**
     *  Find duplicate (redundant) states, beginning at the specified pair,
     *  within this state table. This is an iterator-like function, used to
     *  identify states (state table rows) that can be eliminated.
     *  @param states in/out parameter, specifies where to start looking for duplicates,
     *                and returns the first pair of duplicates found, if any.
     *  @return true if duplicate states were found, false otherwise.
     */
    bool findDuplicateState(IntPair *states);

    /** Remove a duplicate state.
     * @param duplStates The duplicate states. The first is kept, the second is removed.
     *                   All references to the second in the state table are retargeted
     *                   to the first.
     */
    void removeState(IntPair duplStates);

    /** Find the next duplicate state in the safe reverse table. An iterator function.
     *  @param states in/out parameter, specifies where to start looking for duplicates,
     *                and returns the first pair of duplicates found, if any.
     *  @return true if a duplicate pair of states was found.
     */
    bool findDuplicateSafeState(IntPair *states);

    /** Remove a duplicate state from the safe table.
     * @param duplStates The duplicate states. The first is kept, the second is removed.
     *                   All references to the second in the state table are retargeted
     *                   to the first.
     */
    void removeSafeState(IntPair duplStates);

    // Set functions for UVector.
    //   TODO:  make a USet subclass of UVector

    void     setAdd(UVector *dest, UVector *source);
    UBool    setEquals(UVector *a, UVector *b);

    void     sortedAdd(UVector **dest, int32_t val);

public:
#ifdef RBBI_DEBUG
    void     printSet(UVector *s);
    void     printPosSets(RBBINode *n /* = NULL*/);
    void     printStates();
    void     printRuleStatusTable();
    void     printReverseTable();
#else
    #define  printSet(s)
    #define  printPosSets(n)
    #define  printStates()
    #define  printRuleStatusTable()
    #define  printReverseTable()
#endif

private:
    RBBIRuleBuilder  *fRB;
    RBBINode         *&fTree;              // The root node of the parse tree to build a
                                           //   table for.
    UErrorCode       *fStatus;

    /** State Descriptors, UVector<RBBIStateDescriptor> */
    UVector          *fDStates;            //  D states (Aho's terminology)
                                           //  Index is state number
                                           //  Contents are RBBIStateDescriptor pointers.

    /** Synthesized safe table, UVector of UnicodeString, one string per table row.   */
    UVector          *fSafeTable;


    RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class
    RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class
};

//
//  RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
//                        one for each state.
class RBBIStateDescriptor : public UMemory {
public:
    UBool            fMarked;
    int32_t          fAccepting;
    int32_t          fLookAhead;
    UVector          *fTagVals;
    int32_t          fTagsIdx;
    UVector          *fPositions;          // Set of parse tree positions associated
                                           //   with this state.  Unordered (it's a set).
                                           //   UVector contents are RBBINode *

    UVector32        *fDtran;              // Transitions out of this state.
                                           //   indexed by input character
                                           //   contents is int index of dest state
                                           //   in RBBITableBuilder.fDStates

    RBBIStateDescriptor(int maxInputSymbol,  UErrorCode *fStatus);
    ~RBBIStateDescriptor();

private:
    RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class
    RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class
};



U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

#endif