DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (31ec81b5d7bb)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
/*
******************************************************************************
*
*   Copyright (C) 2008-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  uspoof_conf.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009Jan05
*   created by: Andy Heninger
*
*   Internal classes for compiling confusable data into its binary (runtime) form.
*/

#ifndef __USPOOF_BUILDCONF_H__
#define __USPOOF_BUILDCONF_H__

#if !UCONFIG_NO_NORMALIZATION

#if !UCONFIG_NO_REGULAR_EXPRESSIONS 

#include "uspoof_impl.h"

U_NAMESPACE_BEGIN

// SPUString
//              Holds a string that is the result of one of the mappings defined
//              by the confusable mapping data (confusables.txt from Unicode.org)
//              Instances of SPUString exist during the compilation process only.

// Pairs one mapping-result string with the position it will take in the
// compiled runtime data.
struct SPUString : public UMemory {
    UnicodeString  *fStr;             // The actual string.
                                      //  NOTE(review): presumably owned by this object and
                                      //  released by ~SPUString() - confirm in the .cpp.
    int32_t         fStrTableIndex;   // Index into the final runtime data for this string.
                                      //  (or, for length 1, the single string char itself,
                                      //   there being no string table entry for it.)

    // Wraps the string s.
    // explicit: a bare UnicodeString pointer must never be converted into an
    //  SPUString silently.
    explicit SPUString(UnicodeString *s);
    ~SPUString();

  private:
    // Declared but deliberately not implemented.  This struct holds a raw pointer
    //  and has a user-declared destructor; a compiler-generated member-wise copy
    //  would leave two objects sharing the same fStr.
    SPUString(const SPUString &other);              // forbid copying of this struct
    SPUString &operator=(const SPUString &other);   // forbid copying of this struct
};


//  String Pool   A utility class for holding the strings that are the result of
//                the spoof mappings.  These strings will ultimately end up in the
//                run-time String Table.
//                This is sort of like a sorted set of strings, except that ICU's anemic
//                built-in collections don't support those, so it is implemented with a
//                combination of a uhash and a UVector.


// Collection of SPUString objects, backed by a UVector (for indexed and sorted
// access) together with a UHashtable (for lookup by string).
class SPUStringPool : public UMemory {
  public:
    // status:  ICU error code reference.
    //          NOTE(review): presumably set on failure to create the internal
    //          containers, per the usual ICU convention - confirm in the .cpp.
    // explicit: prevents an accidental implicit conversion from a UErrorCode.
    explicit SPUStringPool(UErrorCode &status);
    ~SPUStringPool();

    // Add a string. Return the string from the table.
    // If the input parameter string is already in the table, delete the
    //  input parameter and return the existing string.
    // Note that src may therefore already be deleted when this function returns;
    //  callers should continue with the returned SPUString only.
    SPUString *addString(UnicodeString *src, UErrorCode &status);


    // Get the n-th string in the collection.
    //  The behavior for an out-of-range i is not visible from this header.
    SPUString *getByIndex(int32_t i);

    // Sort the contents; affects the ordering of getByIndex().
    void sort(UErrorCode &status);

    // Number of strings in the collection.
    int32_t size();

  private:
    UVector     *fVec;    // Elements are SPUString *
    UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString

    // Declared but deliberately not implemented.  The pool holds raw pointers to
    //  its two containers and has a user-declared destructor; a compiler-generated
    //  member-wise copy would leave two pools sharing the same fVec and fHash.
    SPUStringPool(const SPUStringPool &other);              // forbid copying of this class
    SPUStringPool &operator=(const SPUStringPool &other);   // forbid copying of this class
};


// class ConfusabledataBuilder
//     An instance of this class exists while the confusable data is being built from source.
//     It encapsulates the intermediate data structures that are used for building.
//     It exports one static function, to do a confusable data build.

class ConfusabledataBuilder : public UMemory {
  private:
    SpoofImpl  *fSpoofImpl;  // The SpoofImpl passed to the constructor / buildConfusableData().
    UChar      *fInput;      // NOTE(review): presumably the confusables source text as
                             //  UChars - confirm in the .cpp.

    // The four mapping tables built up during compilation.
    //   Their keys are the UChar32s collected in fKeySet.
    UHashtable *fSLTable;
    UHashtable *fSATable;
    UHashtable *fMLTable;
    UHashtable *fMATable;
    UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.

    // The binary data is first assembled into the following four collections, then
    //   copied to its final raw-memory destination.
    UVector            *fKeyVec;
    UVector            *fValueVec;
    UnicodeString      *fStringTable;
    UVector            *fStringLengthsTable;

    // Note: this field does not carry the 'f' prefix used by the other fields.
    //   The name is kept as is because the implementation file refers to it.
    SPUStringPool      *stringPool;
    // NOTE(review): judging by the names, regular expressions for parsing one
    //   input line and one hexadecimal number - confirm in the .cpp.
    URegularExpression *fParseLine;
    URegularExpression *fParseHexNum;
    int32_t             fLineNum;

    // Construction, destruction and build() are private; the only public entry
    //   point is the static function buildConfusableData().
    ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
    ~ConfusabledataBuilder();

    // Declared but deliberately not implemented.  The builder holds many raw
    //   pointers and has a user-declared destructor; a compiler-generated
    //   member-wise copy would leave two builders sharing all of them.
    ConfusabledataBuilder(const ConfusabledataBuilder &other);              // forbid copying of this class
    ConfusabledataBuilder &operator=(const ConfusabledataBuilder &other);   // forbid copying of this class

    // Do the build.
    //   confusables, confusablesLen:  the confusables source, as chars, and its length.
    void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);

    // Add an entry to the key and value tables being built
    //   input:  data from SLTable, MATable, etc.
    //   output: entry added to fKeyVec and fValueVec
    void addKeyEntry(UChar32     keyChar,     // The key character
                     UHashtable *table,       // The table, one of SATable, MATable, etc.
                     int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
                     UErrorCode &status);

    // From an index into fKeyVec & fValueVec
    //   get a UnicodeString with the corresponding mapping.
    //   The string is returned by value.
    UnicodeString getMapping(int32_t index);

    // Populate the final binary output data array with the compiled data.
    void outputData(UErrorCode &status);

  public:
    // The one exported function: do a confusable data build for spImpl from the
    //   confusables source text.
    //   NOTE(review): errorType and pe look like output parameters that describe
    //   a failure in the source - confirm against the implementation.
    static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
        int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
};
U_NAMESPACE_END

#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
#endif  // !UCONFIG_NO_NORMALIZATION
#endif  // __USPOOF_BUILDCONF_H__