DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (31ec81b5d7bb)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
/*
*******************************************************************************
*
*   Copyright (C) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_tok.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created 02/22/2001
*   created by: Vladimir Weinstein
*
* This module reads a tailoring rule string and produces a list of 
* tokens that will be turned into collation elements
* 
*/

#ifndef UCOL_TOKENS_H
#define UCOL_TOKENS_H

#include "unicode/utypes.h"
#include "unicode/uset.h"

#if !UCONFIG_NO_COLLATION

#include "ucol_imp.h"
#include "uhash.h"
#include "unicode/parseerr.h"

#define UCOL_TOK_UNSET 0xFFFFFFFF
#define UCOL_TOK_RESET 0xDEADBEEF

#define UCOL_TOK_POLARITY_NEGATIVE 0
#define UCOL_TOK_POLARITY_POSITIVE 1

#define UCOL_TOK_TOP 0x04
#define UCOL_TOK_VARIABLE_TOP 0x08
#define UCOL_TOK_BEFORE 0x03
#define UCOL_TOK_SUCCESS 0x10

/* this is space for the extra strings that need to be unquoted */
/* during the parsing of the rules */
#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
typedef struct UColToken UColToken;

typedef struct  {
  UColToken* first;
  UColToken* last;
  UColToken* reset;
  UBool indirect;
  uint32_t baseCE;
  uint32_t baseContCE;
  uint32_t nextCE;
  uint32_t nextContCE;
  uint32_t previousCE;
  uint32_t previousContCE;
  int32_t pos[UCOL_STRENGTH_LIMIT];
  uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT];
  uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT];
  uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];
  UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];
  UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];
} UColTokListHeader;

struct UColToken {
  UChar debugSource;
  UChar debugExpansion;
  UChar debugPrefix;
  uint32_t CEs[128];
  uint32_t noOfCEs;
  uint32_t expCEs[128];
  uint32_t noOfExpCEs;
  uint32_t source;
  uint32_t expansion;
  uint32_t prefix;
  uint32_t strength;
  uint32_t toInsert;
  uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> */
  UColTokListHeader *listHeader;
  UColToken* previous;
  UColToken* next;
  UChar **rulesToParseHdl;
  uint16_t flags;
};

/* 
 * This is a token that has been parsed
 * but not yet processed. Used to reduce
 * the number of arguments in the parser
 */
typedef struct {
  uint32_t strength;
  uint32_t charsOffset;
  uint32_t charsLen;
  uint32_t extensionOffset;
  uint32_t extensionLen;
  uint32_t prefixOffset;
  uint32_t prefixLen;
  uint16_t flags;
  uint16_t indirectIndex;
} UColParsedToken;


typedef struct {
  UColParsedToken parsedToken;
  UChar *source;
  UChar *end;
  const UChar *current;
  UChar *sourceCurrent;
  UChar *extraCurrent;
  UChar *extraEnd;
  const InverseUCATableHeader *invUCA;
  const UCollator *UCA;
  UHashtable *tailored;
  UColOptionSet *opts;
  uint32_t resultLen;
  uint32_t listCapacity;
  UColTokListHeader *lh;
  UColToken *varTop;
  USet *copySet;
  USet *removeSet;
  UBool buildCCTabFlag;  /* Tailoring rule requirs building combining class table. */

  UChar32 previousCp;               /* Previous code point. */
  /* For processing starred lists. */
  UBool isStarred;                   /* Are we processing a starred token? */
  UBool savedIsStarred;
  uint32_t currentStarredCharIndex;  /* Index of the current charrecter in the starred expression. */
  uint32_t lastStarredCharIndex;    /* Index to the last character in the starred expression. */

  /* For processing ranges. */
  UBool inRange;                     /* Are we in a range? */
  UChar32 currentRangeCp;           /* Current code point in the range. */
  UChar32 lastRangeCp;              /* The last code point in the range. */
  
  /* reorder codes for collation reordering */
  int32_t* reorderCodes;
  int32_t reorderCodesLength;

} UColTokenParser;

typedef struct {
  const UChar *subName;
  int32_t subLen;
  UColAttributeValue attrVal;
} ucolTokSuboption;

typedef struct {
   const UChar *optionName;
   int32_t optionLen;
   const ucolTokSuboption *subopts;
   int32_t subSize;
   UColAttribute attr;
} ucolTokOption;

#define ucol_tok_isSpecialChar(ch)              \
    (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
      (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
      (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
      (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
      (ch) == 0x007B))


U_CFUNC 
uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
                                    UParseError *parseError, 
                                    UErrorCode *status);

U_CFUNC
void ucol_tok_initTokenList(UColTokenParser *src,
                            const UChar *rules,
                            const uint32_t rulesLength,
                            const UCollator *UCA,
                            GetCollationRulesFunction importFunc,
                            void* context,
                            UErrorCode *status);

U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src);

U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, 
                        UBool startOfRules,
                        UParseError *parseError,
                        UErrorCode *status);


U_CAPI const UChar * U_EXPORT2
ucol_tok_getNextArgument(const UChar *start, const UChar *end, 
                               UColAttribute *attrib, UColAttributeValue *value, 
                               UErrorCode *status);
U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
                                            uint32_t CE, uint32_t contCE,
                                            uint32_t *nextCE, uint32_t *nextContCE,
                                            uint32_t strength);
U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
                                            uint32_t CE, uint32_t contCE,
                                            uint32_t *prevCE, uint32_t *prevContCE,
                                            uint32_t strength);

const UChar* U_CALLCONV ucol_tok_getRulesFromBundle(
    void* context,
    const char* locale,
    const char* type,
    int32_t* pLength,
    UErrorCode* status);

#endif /* #if !UCONFIG_NO_COLLATION */

#endif