DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (31ec81b5d7bb)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
/*
*******************************************************************************
*
*   Copyright (C) 2000-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_elm.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created 02/22/2001
*   created by: Vladimir Weinstein
*
*   This program reads the Fractional UCA table and generates
*   internal format for UCA table as well as inverse UCA table.
*   It then writes binary files containing the data: ucadata.dat 
*   & invuca.dat
*/
#ifndef UCOL_UCAELEMS_H
#define UCOL_UCAELEMS_H

#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "ucol_tok.h"

#if !UCONFIG_NO_COLLATION

#include "ucol_imp.h"

#ifdef UCOL_DEBUG
#include "cmemory.h"
#include <stdio.h>
#endif

U_CDECL_BEGIN

/* This is the maximum trie capacity for the mapping trie.
Due to current limitations in genuca and the design of UTrie,
this number can't be more than 256K.
As of Unicode 5, it currently could safely go to 128K without
a problem. Normally, less than 32K are tailored.
The value below (0x40000 == 256K) is exactly that upper limit.
*/
#define UCOL_ELM_TRIE_CAPACITY 0x40000

/* This is the maximum capacity for the temporary combining class
 * table.  The table will be compacted after scanning all the
 * Unicode codepoints.
 * The value below is 0x10000 == 65536.
 */
#define UCOL_MAX_CM_TAB  0x10000


/* Table of collation elements (CEs) reached through a pointer, together
 * with two bookkeeping integers.  A tempUCATable refers to one of these
 * through its `expansions` member.
 * NOTE(review): the exact meaning of position/size is not visible in this
 * header; the notes below are inferred from the names - confirm against
 * the implementation.
 */
typedef struct {
    uint32_t *CEs;      /* storage for the collation elements */
    int32_t position;   /* presumably the next free index in CEs - TODO confirm */
    int32_t size;       /* presumably the allocated capacity of CEs - TODO confirm */
} ExpansionTable;

/* Describes a single element (code point sequence plus its collation
 * elements) in the form accepted by uprv_uca_addAnElement().
 * All fixed-size arrays in this struct hold 128 entries.
 * NOTE(review): the relationship between the inline buffers
 * (prefixChars, uchars) and the pointers (prefix, cPoints) is not visible
 * here; presumably the pointers refer into the buffers - confirm.
 */
typedef struct {
    UChar prefixChars[128];  /* inline buffer, presumably backing `prefix` - TODO confirm */
    UChar *prefix;           /* prefix characters of the element */
    uint32_t prefixSize;     /* presumably the number of UChars in `prefix` - TODO confirm */
    UChar uchars[128];       /* inline buffer, presumably backing `cPoints` - TODO confirm */
    UChar *cPoints;          /* characters of the element */
    uint32_t cSize;          /* Number of characters in sequence - for contraction */
    uint32_t noOfCEs;        /* Number of collation elements                       */
    uint32_t CEs[128];      /* These are collation elements - there could be more than one - in case of expansion */
    uint32_t mapCE;         /* This is the value element maps in original table   */
    uint32_t sizePrim[128];  /* presumably per-CE primary weight sizes - TODO confirm */
    uint32_t sizeSec[128];   /* presumably per-CE secondary weight sizes - TODO confirm */
    uint32_t sizeTer[128];   /* presumably per-CE tertiary weight sizes - TODO confirm */
    UBool caseBit;           /* case flag for the element; exact semantics not visible here */
    UBool isThai;            /* Thai flag for the element; exact semantics not visible here */
} UCAElements;

/* Bookkeeping for maximum expansion sizes related to Jamo.  A tempUCATable
 * refers to one of these through its `maxJamoExpansions` member.
 * NOTE(review): L/V/T presumably stand for the Hangul Jamo leading
 * consonant, vowel and trailing consonant classes; this header does not
 * state it - confirm against the implementation.
 */
typedef struct {
  uint32_t *endExpansionCE;  /* array of CEs; presumably the last CE of each expansion - TODO confirm */
  UBool    *isV;             /* array of flags; presumably "is a vowel" per entry - TODO confirm */
  int32_t   position;        /* presumably the next free index in the arrays - TODO confirm */
  int32_t   size;            /* presumably the allocated capacity of the arrays - TODO confirm */
  uint8_t   maxLSize;        /* maximum size recorded for L */
  uint8_t   maxVSize;        /* maximum size recorded for V */
  uint8_t   maxTSize;        /* maximum size recorded for T */
} MaxJamoExpansionTable;

/* Two parallel arrays (CEs and byte-sized counts) plus bookkeeping
 * integers.  A tempUCATable refers to one of these through its
 * `maxExpansions` member.
 * NOTE(review): field meanings below are inferred from the names - confirm.
 */
typedef struct {
  uint32_t *endExpansionCE;   /* array of CEs; presumably the last CE of each expansion - TODO confirm */
  uint8_t  *expansionCESize;  /* array of sizes; presumably the expansion size for each entry above - TODO confirm */
  int32_t   position;         /* presumably the next free index in the arrays - TODO confirm */
  int32_t   size;             /* presumably the allocated capacity of the arrays - TODO confirm */
} MaxExpansionTable;

/* Lookup table of combining marks grouped by combining class.  A
 * tempUCATable refers to one of these through its `cmLookup` member, which
 * is described there as the combining class lookup for tailoring.
 */
typedef struct {
    uint16_t   index[256];  /* index of cPoints by combining class 0-255. */
    UChar      *cPoints;    /* code point array of all combining marks */
    uint32_t   size;        /* total number of combining marks */
} CombinClassTable;

/* Temporary working table used while building collation data.  It is
 * returned by uprv_uca_initTempTable(), passed to uprv_uca_addAnElement(),
 * uprv_uca_assembleTable() and uprv_uca_canonicalClosure(), and handed to
 * uprv_uca_closeTempTable() when done.
 * NOTE(review): ownership of the pointed-to objects is not visible in this
 * header - confirm against the implementation before freeing anything.
 */
typedef struct {
  /*CompactEIntArray      *mapping; */
  UNewTrie                 *mapping; /* the mapping trie (see UCOL_ELM_TRIE_CAPACITY) */
  ExpansionTable        *expansions; /* table of expansion CEs */
  struct CntTable       *contractions; /* contraction table; type declared elsewhere */
  UCATableHeader        *image;        /* table header image, as passed to uprv_uca_initTempTable() */
  UColOptionSet         *options;      /* collation option set */
  MaxExpansionTable     *maxExpansions;     /* maximum expansion bookkeeping */
  MaxJamoExpansionTable *maxJamoExpansions; /* maximum Jamo expansion bookkeeping */
  uint8_t               *unsafeCP;     /* byte array; presumably the "unsafe code point" set - TODO confirm */
  uint8_t               *contrEndCP;   /* byte array; presumably code points that end contractions - TODO confirm */
  const UCollator       *UCA;          /* read-only collator, as passed to uprv_uca_initTempTable() */
  UHashtable      *prefixLookup;       /* hash table used for prefix lookup */
  CombinClassTable      *cmLookup;  /* combining class lookup for tailoring. */
} tempUCATable; 

/* A single UTF-16 code unit paired with its combining class.  Used as the
 * element type of tempTailorContext::precomp.
 */
typedef struct {
    UChar cp;          /* the character */
    uint16_t cClass;   /* combining class */
}CompData;

/* Working state made of three pointer/length pairs (precomp, decomp, comp)
 * plus combining-class bookkeeping.
 * NOTE(review): nothing in this header shows where the struct is used or
 * who owns the buffers; the notes below are inferred from the names -
 * confirm against the implementation.
 */
typedef struct {
    CompData *precomp;     /* array of (character, combining class) entries */
    int32_t precompLen;    /* presumably the number of entries in precomp - TODO confirm */
    UChar *decomp;         /* presumably a decomposed character sequence - TODO confirm */
    int32_t decompLen;     /* presumably the length of decomp in UChars - TODO confirm */
    UChar *comp;           /* presumably a composed character sequence - TODO confirm */
    int32_t compLen;       /* presumably the length of comp in UChars - TODO confirm */
    uint16_t curClass;     /* presumably the combining class currently being processed - TODO confirm */
    uint16_t tailoringCM;  /* presumably the combining mark being tailored - TODO confirm */
    int32_t  cmPos;        /* presumably a position related to the combining mark - TODO confirm */
}tempTailorContext;

/* Returns a pointer to a tempUCATable set up from the given arguments.
 * Parameters: image - table header; opts - collation option set;
 * UCA - read-only collator; initTag / supplementaryInitTag - CE tags
 * (presumably the initial trie values for BMP and supplementary code
 * points - TODO confirm); status - ICU error code, in/out.
 * NOTE(review): the implementation is not visible here; presumably the
 * result must be released with uprv_uca_closeTempTable() - confirm.
 */
U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status);
/* Closes the temporary table `t`.
 * NOTE(review): presumably releases the table and what it owns; the
 * implementation is not visible here - confirm.
 */
U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t);
/* Adds the element described by `element` to the temporary table `t`.
 * Returns a uint32_t (presumably the CE the element was mapped to - TODO
 * confirm).  `status` is the ICU error code, in/out.
 */
U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status);
/* Assembles a table from the temporary table `t` and returns a pointer to
 * its UCATableHeader.  `status` is the ICU error code, in/out.
 * NOTE(review): ownership of the returned header is not visible here -
 * confirm who frees it.
 */
U_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status);

/* Canonical closure step for the temporary table `t`.
 * Parameters: t - the temporary table; src - token parser state;
 * closed - a UnicodeSet (presumably receives the code points touched by
 * the closure - TODO confirm); status - ICU error code, in/out.
 * Returns an int32_t (presumably a count of items added - TODO confirm).
 * Note: the icu::UnicodeSet parameter makes this declaration usable from
 * C++ only.
 */
U_CAPI int32_t U_EXPORT2
uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src,
                          icu::UnicodeSet *closed, UErrorCode *status);

U_CDECL_END

#endif /* #if !UCONFIG_NO_COLLATION */

#endif