DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (2b66e14805d0)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef NS_UNICODERANGE_H
#define NS_UNICODERANGE_H

#include <stdint.h>

// The following constants define unicode subranges
// values below kRangeNum must be continuous so that we can map to
// lang group directly.
// all ranges we care about should be defined under 32, that allows
// us to store range using bits of a uint32_t

// frequently used range definitions
const uint8_t kRangeCyrillic = 0;
const uint8_t kRangeGreek = 1;
const uint8_t kRangeHebrew = 2;
const uint8_t kRangeArabic = 3;
const uint8_t kRangeThai = 4;
const uint8_t kRangeKorean = 5;
const uint8_t kRangeJapanese = 6;
const uint8_t kRangeSChinese = 7;
const uint8_t kRangeTChinese = 8;
const uint8_t kRangeDevanagari = 9;
const uint8_t kRangeTamil = 10;
const uint8_t kRangeArmenian = 11;
const uint8_t kRangeBengali = 12;
const uint8_t kRangeCanadian = 13;
const uint8_t kRangeEthiopic = 14;
const uint8_t kRangeGeorgian = 15;
const uint8_t kRangeGujarati = 16;
const uint8_t kRangeGurmukhi = 17;
const uint8_t kRangeKhmer = 18;
const uint8_t kRangeMalayalam = 19;
const uint8_t kRangeOriya = 20;
const uint8_t kRangeTelugu = 21;
const uint8_t kRangeKannada = 22;
const uint8_t kRangeSinhala = 23;
const uint8_t kRangeTibetan = 24;

const uint8_t kRangeSpecificItemNum = 25;

// range/rangeSet grow to this place 25-29

const uint8_t kRangeSetStart = 30;  // range set definition starts from here
const uint8_t kRangeSetLatin = 30;
const uint8_t kRangeSetCJK = 31;
const uint8_t kRangeSetEnd = 31;  // range set definition ends here, this
                                  // and smaller ranges are used as bit
                                  // mask, don't increase this value.

// less frequently used range definition
const uint8_t kRangeSurrogate = 32;
const uint8_t kRangePrivate = 33;
const uint8_t kRangeMisc = 34;
const uint8_t kRangeUnassigned = 35;
const uint8_t kRangeSyriac = 36;
const uint8_t kRangeThaana = 37;
const uint8_t kRangeLao = 38;
const uint8_t kRangeMyanmar = 39;
const uint8_t kRangeCherokee = 40;
const uint8_t kRangeOghamRunic = 41;
const uint8_t kRangeMongolian = 42;
const uint8_t kRangeMathOperators = 43;
const uint8_t kRangeMiscTechnical = 44;
const uint8_t kRangeControlOpticalEnclose = 45;
const uint8_t kRangeBoxBlockGeometrics = 46;
const uint8_t kRangeMiscSymbols = 47;
const uint8_t kRangeDingbats = 48;
const uint8_t kRangeBraillePattern = 49;
const uint8_t kRangeYi = 50;
const uint8_t kRangeCombiningDiacriticalMarks = 51;
const uint8_t kRangeSpecials = 52;

// aggregate ranges for non-BMP codepoints (u+2xxxx are all CJK)
const uint8_t kRangeSMP = 53;           // u+1xxxx
const uint8_t kRangeHigherPlanes = 54;  // u+3xxxx and above

const uint8_t kRangeTableBase =
    128;  // values over 127 are reserved for internal use only
const uint8_t kRangeTertiaryTable = 145;  // leave room for 16 subtable
                                          // indices (kRangeTableBase + 1 ..
                                          // kRangeTableBase + 16)

uint32_t FindCharUnicodeRange(uint32_t ch);

#endif