DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Header

Untracked file

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: NPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Communicator client code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2002
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Simon Montagu <smontagu@netscape.com>
 *
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the NPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the NPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */


#include "nsUCConstructors.h"
#include "nsUnicodeToLangBoxArabic16.h"

#include "nsISupports.h"

// Lookup table from Arabic Presentation Forms-B code points to bytes of the
// LangBox ISO-8859-6.16 font encoding.
// This table is based on http://www.langbox.com/arabic/FontSet_ISO8859-6-16.html
//
// The table is indexed by (code point - 0xFE70) and covers U+FE70..U+FEFC
// inclusive, one entry per code point.
// Code points with no character name listed below (U+FE73, U+FE75) are
// mapped to the placeholder 0x3F.
static const unsigned char uni2lbox [] =
{
  0x6B, // U+FE70 ARABIC FATHATAN ISOLATED FORM
  0x90, // U+FE71 ARABIC FATHATAN ON TATWEEL
  0x6C, // U+FE72 ARABIC DAMMATAN ISOLATED FORM
  0x3F, // U+FE73 (no mapping; placeholder)
  0x6D, // U+FE74 ARABIC KASRATAN ISOLATED FORM
  0x3F, // U+FE75 (no mapping; placeholder)
  0x6E, // U+FE76 ARABIC FATHA ISOLATED FORM
  0x93, // U+FE77 ARABIC FATHA ON TATWEEL
  0x6F, // U+FE78 ARABIC DAMMA ISOLATED FORM
  0x94, // U+FE79 ARABIC DAMMA ON TATWEEL
  0x70, // U+FE7A ARABIC KASRA ISOLATED FORM
  0x95, // U+FE7B ARABIC KASRA ON TATWEEL
  0x71, // U+FE7C ARABIC SHADDA ISOLATED FORM
  0x97, // U+FE7D ARABIC SHADDA ON TATWEEL
  0x72, // U+FE7E ARABIC SUKUN ISOLATED FORM
  0x96, // U+FE7F ARABIC SUKUN ON TATWEEL
  0x41, // U+FE80 ARABIC HAMZA ISOLATED FORM
  0x42, // U+FE81 ARABIC LIGATURE MADDA ON ALEF ISOLATED FORM
  0xA1, // U+FE82 ARABIC LIGATURE MADDA ON ALEF FINAL FORM
  0x43, // U+FE83 ARABIC LIGATURE HAMZA ON ALEF ISOLATED FORM
  0xA2, // U+FE84 ARABIC LIGATURE HAMZA ON ALEF FINAL FORM
  0x44, // U+FE85 ARABIC LIGATURE HAMZA ON WAW ISOLATED FORM
  0xA3, // U+FE86 ARABIC LIGATURE HAMZA ON WAW FINAL FORM
  0x45, // U+FE87 ARABIC LIGATURE HAMZA UNDER ALEF ISOLATED FORM
  0xA4, // U+FE88 ARABIC LIGATURE HAMZA UNDER ALEF FINAL FORM
  0x46, // U+FE89 ARABIC LIGATURE HAMZA ON YEH ISOLATED FORM
  0xF9, // U+FE8A ARABIC LIGATURE HAMZA ON YEH FINAL FORM
  0xF8, // U+FE8B ARABIC LIGATURE HAMZA ON YEH INITIAL FORM
  0xA0, // U+FE8C ARABIC LIGATURE HAMZA ON YEH MEDIAL FORM
  0x47, // U+FE8D ARABIC ALEF ISOLATED FORM
  0xA5, // U+FE8E ARABIC ALEF FINAL FORM
  0x48, // U+FE8F ARABIC BAA ISOLATED FORM
  0xAE, // U+FE90 ARABIC BAA FINAL FORM
  0xAC, // U+FE91 ARABIC BAA INITIAL FORM
  0xAD, // U+FE92 ARABIC BAA MEDIAL FORM
  0x49, // U+FE93 ARABIC TAA MARBUTA ISOLATED FORM
  0xB1, // U+FE94 ARABIC TAA MARBUTA FINAL FORM
  0x4A, // U+FE95 ARABIC TAA ISOLATED FORM
  0xB4, // U+FE96 ARABIC TAA FINAL FORM
  0xB2, // U+FE97 ARABIC TAA INITIAL FORM
  0xB3, // U+FE98 ARABIC TAA MEDIAL FORM
  0x4B, // U+FE99 ARABIC THAA ISOLATED FORM
  0xB7, // U+FE9A ARABIC THAA FINAL FORM
  0xB5, // U+FE9B ARABIC THAA INITIAL FORM
  0xB6, // U+FE9C ARABIC THAA MEDIAL FORM
  0x4C, // U+FE9D ARABIC JEEM ISOLATED FORM
  0xBA, // U+FE9E ARABIC JEEM FINAL FORM
  0xB8, // U+FE9F ARABIC JEEM INITIAL FORM
  0xB9, // U+FEA0 ARABIC JEEM MEDIAL FORM
  0x4D, // U+FEA1 ARABIC HAA ISOLATED FORM
  0xBD, // U+FEA2 ARABIC HAA FINAL FORM
  0xBB, // U+FEA3 ARABIC HAA INITIAL FORM
  0xBC, // U+FEA4 ARABIC HAA MEDIAL FORM
  0x4E, // U+FEA5 ARABIC KHAA ISOLATED FORM
  0xC0, // U+FEA6 ARABIC KHAA FINAL FORM
  0xBE, // U+FEA7 ARABIC KHAA INITIAL FORM
  0xBF, // U+FEA8 ARABIC KHAA MEDIAL FORM
  0x4F, // U+FEA9 ARABIC DAL ISOLATED FORM
  0xA6, // U+FEAA ARABIC DAL FINAL FORM
  0x50, // U+FEAB ARABIC THAL ISOLATED FORM
  0xA7, // U+FEAC ARABIC THAL FINAL FORM
  0x51, // U+FEAD ARABIC RA ISOLATED FORM
  0xA8, // U+FEAE ARABIC RA FINAL FORM
  0x52, // U+FEAF ARABIC ZAIN ISOLATED FORM
  0xA9, // U+FEB0 ARABIC ZAIN FINAL FORM
  0x53, // U+FEB1 ARABIC SEEN ISOLATED FORM
  0xC3, // U+FEB2 ARABIC SEEN FINAL FORM
  0xC1, // U+FEB3 ARABIC SEEN INITIAL FORM
  0xC2, // U+FEB4 ARABIC SEEN MEDIAL FORM
  0x54, // U+FEB5 ARABIC SHEEN ISOLATED FORM
  0xC6, // U+FEB6 ARABIC SHEEN FINAL FORM
  0xC4, // U+FEB7 ARABIC SHEEN INITIAL FORM
  0xC5, // U+FEB8 ARABIC SHEEN MEDIAL FORM
  0x55, // U+FEB9 ARABIC SAD ISOLATED FORM
  0xC9, // U+FEBA ARABIC SAD FINAL FORM
  0xC7, // U+FEBB ARABIC SAD INITIAL FORM
  0xC8, // U+FEBC ARABIC SAD MEDIAL FORM
  0x56, // U+FEBD ARABIC DAD ISOLATED FORM
  0xCC, // U+FEBE ARABIC DAD FINAL FORM
  0xCA, // U+FEBF ARABIC DAD INITIAL FORM
  0xCB, // U+FEC0 ARABIC DAD MEDIAL FORM
  0x57, // U+FEC1 ARABIC TAH ISOLATED FORM
  0xCF, // U+FEC2 ARABIC TAH FINAL FORM
  0xCD, // U+FEC3 ARABIC TAH INITIAL FORM
  0xCE, // U+FEC4 ARABIC TAH MEDIAL FORM
  0x58, // U+FEC5 ARABIC ZAH ISOLATED FORM
  0xD2, // U+FEC6 ARABIC ZAH FINAL FORM
  0xD0, // U+FEC7 ARABIC ZAH INITIAL FORM
  0xD1, // U+FEC8 ARABIC ZAH MEDIAL FORM
  0x59, // U+FEC9 ARABIC AIN ISOLATED FORM
  0xD5, // U+FECA ARABIC AIN FINAL FORM
  0xD3, // U+FECB ARABIC AIN INITIAL FORM
  0xD4, // U+FECC ARABIC AIN MEDIAL FORM
  0x5A, // U+FECD ARABIC GHAIN ISOLATED FORM
  0xD8, // U+FECE ARABIC GHAIN FINAL FORM
  0xD6, // U+FECF ARABIC GHAIN INITIAL FORM
  0xD7, // U+FED0 ARABIC GHAIN MEDIAL FORM
  0x61, // U+FED1 ARABIC FEH ISOLATED FORM
  0xDB, // U+FED2 ARABIC FEH FINAL FORM
  0xD9, // U+FED3 ARABIC FEH INITIAL FORM
  0xDA, // U+FED4 ARABIC FEH MEDIAL FORM
  0x62, // U+FED5 ARABIC QAF ISOLATED FORM
  0xDE, // U+FED6 ARABIC QAF FINAL FORM
  0xDC, // U+FED7 ARABIC QAF INITIAL FORM
  0xDD, // U+FED8 ARABIC QAF MEDIAL FORM
  0x63, // U+FED9 ARABIC KAF ISOLATED FORM
  0xE1, // U+FEDA ARABIC KAF FINAL FORM
  0xDF, // U+FEDB ARABIC KAF INITIAL FORM
  0xE0, // U+FEDC ARABIC KAF MEDIAL FORM
  0x64, // U+FEDD ARABIC LAM ISOLATED FORM
  0xE4, // U+FEDE ARABIC LAM FINAL FORM
  0xE2, // U+FEDF ARABIC LAM INITIAL FORM
  0xE3, // U+FEE0 ARABIC LAM MEDIAL FORM
  0x65, // U+FEE1 ARABIC MEEM ISOLATED FORM
  0xE7, // U+FEE2 ARABIC MEEM FINAL FORM
  0xE5, // U+FEE3 ARABIC MEEM INITIAL FORM
  0xE6, // U+FEE4 ARABIC MEEM MEDIAL FORM
  0x66, // U+FEE5 ARABIC NOON ISOLATED FORM
  0xEA, // U+FEE6 ARABIC NOON FINAL FORM
  0xE8, // U+FEE7 ARABIC NOON INITIAL FORM
  0xE9, // U+FEE8 ARABIC NOON MEDIAL FORM
  0x67, // U+FEE9 ARABIC HEH ISOLATED FORM
  0xED, // U+FEEA ARABIC HEH FINAL FORM
  0xEB, // U+FEEB ARABIC HEH INITIAL FORM
  0xEC, // U+FEEC ARABIC HEH MEDIAL FORM
  0x68, // U+FEED ARABIC WAW ISOLATED FORM
  0xAA, // U+FEEE ARABIC WAW FINAL FORM
  0x69, // U+FEEF ARABIC ALEF MAKSURA ISOLATED FORM
  0xAB, // U+FEF0 ARABIC ALEF MAKSURA FINAL FORM
  0x6A, // U+FEF1 ARABIC YEH ISOLATED FORM
  0xF0, // U+FEF2 ARABIC YEH FINAL FORM
  0xEE, // U+FEF3 ARABIC YEH INITIAL FORM
  0xEF, // U+FEF4 ARABIC YEH MEDIAL FORM
  0x76, // U+FEF5 ARABIC LIGATURE MADDA ON LAM ALEF ISOLATED FORM
  0xFA, // U+FEF6 ARABIC LIGATURE MADDA ON LAM ALEF FINAL FORM
  0x77, // U+FEF7 ARABIC LIGATURE HAMZA ON LAM ALEF ISOLATED FORM
  0xFC, // U+FEF8 ARABIC LIGATURE HAMZA ON LAM ALEF FINAL FORM
  0x78, // U+FEF9 ARABIC LIGATURE HAMZA UNDER LAM ALEF ISOLATED FORM
  0xFB, // U+FEFA ARABIC LIGATURE HAMZA UNDER LAM ALEF FINAL FORM
  0x79, // U+FEFB ARABIC LIGATURE LAM ALEF ISOLATED FORM
  0xFD  // U+FEFC ARABIC LIGATURE LAM ALEF FINAL FORM
 };

/**
 * Convert Unicode text to bytes of the LangBox ISO-8859-6.16 font encoding.
 *
 * Each source code unit produces at most one output byte. Code units that
 * have no mapping are consumed without producing any output.
 *
 * @param aSrc        [IN] source code units
 * @param aSrcLength  [IN/OUT] in: number of code units available in aSrc;
 *                    out: number of code units actually consumed
 * @param aDest       [OUT] destination byte buffer
 * @param aDestLength [IN/OUT] in: capacity of aDest in bytes;
 *                    out: number of bytes written to aDest
 * @return NS_OK when all of the input was consumed, or
 *         NS_OK_UENC_MOREOUTPUT when aDest filled up first; in that case the
 *         caller can resume from aSrc + *aSrcLength with a fresh buffer.
 */
NS_IMETHODIMP nsUnicodeToLangBoxArabic16::Convert(
      const PRUnichar * aSrc, PRInt32 * aSrcLength,
      char * aDest, PRInt32 * aDestLength)
{
   const PRInt32 srcLength = *aSrcLength;
   // The incoming *aDestLength is the buffer capacity; never write past it.
   const PRInt32 destCapacity = *aDestLength;
   PRInt32 inlen = 0;
   PRInt32 outlen = 0;

   while (inlen < srcLength) {
     PRUnichar aChar = aSrc[inlen];
     // Byte to emit for aChar, or -1 when aChar has no mapping.
     PRInt32 outByte = -1;

     if (((aChar >= 0x0020) && (aChar <= 0x0027)) ||
          (aChar == 0x2A) ||
          (aChar == 0x2B) ||
         ((aChar >= 0x002D) && (aChar <= 0x002F)) ||
          (aChar == 0x003A) ||
         ((aChar >= 0x003C) && (aChar <= 0x003E)) ||
          (aChar == 0x40) ||
          (aChar == 0x5C) ||
          (aChar == 0x5E) ||
          (aChar == 0x5F) ||
          (aChar == 0x7C) ||
          (aChar == 0x7E)) {
       // ASCII punctuation that is passed through unchanged
       outByte = aChar;
     }
     // ISO-8859-6-16 swaps symmetric characters internally, but we have
     // already swapped them where necessary during Bidi reordering, so we
     // must swap them back here.
     else if (0x0028 == aChar) {
       outByte = 0x29;
     } else if (0x0029 == aChar) {
       outByte = 0x28;
     } else if (0x005B == aChar) {
       outByte = 0x5D;
     } else if (0x005D == aChar) {
       outByte = 0x5B;
     } else if (0x007B == aChar) {
       outByte = 0x7D;
     } else if (0x007D == aChar) {
       outByte = 0x7B;
     } else if (0x060C == aChar) {
       // ARABIC COMMA
       outByte = 0x2C;
     } else if (0x061B == aChar) {
       // ARABIC SEMICOLON
       outByte = 0x3B;
     } else if (0x061F == aChar) {
       // ARABIC QUESTION MARK
       outByte = 0x3F;
     } else if (0x0640 == aChar) {
       // ARABIC TATWEEL
       outByte = 0x60;
     } else if ((aChar >= 0x0660) && (aChar <= 0x0669)) {
       // ARABIC-INDIC DIGITS
       outByte = aChar - 0x0660 + 0x30;
     } else if ((aChar >= 0xFE70) && (aChar <= 0xFEFC)) {
       // ARABIC PRESENTATION FORMS
       outByte = uni2lbox[aChar - 0xFE70];
     }

     if (outByte >= 0) {
       if (outlen >= destCapacity) {
         // Destination is full: report progress so far and ask for more room
         // instead of writing past the end of aDest.
         *aSrcLength = inlen;
         *aDestLength = outlen;
         return NS_OK_UENC_MOREOUTPUT;
       }
       aDest[outlen++] = (char) outByte;
     }
     inlen++;
   }

   *aSrcLength = inlen;
   *aDestLength = outlen;
   return NS_OK;
}

/**
 * Report an upper bound on the number of bytes needed to convert a source
 * buffer. The bound is simply the source length.
 *
 * @param aSrc        [IN] source text (not examined)
 * @param aSrcLength  [IN] number of source code units
 * @param aDestLength [OUT] receives aSrcLength
 * @return NS_OK always
 */
NS_IMETHODIMP
nsUnicodeToLangBoxArabic16::GetMaxLength(const PRUnichar * aSrc,
                                         PRInt32 aSrcLength,
                                         PRInt32 * aDestLength)
{
  *aDestLength = aSrcLength;

  return NS_OK;
}

/**
 * Finish a conversion. Nothing is written to aDest; the reported output
 * length is always zero.
 *
 * @param aDest       [OUT] destination buffer (left untouched)
 * @param aDestLength [OUT] set to 0
 * @return NS_OK always
 */
NS_IMETHODIMP
nsUnicodeToLangBoxArabic16::Finish(char * aDest, PRInt32 * aDestLength)
{
   *aDestLength = 0;

   return NS_OK;
}

/**
 * Reset the encoder. This implementation does nothing and always succeeds.
 *
 * @return NS_OK always
 */
NS_IMETHODIMP
nsUnicodeToLangBoxArabic16::Reset()
{
   return NS_OK;
}

/**
 * Configuring the output error behavior is not supported by this encoder;
 * all arguments are ignored.
 *
 * @return NS_ERROR_NOT_IMPLEMENTED always
 */
NS_IMETHODIMP
nsUnicodeToLangBoxArabic16::SetOutputErrorBehavior(PRInt32 aBehavior,
                                                   nsIUnicharEncoder * aEncoder,
                                                   PRUnichar aChar)
{
   return NS_ERROR_NOT_IMPLEMENTED;
}


/**
 * Record in aInfo which characters this encoder handles, using the
 * CLEAR_REPRESENTABLE / SET_REPRESENTABLE macros.
 *
 * @param aInfo [IN/OUT] representability map to update
 * @return NS_OK always
 */
NS_IMETHODIMP nsUnicodeToLangBoxArabic16::FillInfo(PRUint32* aInfo)
{
   // Inclusive [first, last] ranges of characters marked as representable.
   static const PRUnichar kRepresentable[][2] = {
     // The few ASCII exceptions
     { 0x0020, 0x002B },
     { 0x002D, 0x002F },
     { 0x003A, 0x003A },
     { 0x003C, 0x003E },
     { 0x0040, 0x0040 },
     { 0x005B, 0x005F },
     { 0x007B, 0x007E },
     // Arabic punctuation and numerals
     { 0x060C, 0x060C },
     { 0x061B, 0x061B },
     { 0x061F, 0x061F },
     { 0x0640, 0x0640 },
     { 0x0660, 0x0669 },
     // Arabic Presentation Forms-B (U+FE73 and U+FE75 are left out)
     { 0xFE70, 0xFE72 },
     { 0xFE74, 0xFE74 },
     { 0xFE76, 0xFEFC }
   };

   PRUnichar ch;
   PRUint32 range;

   /* Start off by marking the whole ASCII range as unrepresentable. If we
    * don't do this we will try to use an ISO-8859-6.16 font for ASCII text
    * embedded in Arabic content, and it will not be rendered correctly.
    * See discussion in bug 172491
    */
   for (ch = 0x0000; ch <= 0x007F; ++ch) {
     CLEAR_REPRESENTABLE(aInfo, ch);
   }

   // Then switch on every character listed in the table above.
   for (range = 0;
        range < sizeof(kRepresentable) / sizeof(kRepresentable[0]);
        ++range) {
     for (ch = kRepresentable[range][0];
          ch <= kRepresentable[range][1];
          ++ch) {
       SET_REPRESENTABLE(aInfo, ch);
     }
   }

   return NS_OK;
}