DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (409f3966645a)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AccessibleWrap.h"
#include "nsString.h"
#include "nsMai.h"

/**
 * ATK offsets are counted in unicode codepoints, while DOM offsets are counted
 * in UTF-16 code units.  That makes a difference for non-BMP characters,
 * which need two UTF-16 code units to be represented (a pair of surrogates),
 * while they are just one unicode character.
 *
 * To keep synchronization between ATK offsets (unicode codepoints) and DOM
 * offsets (UTF-16 code units), after translation from UTF-16 to UTF-8 we add a
 * BOM after each non-BMP character (which would otherwise use 2 UTF-16
 * code units for only 1 unicode codepoint).
 *
 * BOMs (Byte Order Marks, U+FEFF, also known as ZERO WIDTH NO-BREAK SPACE, but
 * that usage is deprecated) normally only appear at the beginning of unicode
 * files, but their occurrence within text (notably after cut&paste) is not
 * uncommon, and they are thus considered as non-text.
 *
 * Since the selection requested through ATK may not contain both surrogates
 * at the ends of the selection, we need to fetch one UTF-16 code unit more
 * on both sides, and get rid of it before returning the string to ATK. The
 * ATKStringConverterHelper class maintains this; NewATKString should be used
 * to call it properly.
 *
 * In the end,
 * - if the start is between the high and low surrogates, the UTF-8 result
 * includes a BOM from it but not the character
 * - if the end is between the high and low surrogates, the UTF-8 result
 * includes the character but *not* the BOM
 * - all non-BMP characters that are fully in the string are in the UTF-8 result
 * as character followed by BOM
 */
namespace mozilla {
namespace a11y {

namespace DOMtoATK
{

  /**
   * Converts a string of accessible text into an ATK gchar* string (by adding
   * BOMs). This can be used when offsets do not need to be adjusted because
   * the ends of the string cannot fall between surrogates.
   *
   * @param aStr the accessible text to convert.
   * @return the converted string.
   *
   * NOTE(review): ownership of the returned buffer is not visible from this
   * header; presumably the caller releases it the glib way (g_free) -- confirm
   * against the implementation.
   */
  gchar* Convert(const nsAString& aStr);

  /**
   * Add a BOM after each non-BMP character.
   *
   * Per the comment at the top of this file, the BOMs are added after the
   * text has been translated from UTF-16 to UTF-8, so that each non-BMP
   * character still accounts for two ATK offsets.
   *
   * @param aDest   receives the text with the BOMs added.
   * @param aSource the text to read from; it is not modified.
   */
  void AddBOMs(nsACString& aDest, const nsACString& aSource);

  /**
   * Replace all characters with asterisks (e.g. for password fields).
   *
   * @param aString the text to mask; passed by non-const reference and
   *                nothing is returned, so the result is delivered through
   *                this same string.
   */
  void ConvertTexttoAsterisks(nsAString& aString);

  /**
   * Parameterize conversion.
   *
   * Bit flags accepted by NewATKString to select optional processing of the
   * text before it is converted for ATK.
   */
  enum class AtkStringConvertFlags : uint32_t {
    // No optional processing.
    None                   = 0,
    // Mask the text with ConvertTexttoAsterisks before converting it.
    ConvertTextToAsterisks = 1 << 0,
  };

  // Provides the bitwise operators for the scoped enum, which is what allows
  // NewATKString to test a flag with `aFlags & ...`.
  MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(AtkStringConvertFlags)

  /**
   * Two-step helper for turning a range of accessible text into an ATK
   * string: first widen the requested offsets with AdjustOffsets, then hand
   * the text fetched with those widened offsets to ConvertAdjusted.
   * NewATKString wraps both steps in the right order.
   */
  class ATKStringConverterHelper {
  public:
    // All state starts out cleared; see the member initializers below.
    ATKStringConverterHelper() = default;

    /**
     * Widens the requested range by one character on each end, so that
     * ConvertUTF16toUTF8 can still convert a surrogate pair when one of the
     * originally requested offsets falls in the middle of it.
     *
     * @param aStartOffset in/out start offset of the range.
     * @param aEndOffset   in/out end offset of the range.
     * @param count        NewATKString passes the accessible's
     *                     CharacterCount() here.
     */
    void AdjustOffsets(gint* aStartOffset, gint* aEndOffset, gint count);

    /**
     * Produces the ATK gchar* string (with BOMs added) for text that was
     * fetched using adjusted offsets.  AdjustOffsets must have been called
     * before obtaining the text that is passed in.
     *
     * @param aStr the text fetched with the adjusted offsets.
     */
    gchar* ConvertAdjusted(const nsAString& aStr);

  private:
    /**
     * Strips the additional characters that were requested when the offsets
     * were widened.  (The earlier wording of this comment named
     * "PrepareUTF16toUTF8", which is not declared in this header; presumably
     * AdjustOffsets is what was meant.)
     */
    gchar* FinishUTF16toUTF8(nsCString& aStr);

#ifdef DEBUG
    // Debug-only flag; presumably records that AdjustOffsets has been called
    // so the required call order can be checked -- confirm in the .cpp.
    bool mAdjusted = false;
#endif
    // Presumably record whether AdjustOffsets moved the start / end offset,
    // i.e. whether an extra character has to be dropped on that side.
    bool mStartShifted = false;
    bool mEndShifted = false;
  };

  /**
   * Get text from aAccessible, using ATKStringConverterHelper to properly
   * introduce appropriate BOMs.
   */
  template <class AccessibleOrProxy>
  gchar* NewATKString(AccessibleOrProxy* aAccessible,
                      gint aStartOffset, gint aEndOffset,
                      AtkStringConvertFlags aFlags)
  {
    // Work on local copies: the helper receives pointers to them and may
    // change the range that is actually fetched.
    gint fetchStart = aStartOffset;
    gint fetchEnd = aEndOffset;

    ATKStringConverterHelper helper;
    helper.AdjustOffsets(&fetchStart, &fetchEnd,
                         static_cast<gint>(aAccessible->CharacterCount()));

    // Fetch the text for the adjusted range.
    nsAutoString text;
    aAccessible->TextSubstring(fetchStart, fetchEnd, text);

    // Optional masking is applied before the conversion.
    if (aFlags & AtkStringConvertFlags::ConvertTextToAsterisks) {
      ConvertTexttoAsterisks(text);
    }

    // The same helper that adjusted the offsets finishes the conversion.
    return helper.ConvertAdjusted(text);
  }

  /**
   * Get a character from aAccessible, fetching more data as appropriate to
   * properly get non-BMP characters or a BOM as appropriate.
   */
  template <class AccessibleCharAt>
  gunichar ATKCharacter(AccessibleCharAt* aAccessible, gint aOffset)
  {
    // char16_t is unsigned short in Mozilla, gunichar is guint32 in glib.
    const gunichar unit = static_cast<gunichar>(aAccessible->CharAt(aOffset));

    if (NS_IS_HIGH_SURROGATE(unit)) {
      // Leading surrogate: the code unit right after it must be the trailing
      // surrogate that completes the pair.
      const gunichar next =
        static_cast<gunichar>(aAccessible->CharAt(aOffset + 1));

      if (NS_IS_LOW_SURROGATE(next)) {
        return SURROGATE_TO_UCS4(unit, next);
      }

      // Unpaired leading surrogate: flag the error with U+FFFD.
      return 0xFFFD;
    }

    if (NS_IS_LOW_SURROGATE(unit)) {
      // The offset points at a trailing surrogate: report a BOM (U+FEFF)
      // instead of the character itself.
      return 0xFEFF;
    }

    // Any other code unit is returned as-is.
    return unit;
  }

}

} // namespace a11y
} // namespace mozilla