DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Header

Mercurial (dcc6d7a0dc00)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsNCRFallbackEncoderWrapper.h"

#include "mozilla/dom/EncodingUtils.h"

// Constructs the wrapper around whatever encoder
// mozilla::dom::EncodingUtils::EncoderForEncoding() hands back for aEncoding.
// The result is stored in mEncoder and used by Encode().
nsNCRFallbackEncoderWrapper::nsNCRFallbackEncoderWrapper(
  const nsACString& aEncoding)
  : mEncoder(mozilla::dom::EncodingUtils::EncoderForEncoding(aEncoding))
{
  // All initialization happens in the member initializer list.
}

// Nothing to do explicitly here; members are destroyed by their own
// destructors.
nsNCRFallbackEncoderWrapper::~nsNCRFallbackEncoderWrapper()
{
}

// Writes the decimal numeric character reference "&#<aUnmappable>;" into
// aBytes starting at byte offset aDstWritten, growing aBytes if it is too
// short to hold it.
//
// aBytes       destination buffer; only ever grown here, never shrunk.
// aDstWritten  in: offset at which to write; out: advanced past the
//              reference on success, left untouched on failure.
// aUnmappable  the number to place between "&#" and ";".
//
// Returns false if the resulting length would exceed INT32_MAX or if
// growing aBytes fails; true otherwise.
bool
nsNCRFallbackEncoderWrapper::WriteNCR(nsACString& aBytes,
                                      uint32_t& aDstWritten,
                                      int32_t aUnmappable)
{
  // Format the number in a scratch string so that aBytes is not potentially
  // shrunk and then grown back as a side effect of the formatting.
  nsAutoCString reference("&#");
  reference.AppendInt(aUnmappable);
  reference.Append(';');

  const uint32_t referenceLen = reference.Length();
  const uint32_t totalLen = aDstWritten + referenceLen;
  if (totalLen > INT32_MAX) {
    return false;
  }

  // Only resize when the buffer is actually too short.
  if (totalLen > aBytes.Length()) {
    if (!aBytes.SetLength(totalLen, mozilla::fallible_t())) {
      return false;
    }
  }

  // Obtain the write pointer only after the possible resize above.
  char* out = aBytes.BeginWriting() + aDstWritten;
  memcpy(out, reference.BeginReading(), referenceLen);
  aDstWritten += referenceLen;
  return true;
}

// Encodes aUtf16 into aBytes using mEncoder, replacing everything the encoder
// reports as unmappable (NS_ERROR_UENC_NOMAPPING) with a decimal numeric
// character reference of the form "&#N;".
//
// aUtf16  the UTF-16 input to encode.
// aBytes  receives the encoded output. On success it is truncated to exactly
//         the number of bytes written. On failure its length and contents
//         may already have been modified.
//
// When the encoder rejects a surrogate that is not part of a valid pair, the
// reference written is the one for U+FFFD.
//
// Returns false if there is no encoder, if the input or the output would
// exceed INT32_MAX, if a buffer allocation fails or if the encoder reports an
// unexpected result; true otherwise.
bool
nsNCRFallbackEncoderWrapper::Encode(const nsAString& aUtf16,
                                    nsACString& aBytes)
{
  // mEncoder holds whatever EncoderForEncoding() returned in the constructor.
  // If no encoder was obtained, fail cleanly instead of dereferencing null.
  if (!mEncoder) {
    return false;
  }
  // nsIUnicodeEncoder uses int32_t for sizes :-(
  if (aUtf16.Length() > INT32_MAX) {
    return false;
  }
  const char16_t* src = aUtf16.BeginReading();
  const char16_t* srcEnd = aUtf16.EndReading();
  // Number of valid output bytes at the start of aBytes so far.
  uint32_t dstWritten = 0;
  // Each iteration converts as much of the remaining input as the encoder
  // accepts. The loop repeats only after an unmappable character has been
  // replaced by a numeric character reference.
  for (;;) {
    int32_t srcLen = srcEnd - src;
    int32_t dstLen = 0;
    nsresult rv = mEncoder->GetMaxLength(src, srcLen, &dstLen);
    if (NS_FAILED(rv)) {
      return false;
    }
    uint32_t needed = dstWritten + dstLen;
    if (needed > INT32_MAX) {
      return false;
    }
    // Behind the scenes SetLength() makes the underlying allocation not have
    // slop, so we don't need to round up here.
    if (needed > aBytes.Length() && !aBytes.SetLength(needed,
                                                      mozilla::fallible_t())) {
      return false;
    }
    // We need to re-obtain the destination pointer on every iteration, because
    // SetLength() invalidates it.
    char* dst = aBytes.BeginWriting() + dstWritten;
    dstLen = aBytes.Length() - dstWritten;
    mEncoder->Reset();
    // Convert() updates srcLen and dstLen to the amounts actually consumed
    // and produced.
    rv = mEncoder->Convert(src, &srcLen, dst, &dstLen);
    // Update state tracking
    src += srcLen;
    dstWritten += dstLen;
    if (rv == NS_OK_UENC_MOREOUTPUT) {
      MOZ_ASSERT_UNREACHABLE("GetMaxLength must have returned a bogus length.");
      return false;
    }
    if (rv == NS_ERROR_UENC_NOMAPPING) {
      int32_t unmappable;
      // The unmappable code unit or the first half of an unmappable surrogate
      // pair is consumed by the encoder.
      if (srcLen <= 0) {
        // Without consumed input, src[-1] below would not be the offending
        // code unit (and could lie outside the input), and the loop would
        // not advance. Enforce the invariant in release builds as well.
        MOZ_ASSERT_UNREACHABLE("Encoder should have consumed some input.");
        return false;
      }
      char16_t codeUnit = src[-1];
      // Let's see if it is a surrogate
      size_t highBits = (codeUnit & 0xFC00);
      if (highBits == 0xD800) {
        // high surrogate
        // Let's see if we actually have a surrogate pair.
        char16_t next;
        if (src < srcEnd && NS_IS_LOW_SURROGATE((next = *src))) {
          src++; // consume the low surrogate
          unmappable = SURROGATE_TO_UCS4(codeUnit, next);
        } else {
          // unpaired surrogate.
          unmappable = 0xFFFD;
        }
      } else if (highBits == 0xDC00) {
        // low surrogate
        // This must be an unpaired surrogate.
        unmappable = 0xFFFD;
      } else {
        // not a surrogate
        unmappable = codeUnit;
      }
      // If we are encoding to ISO-2022-JP, we need to let the encoder to
      // generate a transition to the ASCII state if not already there.
      dst = aBytes.BeginWriting() + dstWritten;
      dstLen = aBytes.Length() - dstWritten;
      rv = mEncoder->Finish(dst, &dstLen);
      dstWritten += dstLen;
      if (rv != NS_OK) {
        // Failures should be impossible if GetMaxLength works. Big5 is the
        // only case where Finish() may return NS_ERROR_UENC_NOMAPPING but
        // that should never happen right after Convert() has returned it.
        MOZ_ASSERT_UNREACHABLE("Broken encoder.");
        return false;
      }
      if (!WriteNCR(aBytes, dstWritten, unmappable)) {
        return false;
      }
      // Go around again to encode whatever input follows the replaced
      // character.
      continue;
    }
    if (!(rv == NS_OK || rv == NS_OK_UENC_MOREINPUT)) {
      return false;
    }
    MOZ_ASSERT(src == srcEnd, "Converter did not consume all input.");
    // All input has been handed to the encoder; let it flush any pending
    // output.
    dst = aBytes.BeginWriting() + dstWritten;
    dstLen = aBytes.Length() - dstWritten;
    rv = mEncoder->Finish(dst, &dstLen);
    dstWritten += dstLen;
    if (rv == NS_OK_UENC_MOREOUTPUT) {
      MOZ_ASSERT_UNREACHABLE("GetMaxLength must have returned a bogus length.");
      return false;
    }
    if (rv == NS_ERROR_UENC_NOMAPPING) {
      // Big5
      if (!WriteNCR(aBytes, dstWritten, 0xFFFD)) {
        return false;
      }
    }
    // Trim the buffer down to the bytes actually written.
    return aBytes.SetLength(dstWritten, mozilla::fallible_t());
  }
}