DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Header

Mercurial (b6d82b1a6b02)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nscore.h"
#include "nsCyrillicProb.h"
#include <stdio.h>

#include "nsCOMPtr.h"
#include "nsISupports.h"
#include "nsICharsetDetector.h"
#include "nsICharsetDetectionObserver.h"
#include "nsIStringCharsetDetector.h"
#include "nsCyrillicDetector.h"

//----------------------------------------------------------------------
// Interface nsISupports [implementation]
// NOTE(review): NS_IMPL_ISUPPORTS is an XPCOM macro defined outside this
// file; presumably it supplies the nsISupports methods for
// nsCyrXPCOMDetector and exposes nsICharsetDetector -- confirm against the
// XPCOM headers.
NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector)

/**
 * Feeds one block of bytes to the detector and scores it against every
 * candidate charset.
 *
 * For each input byte and each of the mItems candidates, the byte is mapped
 * to a character class: class 0 when the high bit is clear, otherwise the
 * entry of that candidate's class table selected by the low seven bits.
 * The gCyrillicProb entry for the (previous class, current class) pair is
 * added to the candidate's running score in mProb, and the current class is
 * remembered in mLastCls for the next byte.
 *
 * Returns immediately when mDone is already set. After the block has been
 * scanned, DataEnd() is called.
 *
 * @param aBuf  bytes to examine.
 * @param aLen  number of bytes to read from aBuf.
 */
void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen) {
  if (mDone) {
    return;
  }

  for (uint32_t pos = 0; pos < aLen; ++pos) {
    const char ch = aBuf[pos];
    // Bytes without the high bit never consult the class tables.
    const bool highBitSet = (ch & 0x80) != 0;

    for (unsigned idx = 0; idx < mItems; ++idx) {
      const uint8_t charClass =
          highBitSet ? mCyrillicClass[idx][ch & 0x7F] : 0;
      NS_ASSERTION(charClass <= 32, "illegal character class");
      mProb[idx] += gCyrillicProb[mLastCls[idx]][charClass];
      mLastCls[idx] = charClass;
    }
  }

  // The decision is attempted right after each block rather than waiting
  // for an explicit end-of-data call.
  DataEnd();
}

//---------------------------------------------------------------------
#define THRESHOLD_RATIO 1.5f
void nsCyrillicDetector::DataEnd() {
  uint32_t max = 0;
  uint8_t maxIdx = 0;
  uint8_t j;
  if (mDone) return;
  for (j = 0; j < mItems; j++) {
    if (mProb[j] > max) {
      max = mProb[j];
      maxIdx = j;
    }
  }

  if (0 == max)  // if we didn't get any 8 bits data
    return;

#ifdef DEBUG
  for (j = 0; j < mItems; j++)
    printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
#endif
  this->Report(mCharsets[maxIdx]);
  mDone = true;
}

//---------------------------------------------------------------------
/**
 * Constructs the XPCOM-facing detector.
 *
 * All three arguments are forwarded unchanged to the nsCyrillicDetector
 * base-class constructor.
 *
 * @param aItems          forwarded to nsCyrillicDetector.
 * @param aCyrillicClass  forwarded to nsCyrillicDetector.
 * @param aCharsets       forwarded to nsCyrillicDetector.
 */
nsCyrXPCOMDetector::nsCyrXPCOMDetector(uint8_t aItems,
                                       const uint8_t** aCyrillicClass,
                                       const char** aCharsets)
    : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets) {
  // No observer is attached until Init() assigns one.
  mObserver = nullptr;
}

//---------------------------------------------------------------------
// The destructor performs no explicit work.
nsCyrXPCOMDetector::~nsCyrXPCOMDetector() = default;

//---------------------------------------------------------------------
/**
 * Attaches the observer that Report() notifies with the detection result.
 *
 * Asserts that no observer has been attached yet.
 *
 * @param aObserver  observer to store; must not be null.
 * @return NS_ERROR_ILLEGAL_VALUE when aObserver is null, NS_OK otherwise.
 */
NS_IMETHODIMP nsCyrXPCOMDetector::Init(nsICharsetDetectionObserver* aObserver) {
  NS_ASSERTION(mObserver == nullptr, "Init twice");

  if (aObserver) {
    mObserver = aObserver;
    return NS_OK;
  }
  return NS_ERROR_ILLEGAL_VALUE;
}

//----------------------------------------------------------
/**
 * Passes one block of input to HandleData().
 *
 * @param aBuf         bytes to examine; must not be null.
 * @param aLen         number of bytes in aBuf.
 * @param oDontFeedMe  out-parameter; must not be null. It is always set to
 *                     false on success.
 * @return NS_ERROR_ILLEGAL_VALUE when aBuf or oDontFeedMe is null (in which
 *         case nothing is processed), NS_OK otherwise.
 */
NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(const char* aBuf, uint32_t aLen,
                                       bool* oDontFeedMe) {
  NS_ASSERTION(mObserver != nullptr, "have not init yet");

  const bool argsValid = aBuf && oDontFeedMe;
  if (!argsValid) {
    return NS_ERROR_ILLEGAL_VALUE;
  }

  HandleData(aBuf, aLen);
  *oDontFeedMe = false;
  return NS_OK;
}

//----------------------------------------------------------
/**
 * Signals the end of input by calling DataEnd().
 *
 * @return always NS_OK.
 */
NS_IMETHODIMP nsCyrXPCOMDetector::Done() {
  NS_ASSERTION(mObserver != nullptr, "have not init yet");

  DataEnd();
  return NS_OK;
}

//----------------------------------------------------------
/**
 * Delivers the detected charset to the attached observer.
 *
 * Calls Notify(aCharset, eBestAnswer) on mObserver. If no observer has been
 * attached (Init() was never called, or it rejected a null observer), the
 * assertion fires and the call is skipped instead of dereferencing a null
 * pointer.
 *
 * @param aCharset  name of the detected charset, passed through to the
 *                  observer unchanged.
 */
void nsCyrXPCOMDetector::Report(const char* aCharset) {
  NS_ASSERTION(mObserver != nullptr, "have not init yet");
  // The assertion alone does not stop execution, so guard the dereference
  // explicitly.
  if (mObserver == nullptr) {
    return;
  }
  mObserver->Notify(aCharset, eBestAnswer);
}