mozHunspell.cpp - mozsearch

mozilla-central/extensions/spellcheck/hunspell/glue/mozHunspell.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/******* BEGIN LICENSE BLOCK *******

 * Version: MPL 1.1/GPL 2.0/LGPL 2.1

 * The contents of this file are subject to the Mozilla Public License Version

 * 1.1 (the "License"); you may not use this file except in compliance with

 * the License. You may obtain a copy of the License at

 * http://www.mozilla.org/MPL/

 * Software distributed under the License is distributed on an "AS IS" basis,

 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License

 * for the specific language governing rights and limitations under the

 * License.

 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)

 * and László Németh (Hunspell). Portions created by the Initial Developers

 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.

 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)

 *                 David Einstein (deinst@world.std.com)

 *                 Michiel van Leeuwen (mvl@exedo.nl)

 *                 Caolan McNamara (cmc@openoffice.org)

 *                 László Németh (nemethl@gyorsposta.hu)

 *                 Davide Prina

 *                 Giuseppe Modugno

 *                 Gianluca Turconi

 *                 Simon Brouwer

 *                 Noll Janos

 *                 Biro Arpad

 *                 Goldman Eleonora

 *                 Sarlos Tamas

 *                 Bencsath Boldizsar

 *                 Halacsy Peter

 *                 Dvornik Laszlo

 *                 Gefferth Andras

 *                 Nagy Viktor

 *                 Varga Daniel

 *                 Chris Halls

 *                 Rene Engelhard

 *                 Bram Moolenaar

 *                 Dafydd Jones

 *                 Harri Pitkanen

 *                 Andras Timar

 *                 Tor Lillqvist

 *                 Jesper Kristensen (mail@jesperkristensen.dk)

 * Alternatively, the contents of this file may be used under the terms of

 * either the GNU General Public License Version 2 or later (the "GPL"), or

 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),

 * in which case the provisions of the GPL or the LGPL are applicable instead

 * of those above. If you wish to allow use of your version of this file only

 * under the terms of either the GPL or the LGPL, and not to allow others to

 * use your version of this file under the terms of the MPL, indicate your

 * decision by deleting the provisions above and replace them with the notice

 * and other provisions required by the GPL or the LGPL. If you do not delete

 * the provisions above, a recipient may use your version of this file under

 * the terms of any one of the MPL, the GPL or the LGPL.

 ******* END LICENSE BLOCK *******/

#include "mozHunspell.h"

#include "nsReadableUtils.h"

#include "nsString.h"

#include "nsIObserverService.h"

#include "nsIDirectoryEnumerator.h"

#include "nsIFile.h"

#include "nsUnicharUtils.h"

#include "nsCRT.h"

#include "mozInlineSpellChecker.h"

#include "nsIPrefBranch.h"

#include "nsIPrefService.h"

#include "nsNetUtil.h"

#include "prenv.h"

#include "mozilla/Components.h"

#include "mozilla/Services.h"

#include "mozilla/dom/ContentParent_NotifyUpdatedDictionaries.h"

#include <stdlib.h>

#include <tuple>

using namespace mozilla;

// XPCOM reference counting for mozHunspell, using the cycle-collecting
// variants of the AddRef/Release implementation macros.
NS_IMPL_CYCLE_COLLECTING_ADDREF(mozHunspell)

NS_IMPL_CYCLE_COLLECTING_RELEASE(mozHunspell)

// QueryInterface table: the interfaces a mozHunspell instance answers to.
NS_INTERFACE_MAP_BEGIN(mozHunspell)

  NS_INTERFACE_MAP_ENTRY(mozISpellCheckingEngine)

  NS_INTERFACE_MAP_ENTRY(nsIObserver)

  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)

  NS_INTERFACE_MAP_ENTRY(nsIMemoryReporter)

  // nsISupports is resolved through the mozISpellCheckingEngine base
  // (presumably because it is reachable via several of the bases above).
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellCheckingEngine)

  NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozHunspell)

NS_INTERFACE_MAP_END

// Cycle-collection participant for mozHunspell; mPersonalDictionary is the
// member handed to the collector.
// NOTE(review): the _WEAK variant presumably also covers weak references to
// this object -- confirm against nsCycleCollectionParticipant.h.
NS_IMPL_CYCLE_COLLECTION_WEAK(mozHunspell, mPersonalDictionary)

// Component factory: builds a mozHunspell, runs Init() on it and hands it out
// as a mozISpellCheckingEngine. Yields nullptr when Init() fails.
NS_IMPL_COMPONENT_FACTORY(mozHunspell) {
  RefPtr<mozHunspell> engine = MakeRefPtr<mozHunspell>();
  if (NS_FAILED(engine->Init())) {
    return nullptr;
  }
  return engine.forget().downcast<mozISpellCheckingEngine>();
}

// Constructor. Does no real work in release builds; debug builds verify that
// it only ever runs once.
mozHunspell::mozHunspell() {
#if defined(DEBUG)
  // Memory reporting relies on one static counter inside HunspellAllocator,
  // so a second instance of this class would make the reported numbers
  // meaningless. Trip an assertion if the constructor is entered twice.
  static bool hasRun = false;
  MOZ_ASSERT(!hasRun);
  hasRun = true;
#endif
}

// One-time setup: builds the initial dictionary list (without notifying child
// processes), subscribes to the profile-change notifications and registers
// this object as a weak memory reporter. Always returns NS_OK.
nsresult mozHunspell::Init() {
  LoadDictionaryList(false);

  // Both topics trigger a reload of the dictionary list in Observe(). The
  // final `true` argument is forwarded unchanged to AddObserver() for each.
  if (nsCOMPtr<nsIObserverService> observerService =
          mozilla::services::GetObserverService()) {
    const char* const topics[] = {"profile-do-change", "profile-after-change"};
    for (const char* topic : topics) {
      observerService->AddObserver(this, topic, true);
    }
  }

  mozilla::RegisterWeakMemoryReporter(this);

  return NS_OK;
}

// Destructor: undoes the memory-reporter registration made in Init(), then
// drops the personal dictionary and every loaded Hunspell instance.
mozHunspell::~mozHunspell() {
  // Unregister first, before the state the reporter describes is torn down.
  mozilla::UnregisterWeakMemoryReporter(this);

  mPersonalDictionary = nullptr;
  mHunspells.Clear();
}

// Appends the key of every currently enabled entry of mHunspells to
// aDictionaries, which is expected to be empty on entry. Always returns NS_OK.
NS_IMETHODIMP
mozHunspell::GetDictionaries(nsTArray<nsCString>& aDictionaries) {
  MOZ_ASSERT(aDictionaries.IsEmpty());

  for (auto entry = mHunspells.ConstIter(); !entry.Done(); entry.Next()) {
    if (!entry.Data().mEnabled) {
      // Disabled dictionaries stay cached but are not reported.
      continue;
    }
    aDictionaries.AppendElement(entry.Key());
  }

  return NS_OK;
}

/* Select the dictionaries used for spell checking.
 *
 * An empty list drops every cached dictionary. Otherwise cached dictionaries
 * that are not requested are disabled, requested ones are enabled (or created
 * when missing or when their affix file changed), and the first newly created
 * dictionary is loaded immediately; the remaining ones load lazily.
 *
 * Returns NS_ERROR_FILE_NOT_FOUND if a requested name is not present in
 * mDictionaries, or the failure code of GetSpec() / LoadIfNecessary().
 */
NS_IMETHODIMP
mozHunspell::SetDictionaries(const nsTArray<nsCString>& aDictionaries) {
  if (aDictionaries.IsEmpty()) {
    mHunspells.Clear();
    return NS_OK;
  }

  // Switch off everything that is cached but no longer requested.
  for (auto cached = mHunspells.Iter(); !cached.Done(); cached.Next()) {
    const bool stillRequested = aDictionaries.Contains(cached.Key());
    if (!stillRequested) {
      cached.Data().mEnabled = false;
    }
  }

  // True until one newly created dictionary has been loaded eagerly.
  bool eagerLoadPending = true;
  for (const nsCString& requested : aDictionaries) {
    // mDictionaries is keyed by the UTF-16 form of the name.
    nsIURI* affFile = mDictionaries.GetWeak(NS_ConvertUTF8toUTF16(requested));
    if (!affFile) {
      return NS_ERROR_FILE_NOT_FOUND;
    }

    nsAutoCString affixSpec;
    nsresult rv = affFile->GetSpec(affixSpec);
    NS_ENSURE_SUCCESS(rv, rv);

    // Reuse the cached instance when it was built from the same affix file.
    auto existing = mHunspells.Lookup(requested);
    if (existing && existing.Data().mAffixFileName == affixSpec) {
      existing.Data().mEnabled = true;
      continue;
    }

    DictionaryData fresh;
    fresh.mAffixFileName = affixSpec;

    // Only the first new dictionary is loaded up front; the others are loaded
    // on demand while checking.
    if (eagerLoadPending) {
      rv = fresh.LoadIfNecessary();
      NS_ENSURE_SUCCESS(rv, rv);
      eagerLoadPending = false;
    }

    mHunspells.InsertOrUpdate(requested, std::move(fresh));
  }

  // With many dictionaries cached, evict the disabled ones to limit memory
  // use.
  if (mHunspells.Count() > 10) {
    mHunspells.RemoveIf(
        [](const auto& aEntry) { return !aEntry.Data().mEnabled; });
  }

  return NS_OK;
}

// Returns the current personal dictionary (possibly null) through the out
// parameter, adding a reference for the caller when it is non-null.
NS_IMETHODIMP mozHunspell::GetPersonalDictionary(
    mozIPersonalDictionary** aPersonalDictionary) {
  NS_IF_ADDREF(*aPersonalDictionary = mPersonalDictionary);
  return NS_OK;
}

// Replaces the personal dictionary that Check() consults when the Hunspell
// dictionaries reject a word. Passing nullptr removes it.
NS_IMETHODIMP mozHunspell::SetPersonalDictionary(
    mozIPersonalDictionary* aPersonalDictionary) {
  mPersonalDictionary = aPersonalDictionary;

  return NS_OK;
}

// Appends the name of every known dictionary (the keys of mDictionaries,
// converted from UTF-16 to UTF-8) to aDictionaries, which is expected to be
// empty on entry. Always returns NS_OK.
NS_IMETHODIMP mozHunspell::GetDictionaryList(
    nsTArray<nsCString>& aDictionaries) {
  MOZ_ASSERT(aDictionaries.IsEmpty());

  for (const auto& name : mDictionaries.Keys()) {
    NS_ConvertUTF16toUTF8 utf8Name(name);
    aDictionaries.AppendElement(utf8Name);
  }

  return NS_OK;
}

void mozHunspell::LoadDictionaryList(bool aNotifyChildProcesses) {

  mDictionaries.Clear();

  nsresult rv;

  // find built in dictionaries, or dictionaries specified in

  // spellchecker.dictionary_path in prefs

  nsCOMPtr<nsIFile> dictDir;

  // check preferences first

  nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));

  if (prefs) {

    nsAutoCString extDictPath;

    rv = prefs->GetCharPref("spellchecker.dictionary_path", extDictPath);

    if (NS_SUCCEEDED(rv)) {

      // set the spellchecker.dictionary_path

      rv = NS_NewNativeLocalFile(extDictPath, true, getter_AddRefs(dictDir));

    if (dictDir) {

      LoadDictionariesFromDir(dictDir);

  // find dictionaries in DICPATH

  char* dicEnv = PR_GetEnv("DICPATH");

  if (dicEnv) {

    // do a two-pass dance so dictionaries are loaded right-to-left as

    // preference

    nsTArray<nsCOMPtr<nsIFile>> dirs;

    nsAutoCString env(dicEnv);  // assume dicEnv is UTF-8

    char* currPath = nullptr;

    char* nextPaths = env.BeginWriting();

    while ((currPath = NS_strtok(":", &nextPaths))) {

      nsCOMPtr<nsIFile> dir;

      rv =

          NS_NewNativeLocalFile(nsCString(currPath), true, getter_AddRefs(dir));

      if (NS_SUCCEEDED(rv)) {

        dirs.AppendElement(dir);

    // load them in reverse order so they override each other properly

    for (int32_t i = dirs.Length() - 1; i >= 0; i--) {

      LoadDictionariesFromDir(dirs[i]);

  // find dictionaries from restartless extensions

  for (int32_t i = 0; i < mDynamicDirectories.Count(); i++) {

    LoadDictionariesFromDir(mDynamicDirectories[i]);

  for (const auto& dictionaryEntry : mDynamicDictionaries) {

    mDictionaries.InsertOrUpdate(dictionaryEntry.GetKey(),

                                 dictionaryEntry.GetData());

  DictionariesChanged(aNotifyChildProcesses);

void mozHunspell::DictionariesChanged(bool aNotifyChildProcesses) {

  // Now we have finished updating the list of dictionaries, update the current

  // dictionary and any editors which may use it.

  mozInlineSpellChecker::UpdateCanEnableInlineSpellChecking();

  if (aNotifyChildProcesses) {

    mozilla::dom::ContentParent_NotifyUpdatedDictionaries();

  // Check if the current dictionaries are still available.

  // If not, try to replace it with other dictionaries of the same language.

  if (!mHunspells.IsEmpty()) {

    nsTArray<nsCString> dictionaries;

    for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) {

      if (iter.Data().mEnabled) {

        dictionaries.AppendElement(iter.Key());

    nsresult rv = SetDictionaries(dictionaries);

    if (NS_SUCCEEDED(rv)) return;

  // If the current dictionaries are gone, and we don't have a good replacement,

  // set no current dictionary.

  if (!mHunspells.IsEmpty()) {

    nsTArray<nsCString> empty;

    SetDictionaries(empty);

// Scans aDir for Hunspell dictionaries. Every "<name>.dic" file that has a
// matching "<name>.aff" next to it is recorded in mDictionaries under <name>
// (with '_' replaced by '-'), mapped to the file URI of the .aff file.
//
// Returns NS_ERROR_UNEXPECTED when aDir does not exist, is not a directory or
// cannot be enumerated; returns the failure code of NS_NewFileURI() if URI
// creation fails; NS_OK otherwise.
NS_IMETHODIMP
mozHunspell::LoadDictionariesFromDir(nsIFile* aDir) {
  nsresult rv;
  bool flag = false;

  rv = aDir->Exists(&flag);
  if (NS_FAILED(rv) || !flag) {
    return NS_ERROR_UNEXPECTED;
  }

  rv = aDir->IsDirectory(&flag);
  if (NS_FAILED(rv) || !flag) {
    return NS_ERROR_UNEXPECTED;
  }

  nsCOMPtr<nsIDirectoryEnumerator> entries;
  rv = aDir->GetDirectoryEntries(getter_AddRefs(entries));
  if (NS_FAILED(rv)) {
    return NS_ERROR_UNEXPECTED;
  }

  nsCOMPtr<nsIFile> entry;
  while (NS_SUCCEEDED(entries->GetNextFile(getter_AddRefs(entry))) && entry) {
    nsAutoString entryName;
    entry->GetLeafName(entryName);
    if (!StringEndsWith(entryName, u".dic"_ns)) {
      continue;
    }

    // Base name: the leaf name minus the 4-character ".dic" suffix.
    nsAutoString dict(entryName);
    dict.SetLength(dict.Length() - 4);

    // Re-point the nsIFile at the sibling .aff file and require it to exist.
    nsAutoString affixName(dict);
    affixName.AppendLiteral(".aff");
    entry->SetLeafName(affixName);

    rv = entry->Exists(&flag);
    if (NS_FAILED(rv) || !flag) {
      continue;
    }

    // Dictionary names use '-' where the file name uses '_'.
    dict.ReplaceChar('_', '-');

    nsCOMPtr<nsIURI> uri;
    rv = NS_NewFileURI(getter_AddRefs(uri), entry);
    NS_ENSURE_SUCCESS(rv, rv);

    mDictionaries.InsertOrUpdate(dict, uri);
  }

  return NS_OK;
}

// Encodes the UTF-16 string aStr into this dictionary's character set and
// stores the bytes in aDst.
//
// Returns:
//   NS_ERROR_NOT_INITIALIZED - no encoder is available (dictionary not loaded)
//   NS_ERROR_OUT_OF_MEMORY   - the worst-case output size overflows size_t
//   NS_ERROR_UENC_NOMAPPING  - aStr could not be encoded completely (for
//                              example it holds a character the dictionary's
//                              encoding cannot represent); the contents of
//                              aDst are unspecified in that case
//   NS_OK                    - aDst holds exactly the encoded bytes
//
// The encoder is returned to its initial state after every conversion,
// successful or not, so that a failed conversion cannot leave state behind
// that would affect the next word.
nsresult mozHunspell::DictionaryData::ConvertCharset(const nsAString& aStr,
                                                     std::string& aDst) {
  if (NS_WARN_IF(!mEncoder)) {
    return NS_ERROR_NOT_INITIALIZED;
  }

  auto src = Span(aStr.BeginReading(), aStr.Length());

  // Size the output for the worst case; it is trimmed to the real length
  // below.
  CheckedInt<size_t> needed =
      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(src.Length());
  if (!needed.isValid()) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  aDst.resize(needed.value());

  char* dstPtr = &aDst[0];
  auto dst = Span(reinterpret_cast<uint8_t*>(dstPtr), needed.value());

  uint32_t result;
  size_t written;
  std::tie(result, std::ignore, written) =
      mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true);

  // Reset the encoder before looking at the outcome. Previously this was done
  // on the success path only, so after a failed conversion the same encoder
  // was reused for the next word without being returned to its initial state.
  mEncoder->Encoding()->NewEncoderInto(*mEncoder);

  MOZ_ASSERT(result != kOutputFull);
  if (result != kInputEmpty) {
    return NS_ERROR_UENC_NOMAPPING;
  }

  aDst.resize(written);
  return NS_OK;
}

// Lazily creates the Hunspell instance plus the encoder/decoder pair for this
// dictionary. A no-op once everything is present. A failed attempt is
// remembered in mLoadFailed so that later calls fail fast with
// NS_ERROR_FAILURE instead of retrying.
nsresult mozHunspell::DictionaryData::LoadIfNecessary() {
  const bool alreadyLoaded = mHunspell && mEncoder && mDecoder;
  if (alreadyLoaded) {
    return NS_OK;
  }

  if (mLoadFailed) {
    return NS_ERROR_FAILURE;
  }

  // The .dic file shares the affix file's name, with the extension swapped.
  const int32_t extensionPos = mAffixFileName.RFindChar('.');
  if (extensionPos == -1) {
    mLoadFailed = true;
    return NS_ERROR_FAILURE;
  }
  nsCString dictFileName = mAffixFileName;
  dictFileName.SetLength(extensionPos);
  dictFileName.AppendLiteral(".dic");

  UniquePtr<RLBoxHunspell> instance(
      RLBoxHunspell::Create(mAffixFileName, dictFileName));
  if (!instance) {
    mLoadFailed = true;
    // TODO Bug 1788857: Verify error propagation in case of inaccessible file
    return NS_ERROR_OUT_OF_MEMORY;
  }
  mHunspell = std::move(instance);

  // Map the encoding label reported by the dictionary to an Encoding; an
  // unrecognised label makes the dictionary unusable.
  auto dictEncoding =
      Encoding::ForLabelNoReplacement(mHunspell->get_dict_encoding());
  if (!dictEncoding) {
    mLoadFailed = true;
    return NS_ERROR_UCONV_NOCONV;
  }

  mEncoder = dictEncoding->NewEncoder();
  mDecoder = dictEncoding->NewDecoderWithoutBOMHandling();

  return NS_OK;
}

// nsIMemoryReporter: reports the byte count tracked by HunspellAllocator under
// the "explicit/spell-check" path. Always returns NS_OK.
NS_IMETHODIMP
mozHunspell::CollectReports(nsIHandleReportCallback* aHandleReport,
                            nsISupports* aData, bool aAnonymize) {
  const auto bytesInUse = HunspellAllocator::MemoryAllocated();

  // The macro reports through aHandleReport and aData.
  MOZ_COLLECT_REPORT("explicit/spell-check", KIND_HEAP, UNITS_BYTES,
                     bytesInUse, "Memory used by the spell-checking engine.");

  return NS_OK;
}

// Spell-checks aWord against every enabled dictionary and stores the verdict
// in *aResult. The word is accepted as soon as one dictionary accepts it. If
// the dictionaries reject it and a personal dictionary is set, the personal
// dictionary has the final say.
//
// *aResult starts out true, so a word that no dictionary was able to examine
// (load failure, unencodable word, ...) is reported as correctly spelled.
//
// Returns NS_ERROR_INVALID_ARG for a null aResult, NS_ERROR_FAILURE when no
// dictionary is selected, otherwise NS_OK or the personal dictionary's result.
NS_IMETHODIMP
mozHunspell::Check(const nsAString& aWord, bool* aResult) {
  if (NS_WARN_IF(!aResult)) {
    return NS_ERROR_INVALID_ARG;
  }

  if (NS_WARN_IF(mHunspells.IsEmpty())) {
    return NS_ERROR_FAILURE;
  }

  *aResult = true;
  for (auto entry = mHunspells.Iter(); !entry.Done(); entry.Next()) {
    auto& dictionary = entry.Data();
    if (!dictionary.mEnabled) {
      continue;
    }

    // A dictionary that cannot be loaded is simply skipped.
    if (NS_FAILED(dictionary.LoadIfNecessary())) {
      continue;
    }

    // So is one whose encoding cannot represent the word.
    std::string encodedWord;
    if (NS_FAILED(dictionary.ConvertCharset(aWord, encodedWord))) {
      continue;
    }

    // Some encodings can produce a string starting with a NUL byte. Hunspell
    // takes C-style strings, would see an empty word, and treats empty words
    // as correct. Skip those so another dictionary gets a chance at the word.
    const bool looksEmpty = encodedWord.empty() || encodedWord[0] == 0;
    if (looksEmpty) {
      continue;
    }

    *aResult = dictionary.mHunspell->spell(encodedWord);
    if (*aResult) {
      break;
    }
  }

  if (!*aResult && mPersonalDictionary) {
    return mPersonalDictionary->Check(aWord, aResult);
  }

  return NS_OK;
}

// Collects spelling suggestions for aWord from every enabled dictionary and
// appends them, converted to UTF-16, to aSuggestions (expected to be empty on
// entry).
//
// A dictionary that cannot be loaded, or whose character set cannot represent
// aWord, is skipped, which matches how Check() treats such dictionaries.
// Previously an encoding failure in one dictionary aborted the whole call, so
// a single dictionary unable to encode the word suppressed the suggestions of
// all the others.
//
// Returns NS_ERROR_FAILURE when no dictionary is selected, the decoder's error
// code if a suggestion cannot be converted to UTF-16, and NS_OK otherwise.
NS_IMETHODIMP
mozHunspell::Suggest(const nsAString& aWord, nsTArray<nsString>& aSuggestions) {
  if (NS_WARN_IF(mHunspells.IsEmpty())) {
    return NS_ERROR_FAILURE;
  }

  MOZ_ASSERT(aSuggestions.IsEmpty());

  for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) {
    auto& dictionary = iter.Data();
    if (!dictionary.mEnabled) {
      continue;
    }

    nsresult rv = dictionary.LoadIfNecessary();
    if (NS_FAILED(rv)) {
      continue;
    }

    std::string charsetWord;
    rv = dictionary.ConvertCharset(aWord, charsetWord);
    if (NS_FAILED(rv)) {
      // This dictionary cannot even represent the word; let the remaining
      // dictionaries contribute their suggestions.
      continue;
    }

    std::vector<std::string> suggestions =
        dictionary.mHunspell->suggest(charsetWord);
    if (suggestions.empty()) {
      continue;
    }

    aSuggestions.SetCapacity(aSuggestions.Length() + suggestions.size());
    for (Span<const char> charSrc : suggestions) {
      // Convert the suggestion from the dictionary's encoding to UTF-16,
      // decoding straight into a freshly appended array element.
      auto src = AsBytes(charSrc);
      rv = dictionary.mDecoder->Encoding()->DecodeWithoutBOMHandling(
          src, *aSuggestions.AppendElement());
      NS_ENSURE_SUCCESS(rv, rv);

      // Put the stored decoder back into its initial state.
      dictionary.mDecoder->Encoding()->NewDecoderWithoutBOMHandlingInto(
          *dictionary.mDecoder);
    }
  }

  return NS_OK;
}

// nsIObserver: any observed notification rebuilds the dictionary list, without
// notifying child processes. Only the two profile-change topics registered in
// Init() are expected here. Always returns NS_OK.
NS_IMETHODIMP
mozHunspell::Observe(nsISupports* aSubj, const char* aTopic,
                     const char16_t* aData) {
  NS_ASSERTION(!strcmp(aTopic, "profile-do-change") ||
                   !strcmp(aTopic, "profile-after-change"),
               "Unexpected observer topic");

  // The reload happens whatever the topic is; the assertion above is a
  // debug-only sanity check.
  LoadDictionaryList(false);

  return NS_OK;
}

// Registers an additional directory to scan for dictionaries, then rebuilds
// the dictionary list and notifies child processes. Always returns NS_OK.
NS_IMETHODIMP mozHunspell::AddDirectory(nsIFile* aDir) {
  mDynamicDirectories.AppendObject(aDir);

  // Rescan everything so the new directory's dictionaries become visible.
  LoadDictionaryList(true);

  return NS_OK;
}

// Unregisters a directory previously passed to AddDirectory(), then rebuilds
// the dictionary list and notifies child processes. Thunderbird builds also
// broadcast SPELLCHECK_DICTIONARY_REMOVE_NOTIFICATION. Always returns NS_OK.
NS_IMETHODIMP mozHunspell::RemoveDirectory(nsIFile* aDir) {
  mDynamicDirectories.RemoveObject(aDir);
  LoadDictionaryList(true);

#ifdef MOZ_THUNDERBIRD
  // Thunderbird derives the dictionary from the document's "lang" attribute,
  // so when a dictionary goes away that attribute has to be changed. This
  // notification is what lets it do so.
  if (nsCOMPtr<nsIObserverService> observerService =
          mozilla::services::GetObserverService()) {
    observerService->NotifyObservers(
        nullptr, SPELLCHECK_DICTIONARY_REMOVE_NOTIFICATION, nullptr);
  }
#endif

  return NS_OK;
}

// Registers a single dictionary for aLang, identified by the URI aFile. The
// entry is stored both in the dynamic set (so it survives a rebuild of the
// list) and in the live list, after which DictionariesChanged(true) runs.
// Returns NS_ERROR_INVALID_ARG when aFile is null.
NS_IMETHODIMP mozHunspell::AddDictionary(const nsAString& aLang,
                                         nsIURI* aFile) {
  NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG);

  // Remember it for future LoadDictionaryList() runs ...
  mDynamicDictionaries.InsertOrUpdate(aLang, aFile);
  // ... and make it available right away without a full rescan.
  mDictionaries.InsertOrUpdate(aLang, aFile);

  DictionariesChanged(true);

  return NS_OK;
}

// Unregisters the dictionary added for aLang through AddDictionary(), but only
// if the registered URI equals aFile. *aRetVal tells whether an entry was
// removed; when it was, the dictionary list is rebuilt and child processes
// are notified. Returns NS_ERROR_INVALID_ARG when aFile is null, else NS_OK.
NS_IMETHODIMP mozHunspell::RemoveDictionary(const nsAString& aLang,
                                            nsIURI* aFile, bool* aRetVal) {
  NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG);
  *aRetVal = false;

  nsCOMPtr<nsIURI> registered = mDynamicDictionaries.Get(aLang);
  if (!registered) {
    // Nothing was registered for this language.
    return NS_OK;
  }

  bool sameFile = false;
  if (NS_FAILED(registered->Equals(aFile, &sameFile)) || !sameFile) {
    // A different file is registered for this language; leave it alone.
    return NS_OK;
  }

  mDynamicDictionaries.Remove(aLang);
  LoadDictionaryList(true);
  *aRetVal = true;

  return NS_OK;
}