DXR is a code search and navigation tool aimed at making sense of large projects. It supports full-text and regex searches as well as structural queries.

Implementation

Mercurial (3391eef42443)

VCS Links

Line Code
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef Classifier_h__
#define Classifier_h__

#include "Entries.h"
#include "HashStore.h"
#include "ProtocolParser.h"
#include "LookupCache.h"
#include "nsCOMPtr.h"
#include "nsString.h"
#include "nsIFile.h"
#include "nsDataHashtable.h"

class nsIThread;

namespace mozilla {
namespace safebrowsing {

/**
 * Maintains the stores and LookupCaches for the url classifier.
 */
class Classifier {
 public:
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(Classifier);

  Classifier();

  nsresult Open(nsIFile& aCacheDirectory);
  void Close();
  void Reset();  // Not including any intermediary for update.

  /**
   * Clear data for specific tables.
   * If ClearType is Clear_Cache, this function will only clear cache in lookup
   * cache, otherwise, it will clear data in lookup cache and data stored on
   * disk.
   */
  enum ClearType {
    Clear_Cache,
    Clear_All,
  };
  void ResetTables(ClearType aType, const nsTArray<nsCString>& aTables);

  /**
   * Get the list of active tables and their chunks in a format
   * suitable for an update request.
   */
  void TableRequest(nsACString& aResult);

  /*
   * Get all tables that we know about.
   */
  nsresult ActiveTables(nsTArray<nsCString>& aTables) const;

  /**
   * Check URL fragments against a specified table.
   * The fragments should be generated by |LookupCache::GetLookupFragments|
   */
  nsresult CheckURIFragments(const nsTArray<nsCString>& aSpecFragments,
                             const nsACString& table,
                             LookupResultArray& aResults);

  /**
   * Asynchronously apply updates to the in-use databases. When the
   * update is complete, the caller can be notified by |aCallback|, which
   * will occur on the caller thread.
   */
  using AsyncUpdateCallback = std::function<void(nsresult)>;
  nsresult AsyncApplyUpdates(const TableUpdateArray& aUpdates,
                             const AsyncUpdateCallback& aCallback);

  /**
   * Wait until the ongoing async update is finished and callback
   * is fired. Once this function returns, AsyncApplyUpdates is
   * no longer available.
   */
  void FlushAndDisableAsyncUpdate();

  /**
   * Apply full hashes retrived from gethash to cache.
   */
  nsresult ApplyFullHashes(ConstTableUpdateArray& aUpdates);

  /*
   * Get a bunch of extra prefixes to query for completion
   * and mask the real entry being requested
   */
  nsresult ReadNoiseEntries(const Prefix& aPrefix, const nsACString& aTableName,
                            uint32_t aCount, PrefixArray& aNoiseEntries);

#ifdef MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES
  nsresult DumpRawTableUpdates(const nsACString& aRawUpdates);
#endif

  static void SplitTables(const nsACString& str, nsTArray<nsCString>& tables);

  // Given a root store directory, return a private store directory
  // based on the table name. To avoid migration issue, the private
  // store directory is only different from root directory for V4 tables.
  //
  // For V4 tables (suffixed by '-proto'), the private directory would
  // be [root directory path]/[provider]. The provider of V4 tables is
  // 'google4'.
  //
  // Note that if the table name is not owned by any provider, just use
  // the root directory.
  static nsresult GetPrivateStoreDirectory(nsIFile* aRootStoreDirectory,
                                           const nsACString& aTableName,
                                           const nsACString& aProvider,
                                           nsIFile** aPrivateStoreDirectory);

  // Swap in in-memory and on-disk database and remove all
  // update intermediaries.
  nsresult SwapInNewTablesAndCleanup();

  RefPtr<LookupCache> GetLookupCache(const nsACString& aTable,
                                     bool aForUpdate = false);

  void GetCacheInfo(const nsACString& aTable,
                    nsIUrlClassifierCacheInfo** aCache);

 private:
  ~Classifier();

  void DropStores();
  void DeleteTables(nsIFile* aDirectory, const nsTArray<nsCString>& aTables);

  nsresult CreateStoreDirectory();
  nsresult SetupPathNames();
  nsresult RecoverBackups();
  nsresult CleanToDelete();
  nsresult CopyInUseDirForUpdate();
  nsresult CopyDirectoryInterruptible(nsCOMPtr<nsIFile>& aDestDir,
                                      nsCOMPtr<nsIFile>& aSourceDir);
  nsresult RegenActiveTables();

  void MergeNewLookupCaches();  // Merge mNewLookupCaches into mLookupCaches.

  void CopyAndInvalidateFullHashCache();

  // Remove any intermediary for update, including in-memory
  // and on-disk data.
  void RemoveUpdateIntermediaries();

#ifdef MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES
  already_AddRefed<nsIFile> GetFailedUpdateDirectroy();
  nsresult DumpFailedUpdate();
#endif

  nsresult ScanStoreDir(nsIFile* aDirectory, nsTArray<nsCString>& aTables);

  nsresult UpdateHashStore(TableUpdateArray& aUpdates,
                           const nsACString& aTable);

  nsresult UpdateTableV4(TableUpdateArray& aUpdates, const nsACString& aTable);

  nsresult UpdateCache(RefPtr<const TableUpdate> aUpdates);

  RefPtr<LookupCache> GetLookupCacheForUpdate(const nsACString& aTable) {
    return GetLookupCache(aTable, true);
  }

  RefPtr<LookupCache> GetLookupCacheFrom(const nsACString& aTable,
                                         LookupCacheArray& aLookupCaches,
                                         nsIFile* aRootStoreDirectory);

  bool CheckValidUpdate(TableUpdateArray& aUpdates, const nsACString& aTable);

  nsresult LoadMetadata(nsIFile* aDirectory, nsACString& aResult);

  static nsCString GetProvider(const nsACString& aTableName);

  /**
   * The "background" part of ApplyUpdates. Once the background update
   * is called, the foreground update has to be called along with the
   * background result no matter whether the background update is
   * successful or not.
   */
  nsresult ApplyUpdatesBackground(TableUpdateArray& aUpdates,
                                  nsACString& aFailedTableName);

  /**
   * The "foreground" part of ApplyUpdates. The in-use data (in-memory and
   * on-disk) will be touched so this MUST be mutually exclusive to other
   * member functions.
   *
   * If |aBackgroundRv| is successful, the return value is the result of
   * bringing stuff to the foreground. Otherwise, the foreground table may
   * be reset according to the background update failed reason and
   * |aBackgroundRv| will be returned to forward the background update result.
   */
  nsresult ApplyUpdatesForeground(nsresult aBackgroundRv,
                                  const nsACString& aFailedTableName);

  // Used by worker thread and update thread to abort current operation.
  bool ShouldAbort() const;

  // Add built-in entries for testing.
  nsresult AddMozEntries(nsTArray<nsCString>& aTables);

  // Remove test files if exist
  nsresult ClearLegacyFiles();

  // Root dir of the Local profile.
  nsCOMPtr<nsIFile> mCacheDirectory;
  // Main directory where to store the databases.
  nsCOMPtr<nsIFile> mRootStoreDirectory;
  // Used for atomically updating the other dirs.
  nsCOMPtr<nsIFile> mBackupDirectory;
  nsCOMPtr<nsIFile> mUpdatingDirectory;  // For update only.
  nsCOMPtr<nsIFile> mToDeleteDirectory;
  LookupCacheArray mLookupCaches;  // For query only.
  nsTArray<nsCString> mActiveTablesCache;
  uint32_t mHashKey;

  // In-memory cache for the result of TableRequest. See
  // nsIUrlClassifierDBService.getTables for the format.
  nsCString mTableRequestResult;

  // Whether mTableRequestResult is outdated and needs to
  // be reloaded from disk.
  bool mIsTableRequestResultOutdated;

  // The copy of mLookupCaches for update only.
  LookupCacheArray mNewLookupCaches;

  // True when Reset() is called.
  bool mUpdateInterrupted;

  // True once CLose() has been called
  bool mIsClosed;

  nsCOMPtr<nsIThread> mUpdateThread;  // For async update.

  // Identical to mRootStoreDirectory but for update only because
  // nsIFile is not thread safe and mRootStoreDirectory needs to
  // be accessed in CopyInUseDirForUpdate().
  // It will be initialized right before update on the worker thread.
  nsCOMPtr<nsIFile> mRootStoreDirectoryForUpdate;
};

}  // namespace safebrowsing
}  // namespace mozilla

#endif