Source code

Revision control

Copy as Markdown

Other Tools

//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "LookupCache.h"
#include "LookupCacheV4.h"
#include "HashStore.h"
#include "nsIFileStreams.h"
#include "nsISeekableStream.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Telemetry.h"
#include "mozilla/Logging.h"
#include "nsNetUtil.h"
#include "nsCheckSummedOutputStream.h"
#include "crc32c.h"
#include "prprf.h"
#include "Classifier.h"
#include "nsUrlClassifierInfo.h"
#include "nsUrlClassifierUtils.h"
#include "nsUrlClassifierDBService.h"
#ifdef DEBUG
# include "nsPrintfCString.h"
#endif
// We act as the main entry point for all the real lookups,
// so note that those are not done to the actual HashStore.
// The latter solely exists to store the data needed to handle
// the updates from the protocol.
// This module provides a front for PrefixSet, mUpdateCompletions,
// and mGetHashCache, which together contain everything needed to
// provide a classification as long as the data is up to date.
// PrefixSet stores and provides lookups for 4-byte prefixes.
// mUpdateCompletions contains 32-byte completions which were
// contained in updates. They are retrieved from HashStore/.sbtore
// on startup.
// mGetHashCache contains 32-byte completions which were
// returned from the gethash server. They are not serialized,
// only cached until the next update.
// MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
#define LOG(args) \
MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
#define LOG_ENABLED() \
MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
namespace mozilla {
namespace safebrowsing {
const uint32_t LookupCache::MAX_BUFFER_SIZE = 64 * 1024;
const int CacheResultV2::VER = CacheResult::V2;
const int CacheResultV4::VER = CacheResult::V4;
const int LookupCacheV2::VER = 2;
const uint32_t LookupCacheV2::VLPSET_MAGIC = 0xe5b862e7;
const uint32_t LookupCacheV2::VLPSET_VERSION = 1;
namespace {
//////////////////////////////////////////////////////////////////////////
// A set of lightweight functions for reading/writing value from/to file.
template <typename T>
struct ValueTraits {
static_assert(sizeof(T) <= LookupCacheV4::MAX_METADATA_VALUE_LENGTH,
"LookupCacheV4::MAX_METADATA_VALUE_LENGTH is too small.");
static uint32_t Length(const T& aValue) { return sizeof(T); }
static char* WritePtr(T& aValue, uint32_t aLength) { return (char*)&aValue; }
static const char* ReadPtr(const T& aValue) { return (char*)&aValue; }
static bool IsFixedLength() { return true; }
};
template <>
struct ValueTraits<nsACString> {
static bool IsFixedLength() { return false; }
static uint32_t Length(const nsACString& aValue) { return aValue.Length(); }
static char* WritePtr(nsACString& aValue, uint32_t aLength) {
aValue.SetLength(aLength);
return aValue.BeginWriting();
}
static const char* ReadPtr(const nsACString& aValue) {
return aValue.BeginReading();
}
};
void CStringToHexString(const nsACString& aIn, nsACString& aOut) {
static const char* const lut = "0123456789ABCDEF";
size_t len = aIn.Length();
MOZ_ASSERT(len <= COMPLETE_SIZE);
aOut.SetCapacity(2 * len);
for (size_t i = 0; i < aIn.Length(); ++i) {
const char c = static_cast<char>(aIn[i]);
aOut.Append(lut[(c >> 4) & 0x0F]);
aOut.Append(lut[c & 15]);
}
}
#ifdef DEBUG
nsCString GetFormattedTimeString(int64_t aCurTimeSec) {
PRExplodedTime pret;
PR_ExplodeTime(aCurTimeSec * PR_USEC_PER_SEC, PR_GMTParameters, &pret);
return nsPrintfCString("%04d-%02d-%02d %02d:%02d:%02d UTC", pret.tm_year,
pret.tm_month + 1, pret.tm_mday, pret.tm_hour,
pret.tm_min, pret.tm_sec);
}
#endif
} // end of unnamed namespace.
////////////////////////////////////////////////////////////////////////
template <typename T>
nsresult LookupCache::WriteValue(nsIOutputStream* aOutputStream,
const T& aValue) {
uint32_t writeLength = ValueTraits<T>::Length(aValue);
MOZ_ASSERT(writeLength <= LookupCacheV4::MAX_METADATA_VALUE_LENGTH,
"LookupCacheV4::MAX_METADATA_VALUE_LENGTH is too small.");
if (!ValueTraits<T>::IsFixedLength()) {
// We need to write out the variable value length.
nsresult rv = WriteValue(aOutputStream, writeLength);
NS_ENSURE_SUCCESS(rv, rv);
}
// Write out the value.
auto valueReadPtr = ValueTraits<T>::ReadPtr(aValue);
uint32_t written;
nsresult rv = aOutputStream->Write(valueReadPtr, writeLength, &written);
NS_ENSURE_SUCCESS(rv, rv);
if (NS_WARN_IF(written != writeLength)) {
return NS_ERROR_FAILURE;
}
return rv;
}
template <typename T>
nsresult LookupCache::ReadValue(nsIInputStream* aInputStream, T& aValue) {
nsresult rv;
uint32_t readLength;
if (ValueTraits<T>::IsFixedLength()) {
readLength = ValueTraits<T>::Length(aValue);
} else {
// Read the variable value length from file.
nsresult rv = ReadValue(aInputStream, readLength);
NS_ENSURE_SUCCESS(rv, rv);
}
// Sanity-check the readLength in case of disk corruption
// (see bug 1433636).
if (readLength > LookupCacheV4::MAX_METADATA_VALUE_LENGTH) {
return NS_ERROR_FILE_CORRUPTED;
}
// Read the value.
uint32_t read;
auto valueWritePtr = ValueTraits<T>::WritePtr(aValue, readLength);
rv = aInputStream->Read(valueWritePtr, readLength, &read);
if (NS_FAILED(rv) || read != readLength) {
LOG(("Failed to read the value."));
return NS_FAILED(rv) ? rv : NS_ERROR_FAILURE;
}
return rv;
}
// These symbols are referenced from another compilation unit, but their
// implementation depends on local symbols. Workaround this by forcing their
// instantiation there.
template nsresult mozilla::safebrowsing::LookupCache::WriteValue(
nsIOutputStream*, nsTSubstring<char> const&);
template nsresult mozilla::safebrowsing::LookupCache::ReadValue(
nsIInputStream*, nsTSubstring<char>&);
LookupCache::LookupCache(const nsACString& aTableName,
const nsACString& aProvider,
nsCOMPtr<nsIFile>& aRootStoreDir)
: mPrimed(false),
mTableName(aTableName),
mProvider(aProvider),
mRootStoreDirectory(aRootStoreDir),
mVLPrefixSet(nullptr) {
UpdateRootDirHandle(mRootStoreDirectory);
}
nsresult LookupCache::Open() {
LOG(("Loading PrefixSet for %s", mTableName.get()));
nsresult rv;
if (nsUrlClassifierUtils::IsMozTestTable(mTableName)) {
// For built-in test table, we don't load it from disk,
// test entries are directly added in memory.
rv = LoadMozEntries();
} else {
rv = LoadPrefixSet();
}
Unused << NS_WARN_IF(NS_FAILED(rv));
return rv;
}
nsresult LookupCache::Init() {
MOZ_ASSERT(!mVLPrefixSet);
mVLPrefixSet = new VariableLengthPrefixSet();
nsresult rv = mVLPrefixSet->Init(mTableName);
NS_ENSURE_SUCCESS(rv, rv);
return NS_OK;
}
nsresult LookupCache::UpdateRootDirHandle(
nsCOMPtr<nsIFile>& aNewRootStoreDirectory) {
nsresult rv;
if (aNewRootStoreDirectory != mRootStoreDirectory) {
rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
NS_ENSURE_SUCCESS(rv, rv);
}
rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory, mTableName,
mProvider,
getter_AddRefs(mStoreDirectory));
if (NS_FAILED(rv)) {
LOG(("Failed to get private store directory for %s", mTableName.get()));
mStoreDirectory = mRootStoreDirectory;
}
if (LOG_ENABLED()) {
nsString path;
mStoreDirectory->GetPath(path);
LOG(("Private store directory for %s is %s", mTableName.get(),
NS_ConvertUTF16toUTF8(path).get()));
}
return rv;
}
nsresult LookupCache::WriteFile() {
if (nsUrlClassifierDBService::ShutdownHasStarted()) {
return NS_ERROR_ABORT;
}
nsCOMPtr<nsIFile> psFile;
nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix());
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
rv = StoreToFile(psFile);
if (NS_WARN_IF(NS_FAILED(rv))) {
LOG(("Failed to store the prefixset for table %s", mTableName.get()));
return rv;
}
return NS_OK;
}
nsresult LookupCache::CheckCache(const Completion& aCompletion, bool* aHas,
bool* aConfirmed) {
// Shouldn't call this function if prefix is not in the database.
MOZ_ASSERT(*aHas);
*aConfirmed = false;
uint32_t prefix = aCompletion.ToUint32();
CachedFullHashResponse* fullHashResponse = mFullHashCache.Get(prefix);
if (!fullHashResponse) {
return NS_OK;
}
int64_t nowSec = PR_Now() / PR_USEC_PER_SEC;
int64_t expiryTimeSec;
FullHashExpiryCache& fullHashes = fullHashResponse->fullHashes;
nsDependentCSubstring completion(
reinterpret_cast<const char*>(aCompletion.buf), COMPLETE_SIZE);
// Check if we can find the fullhash in positive cache
if (fullHashes.Get(completion, &expiryTimeSec)) {
if (nowSec <= expiryTimeSec) {
// Url is NOT safe.
*aConfirmed = true;
LOG(("Found a valid fullhash in the positive cache"));
} else {
// Trigger a gethash request in this case(aConfirmed is false).
LOG(("Found an expired fullhash in the positive cache"));
// Remove fullhash entry from the cache when the negative cache
// is also expired because whether or not the fullhash is cached
// locally, we will need to consult the server next time we
// lookup this hash. We may as well remove it from our cache.
if (fullHashResponse->negativeCacheExpirySec < expiryTimeSec) {
fullHashes.Remove(completion);
if (fullHashes.Count() == 0 &&
fullHashResponse->negativeCacheExpirySec < nowSec) {
mFullHashCache.Remove(prefix);
}
}
}
return NS_OK;
}
// Check negative cache.
if (fullHashResponse->negativeCacheExpirySec >= nowSec) {
// Url is safe.
LOG(("Found a valid prefix in the negative cache"));
*aHas = false;
} else {
LOG(("Found an expired prefix in the negative cache"));
if (fullHashes.Count() == 0) {
mFullHashCache.Remove(prefix);
}
}
return NS_OK;
}
// This function remove cache entries whose negative cache time is expired.
// It is possible that a cache entry whose positive cache time is not yet
// expired but still being removed after calling this API. Right now we call
// this on every update.
void LookupCache::InvalidateExpiredCacheEntries() {
int64_t nowSec = PR_Now() / PR_USEC_PER_SEC;
for (auto iter = mFullHashCache.Iter(); !iter.Done(); iter.Next()) {
CachedFullHashResponse* response = iter.UserData();
if (response->negativeCacheExpirySec < nowSec) {
iter.Remove();
}
}
}
void LookupCache::CopyFullHashCache(const LookupCache* aSource) {
if (!aSource) {
return;
}
CopyClassHashTable<FullHashResponseMap>(aSource->mFullHashCache,
mFullHashCache);
}
void LookupCache::ClearCache() { mFullHashCache.Clear(); }
void LookupCache::ClearAll() {
ClearCache();
ClearPrefixes();
mPrimed = false;
}
nsresult LookupCache::ClearPrefixes() {
// Clear by seting a empty map
PrefixStringMap map;
return mVLPrefixSet->SetPrefixes(map);
}
bool LookupCache::IsEmpty() const {
bool isEmpty;
mVLPrefixSet->IsEmpty(&isEmpty);
return isEmpty;
}
void LookupCache::GetCacheInfo(nsIUrlClassifierCacheInfo** aCache) const {
MOZ_ASSERT(aCache);
RefPtr<nsUrlClassifierCacheInfo> info = new nsUrlClassifierCacheInfo;
info->table = mTableName;
for (const auto& cacheEntry : mFullHashCache) {
RefPtr<nsUrlClassifierCacheEntry> entry = new nsUrlClassifierCacheEntry;
// Set prefix of the cache entry.
nsAutoCString prefix(reinterpret_cast<const char*>(&cacheEntry.GetKey()),
PREFIX_SIZE);
CStringToHexString(prefix, entry->prefix);
// Set expiry of the cache entry.
CachedFullHashResponse* response = cacheEntry.GetWeak();
entry->expirySec = response->negativeCacheExpirySec;
// Set positive cache.
FullHashExpiryCache& fullHashes = response->fullHashes;
for (const auto& fullHashEntry : fullHashes) {
RefPtr<nsUrlClassifierPositiveCacheEntry> match =
new nsUrlClassifierPositiveCacheEntry;
// Set fullhash of positive cache entry.
CStringToHexString(fullHashEntry.GetKey(), match->fullhash);
// Set expiry of positive cache entry.
match->expirySec = fullHashEntry.GetData();
entry->matches.AppendElement(
static_cast<nsIUrlClassifierPositiveCacheEntry*>(match));
}
info->entries.AppendElement(
static_cast<nsIUrlClassifierCacheEntry*>(entry));
}
info.forget(aCache);
}
/* static */
bool LookupCache::IsCanonicalizedIP(const nsACString& aHost) {
// The canonicalization process will have left IP addresses in dotted
// decimal with no surprises.
uint32_t i1, i2, i3, i4;
char c;
if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c", &i1, &i2, &i3,
&i4, &c) == 4) {
return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
}
return false;
}
// This is used when the URL is created by CreatePairwiseEntityListURI(),
// which returns an URI like "toplevel.page/?resource=third.party.domain"
// The fragment rule for the hostname(toplevel.page) is still the same
// as Safe Browsing protocol.
// The difference is that we always keep the path and query string and
// generate an additional fragment by removing the leading component of
// third.party.domain. This is to make sure we can find a match when a
// exceptionlisted domain is eTLD.
/* static */
nsresult LookupCache::GetLookupEntitylistFragments(
const nsACString& aSpec, nsTArray<nsCString>* aFragments) {
aFragments->Clear();
nsACString::const_iterator begin, end, iter, iter_end;
aSpec.BeginReading(begin);
aSpec.EndReading(end);
iter = begin;
iter_end = end;
// Fallback to use default fragment rule when the URL doesn't contain
// "/?resoruce=" because this means the URL is not generated in
// CreatePairwiseEntityListURI()
if (!FindInReadable("/?resource="_ns, iter, iter_end)) {
return GetLookupFragments(aSpec, aFragments);
}
const nsACString& topLevelURL = Substring(begin, iter++);
const nsACString& thirdPartyURL = Substring(iter_end, end);
/**
* For the top-level URL, we follow the host fragment rule defined
* in the Safe Browsing protocol.
*/
nsTArray<nsCString> topLevelURLs;
topLevelURLs.AppendElement(topLevelURL);
if (!IsCanonicalizedIP(topLevelURL)) {
topLevelURL.BeginReading(begin);
topLevelURL.EndReading(end);
int numTopLevelURLComponents = 0;
while (RFindInReadable("."_ns, begin, end) &&
numTopLevelURLComponents < MAX_HOST_COMPONENTS) {
// don't bother checking toplevel domains
if (++numTopLevelURLComponents >= 2) {
topLevelURL.EndReading(iter);
topLevelURLs.AppendElement(Substring(end, iter));
}
end = begin;
topLevelURL.BeginReading(begin);
}
}
/**
* The whiltelisted domain in the entity list may be eTLD or eTLD+1.
* Since the number of the domain name part in the third-party URL searching
* is always less than or equal to eTLD+1, we remove the leading
* component from the third-party domain to make sure we can find a match
* if the exceptionlisted domain stoed in the entity list is eTLD.
*/
nsTArray<nsCString> thirdPartyURLs;
thirdPartyURLs.AppendElement(thirdPartyURL);
if (!IsCanonicalizedIP(thirdPartyURL)) {
thirdPartyURL.BeginReading(iter);
thirdPartyURL.EndReading(end);
if (FindCharInReadable('.', iter, end)) {
iter++;
nsAutoCString thirdPartyURLToAdd;
thirdPartyURLToAdd.Assign(Substring(iter++, end));
// don't bother checking toplevel domains
if (FindCharInReadable('.', iter, end)) {
thirdPartyURLs.AppendElement(thirdPartyURLToAdd);
}
}
}
for (size_t i = 0; i < topLevelURLs.Length(); i++) {
for (size_t j = 0; j < thirdPartyURLs.Length(); j++) {
nsAutoCString key;
key.Assign(topLevelURLs[i]);
key.Append("/?resource=");
key.Append(thirdPartyURLs[j]);
aFragments->AppendElement(key);
}
}
return NS_OK;
}
/* static */
nsresult LookupCache::GetLookupFragments(const nsACString& aSpec,
nsTArray<nsCString>* aFragments)
{
aFragments->Clear();
nsACString::const_iterator begin, end, iter;
aSpec.BeginReading(begin);
aSpec.EndReading(end);
iter = begin;
if (!FindCharInReadable('/', iter, end)) {
return NS_OK;
}
const nsACString& host = Substring(begin, iter++);
nsAutoCString path;
path.Assign(Substring(iter, end));
/**
* From the protocol doc:
* For the hostname, the client will try at most 5 different strings. They
* are:
* a) The exact hostname of the url
* b) The 4 hostnames formed by starting with the last 5 components and
* successivly removing the leading component. The top-level component
* can be skipped. This is not done if the hostname is a numerical IP.
*/
nsTArray<nsCString> hosts;
hosts.AppendElement(host);
if (!IsCanonicalizedIP(host)) {
host.BeginReading(begin);
host.EndReading(end);
int numHostComponents = 0;
while (RFindInReadable("."_ns, begin, end) &&
numHostComponents < MAX_HOST_COMPONENTS) {
// don't bother checking toplevel domains
if (++numHostComponents >= 2) {
host.EndReading(iter);
hosts.AppendElement(Substring(end, iter));
}
end = begin;
host.BeginReading(begin);
}
}
/**
* From the protocol doc:
* For the path, the client will also try at most 6 different strings.
* They are:
* a) the exact path of the url, including query parameters
* b) the exact path of the url, without query parameters
* c) the 4 paths formed by starting at the root (/) and
* successively appending path components, including a trailing
* slash. This behavior should only extend up to the next-to-last
* path component, that is, a trailing slash should never be
* appended that was not present in the original url.
*/
nsTArray<nsCString> paths;
nsAutoCString pathToAdd;
path.BeginReading(begin);
path.EndReading(end);
iter = begin;
if (FindCharInReadable('?', iter, end)) {
pathToAdd = Substring(begin, iter);
paths.AppendElement(pathToAdd);
end = iter;
}
int numPathComponents = 1;
iter = begin;
while (FindCharInReadable('/', iter, end) &&
numPathComponents < MAX_PATH_COMPONENTS) {
iter++;
pathToAdd.Assign(Substring(begin, iter));
paths.AppendElement(pathToAdd);
numPathComponents++;
}
// If we haven't already done so, add the full path
if (!pathToAdd.Equals(path)) {
paths.AppendElement(path);
}
// Check an empty path (for whole-domain blocklist entries)
if (!paths.Contains(""_ns)) {
paths.AppendElement(""_ns);
}
for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
nsCString key;
key.Assign(hosts[hostIndex]);
key.Append('/');
key.Append(paths[pathIndex]);
aFragments->AppendElement(key);
}
}
return NS_OK;
}
nsresult LookupCache::LoadPrefixSet() {
nsCOMPtr<nsIFile> psFile;
nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
NS_ENSURE_SUCCESS(rv, rv);
rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix());
NS_ENSURE_SUCCESS(rv, rv);
bool exists;
rv = psFile->Exists(&exists);
NS_ENSURE_SUCCESS(rv, rv);
if (exists) {
LOG(("stored PrefixSet exists, loading from disk"));
rv = LoadFromFile(psFile);
if (NS_FAILED(rv)) {
return rv;
}
mPrimed = true;
} else {
// The only scenario we load the old .pset file is when we haven't received
// a SafeBrowsng update before. After receiving an update, new .vlpset will
// be stored while old .pset will be removed.
if (NS_SUCCEEDED(LoadLegacyFile())) {
mPrimed = true;
} else {
LOG(("no (usable) stored PrefixSet found"));
}
}
#ifdef DEBUG
if (mPrimed) {
uint32_t size = SizeOfPrefixSet();
LOG(("SB tree done, size = %d bytes\n", size));
}
#endif
return NS_OK;
}
size_t LookupCache::SizeOfPrefixSet() const {
return mVLPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
}
#if defined(DEBUG)
void LookupCache::DumpCache() const {
if (!LOG_ENABLED()) {
return;
}
for (const auto& cacheEntry : mFullHashCache) {
CachedFullHashResponse* response = cacheEntry.GetWeak();
nsAutoCString prefix;
CStringToHexString(
nsCString(reinterpret_cast<const char*>(&cacheEntry.GetKey()),
PREFIX_SIZE),
prefix);
LOG(("Cache prefix(%s): %s, Expiry: %s", mTableName.get(), prefix.get(),
GetFormattedTimeString(response->negativeCacheExpirySec).get()));
FullHashExpiryCache& fullHashes = response->fullHashes;
for (const auto& fullHashEntry : fullHashes) {
nsAutoCString fullhash;
CStringToHexString(fullHashEntry.GetKey(), fullhash);
LOG((" - %s, Expiry: %s", fullhash.get(),
GetFormattedTimeString(fullHashEntry.GetData()).get()));
}
}
}
#endif
nsresult LookupCache::StoreToFile(nsCOMPtr<nsIFile>& aFile) {
NS_ENSURE_ARG_POINTER(aFile);
uint32_t fileSize = sizeof(Header) +
mVLPrefixSet->CalculatePreallocateSize() +
nsCrc32CheckSumedOutputStream::CHECKSUM_SIZE;
nsCOMPtr<nsIOutputStream> localOutFile;
nsresult rv =
NS_NewSafeLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
// Preallocate the file storage
{
nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FALLOCATE_TIME> timer;
Unused << fos->Preallocate(fileSize);
}
nsCOMPtr<nsIOutputStream> out;
rv = NS_NewCrc32OutputStream(getter_AddRefs(out), localOutFile.forget(),
std::min(fileSize, MAX_BUFFER_SIZE));
// Write header
Header header;
GetHeader(header);
rv = WriteValue(out, header);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
// Write prefixes
rv = mVLPrefixSet->WritePrefixes(out);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
// Write checksum
nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
rv = safeOut->Finish();
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
LOG(("[%s] Storing PrefixSet successful", mTableName.get()));
// This is to remove old ".pset" files if exist
Unused << ClearLegacyFile();
return NS_OK;
}
nsresult LookupCache::LoadFromFile(nsCOMPtr<nsIFile>& aFile) {
NS_ENSURE_ARG_POINTER(aFile);
Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FILELOAD_TIME> timer;
nsCOMPtr<nsIInputStream> localInFile;
nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
PR_RDONLY | nsIFile::OS_READAHEAD);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
// Calculate how big the file is, make sure our read buffer isn't bigger
// than the file itself which is just wasting memory.
int64_t fileSize;
rv = aFile->GetFileSize(&fileSize);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
if (fileSize < 0 || fileSize > UINT32_MAX) {
return NS_ERROR_FAILURE;
}
uint32_t bufferSize =
std::min<uint32_t>(static_cast<uint32_t>(fileSize), MAX_BUFFER_SIZE);
// Convert to buffered stream
nsCOMPtr<nsIInputStream> in;
rv = NS_NewBufferedInputStream(getter_AddRefs(in), localInFile.forget(),
bufferSize);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
// Load header
Header header;
rv = ReadValue(in, header);
if (NS_WARN_IF(NS_FAILED(rv))) {
LOG(("Failed to read header for %s", mTableName.get()));
return NS_ERROR_FILE_CORRUPTED;
}
rv = SanityCheck(header);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
// Load data
rv = mVLPrefixSet->LoadPrefixes(in);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
// Load crc32 checksum and verify
rv = VerifyCRC32(in);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
mPrimed = true;
LOG(("[%s] Loading PrefixSet successful", mTableName.get()));
return NS_OK;
}
// This function assumes CRC32 checksum is in the end of the input stream
nsresult LookupCache::VerifyCRC32(nsCOMPtr<nsIInputStream>& aIn) {
nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(aIn);
nsresult rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, 0);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
uint64_t len;
rv = aIn->Available(&len);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
uint32_t calculateCrc32 = ~0;
// We don't want to include the checksum itself
len = len - nsCrc32CheckSumedOutputStream::CHECKSUM_SIZE;
static const uint64_t STREAM_BUFFER_SIZE = 4096;
char buffer[STREAM_BUFFER_SIZE];
while (len) {
uint32_t read;
uint64_t readLimit = std::min<uint64_t>(STREAM_BUFFER_SIZE, len);
rv = aIn->Read(buffer, readLimit, &read);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
calculateCrc32 = ComputeCrc32c(
calculateCrc32, reinterpret_cast<const uint8_t*>(buffer), read);
len -= read;
}
// Now read the CRC32
uint32_t crc32;
ReadValue(aIn, crc32);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
if (crc32 != calculateCrc32) {
return NS_ERROR_FILE_CORRUPTED;
}
return NS_OK;
}
nsresult LookupCacheV2::Has(const Completion& aCompletion, bool* aHas,
uint32_t* aMatchLength, bool* aConfirmed) {
*aHas = *aConfirmed = false;
*aMatchLength = 0;
uint32_t length = 0;
nsDependentCSubstring fullhash;
fullhash.Rebind((const char*)aCompletion.buf, COMPLETE_SIZE);
uint32_t prefix = aCompletion.ToUint32();
nsresult rv = mVLPrefixSet->Matches(prefix, fullhash, &length);
NS_ENSURE_SUCCESS(rv, rv);
if (length == 0) {
return NS_OK;
}
MOZ_ASSERT(length == PREFIX_SIZE || length == COMPLETE_SIZE);
*aHas = true;
*aMatchLength = length;
*aConfirmed = length == COMPLETE_SIZE;
if (!(*aConfirmed)) {
rv = CheckCache(aCompletion, aHas, aConfirmed);
}
return rv;
}
nsresult LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
AddCompleteArray& aAddCompletes) {
nsresult rv = mVLPrefixSet->SetPrefixes(aAddPrefixes, aAddCompletes);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
mPrimed = true;
return NS_OK;
}
nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes) {
if (!mPrimed) {
// This can happen if its a new table, so no error.
LOG(("GetPrefixes from empty LookupCache"));
return NS_OK;
}
return mVLPrefixSet->GetFixedLengthPrefixes(&aAddPrefixes, nullptr);
}
nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes,
FallibleTArray<nsCString>& aAddCompletes) {
if (!mPrimed) {
// This can happen if its a new table, so no error.
LOG(("GetHashes from empty LookupCache"));
return NS_OK;
}
return mVLPrefixSet->GetFixedLengthPrefixes(&aAddPrefixes, &aAddCompletes);
}
nsresult LookupCacheV2::GetPrefixByIndex(uint32_t aIndex,
uint32_t* aOutPrefix) const {
NS_ENSURE_ARG_POINTER(aOutPrefix);
return mVLPrefixSet->GetFixedLengthPrefixByIndex(aIndex, aOutPrefix);
}
void LookupCacheV2::AddGethashResultToCache(
const AddCompleteArray& aAddCompletes, const MissPrefixArray& aMissPrefixes,
int64_t aExpirySec) {
static const int64_t CACHE_DURATION_SEC = 15 * 60;
int64_t defaultExpirySec = PR_Now() / PR_USEC_PER_SEC + CACHE_DURATION_SEC;
if (aExpirySec != 0) {
defaultExpirySec = aExpirySec;
}
for (const AddComplete& add : aAddCompletes) {
nsDependentCSubstring fullhash(
reinterpret_cast<const char*>(add.CompleteHash().buf), COMPLETE_SIZE);
CachedFullHashResponse* response =
mFullHashCache.GetOrInsertNew(add.ToUint32());
response->negativeCacheExpirySec = defaultExpirySec;
FullHashExpiryCache& fullHashes = response->fullHashes;
fullHashes.InsertOrUpdate(fullhash, defaultExpirySec);
}
for (const Prefix& prefix : aMissPrefixes) {
CachedFullHashResponse* response =
mFullHashCache.GetOrInsertNew(prefix.ToUint32());
response->negativeCacheExpirySec = defaultExpirySec;
}
}
void LookupCacheV2::GetHeader(Header& aHeader) {
aHeader.magic = LookupCacheV2::VLPSET_MAGIC;
aHeader.version = LookupCacheV2::VLPSET_VERSION;
}
nsresult LookupCacheV2::SanityCheck(const Header& aHeader) {
if (aHeader.magic != LookupCacheV2::VLPSET_MAGIC) {
return NS_ERROR_FILE_CORRUPTED;
}
if (aHeader.version != LookupCacheV2::VLPSET_VERSION) {
return NS_ERROR_FAILURE;
}
return NS_OK;
}
nsresult LookupCacheV2::LoadLegacyFile() {
// Because mozilla Safe Browsing v2 server only includes completions
// in the update, we can simplify this function by only loading .sbtore
if (!mProvider.EqualsLiteral("mozilla")) {
return NS_OK;
}
HashStore store(mTableName, mProvider, mRootStoreDirectory);
// Support loading version 3 HashStore.
nsresult rv = store.Open(3);
NS_ENSURE_SUCCESS(rv, rv);
if (store.AddChunks().Length() == 0 && store.SubChunks().Length() == 0) {
// Return when file doesn't exist
return NS_OK;
}
AddPrefixArray prefix;
AddCompleteArray addComplete;
rv = store.ReadCompletionsLegacyV3(addComplete);
NS_ENSURE_SUCCESS(rv, rv);
return Build(prefix, addComplete);
}
nsresult LookupCacheV2::ClearLegacyFile() {
nsCOMPtr<nsIFile> file;
nsresult rv = mStoreDirectory->Clone(getter_AddRefs(file));
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
rv = file->AppendNative(mTableName + ".pset"_ns);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
bool exists;
rv = file->Exists(&exists);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
if (exists) {
rv = file->Remove(false);
if (NS_WARN_IF(NS_FAILED(rv))) {
return rv;
}
LOG(("[%s]Old PrefixSet is successfully removed!", mTableName.get()));
}
return NS_OK;
}
nsCString LookupCacheV2::GetPrefixSetSuffix() const { return ".vlpset"_ns; }
// Support creating built-in entries for phsihing, malware, unwanted, harmful,
// tracking/tracking exceptionlist and flash block tables.
//
nsresult LookupCacheV2::LoadMozEntries() {
// We already have the entries, return
if (!IsEmpty() || IsPrimed()) {
return NS_OK;
}
nsTArray<nsLiteralCString> entries;
if (mTableName.EqualsLiteral("moztest-phish-simple")) {
// Entries for phishing table
entries.AppendElement("itisatrap.org/firefox/its-a-trap.html"_ns);
} else if (mTableName.EqualsLiteral("moztest-malware-simple")) {
// Entries for malware table
entries.AppendElement("itisatrap.org/firefox/its-an-attack.html"_ns);
} else if (mTableName.EqualsLiteral("moztest-unwanted-simple")) {
// Entries for unwanted table
entries.AppendElement("itisatrap.org/firefox/unwanted.html"_ns);
} else if (mTableName.EqualsLiteral("moztest-harmful-simple")) {
// Entries for harmfule tables
entries.AppendElement("itisatrap.org/firefox/harmful.html"_ns);
} else if (mTableName.EqualsLiteral("moztest-track-simple")) {
// Entries for tracking table
entries.AppendElement("trackertest.org/"_ns);
entries.AppendElement("itisatracker.org/"_ns);
} else if (mTableName.EqualsLiteral("moztest-trackwhite-simple")) {
// Entries for tracking entitylist table
entries.AppendElement("itisatrap.org/?resource=itisatracker.org"_ns);
} else if (mTableName.EqualsLiteral("moztest-block-simple")) {
// Entries for flash block table
entries.AppendElement("itisatrap.org/firefox/blocked.html"_ns);
} else {
MOZ_ASSERT_UNREACHABLE();
}
AddPrefixArray prefix;
AddCompleteArray completes;
for (const auto& entry : entries) {
AddComplete add;
if (NS_FAILED(add.complete.FromPlaintext(entry))) {
continue;
}
if (!completes.AppendElement(add, fallible)) {
return NS_ERROR_OUT_OF_MEMORY;
}
}
return Build(prefix, completes);
}
} // namespace safebrowsing
} // namespace mozilla