Source code

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
const lazy = {};
ChromeUtils.defineESModuleGetters(lazy, {
});
// Console logger for this module, created on first access of
// `lazy.logConsole`. The verbosity is chosen from the
// "browser.places.interactions.log" boolean pref at creation time.
ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
  const debugLoggingEnabled = Services.prefs.getBoolPref(
    "browser.places.interactions.log",
    false
  );
  const maxLogLevel = debugLoggingEnabled ? "Debug" : "Warn";
  return console.createInstance({
    prefix: "InteractionsBlocklist",
    maxLogLevel,
  });
});
// A blocklist of regular expressions. Maps base hostnames to a list of regular
// expressions for URLs with that base hostname. In this context, "base
// hostname" means the hostname without any subdomains or a public suffix. For
// example, the base hostname for "https://www.maps.google.com/a/place" is
// "google". We do this mapping to improve performance; otherwise we'd have to
// check all URLs against a long list of regular expressions. The regexes are
// defined as escaped strings so that we build them lazily.
// We may want to migrate this list to Remote Settings in the future.
let HOST_BLOCKLIST = {
  auth0: [
    // Auth0 OAuth.
    // XXX: Used alone this could produce false positives where an auth0 URL
    // appears after another valid domain and TLD, but since we limit this to
    // the auth0 hostname those occurrences will be filtered out.
    "^https:\\/\\/.*\\.auth0\\.com\\/login",
  ],
  baidu: [
    // Baidu SERP
    "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
  ],
  bing: [
    // Bing SERP
    "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
  ],
  duckduckgo: [
    // DuckDuckGo SERP
    "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
  ],
  google: [
    // Google SERP
    "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
    // Google OAuth
    "^https:\\/\\/accounts\\.google\\.com\\/o\\/oauth2\\/v2\\/auth",
    "^https:\\/\\/accounts\\.google\\.com\\/signin\\/oauth\\/consent",
  ],
  microsoftonline: [
    // Microsoft OAuth
    "^https:\\/\\/login\\.microsoftonline\\.com\\/common\\/oauth2\\/v2\\.0\\/authorize",
  ],
  yandex: [
    // Yandex SERP
    "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
  ],
  zoom: [
    // Zoom meeting interstitial
    "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
  ],
};

// Wrap the table in a Proxy so that the string patterns are compiled the first
// time a host's list is read. Compiled RegExp objects are written back into the
// same array, so each pattern is compiled at most once. Reading a property
// whose value is not an array (including unknown hosts) yields null.
HOST_BLOCKLIST = new Proxy(HOST_BLOCKLIST, {
  get(target, property) {
    const regexes = target[property];
    if (!Array.isArray(regexes)) {
      return null;
    }
    for (let i = 0; i < regexes.length; i++) {
      const pattern = regexes[i];
      if (typeof pattern !== "string") {
        // Already compiled (or added as a RegExp by the class below).
        continue;
      }
      try {
        regexes[i] = new RegExp(pattern, "i");
      } catch (ex) {
        // `new RegExp` signals a bad pattern by throwing, never by returning a
        // falsy value, so the error has to be translated here.
        throw new Error("Blocklist contains invalid regex.", { cause: ex });
      }
    }
    return regexes;
  },
});
/**
 * A class that maintains a blocklist of URLs. The class exposes a method to
 * check if a particular URL is contained on the blocklist.
 *
 * Custom (user-added) regexes live under the "*" key of HOST_BLOCKLIST and are
 * persisted in the "places.interactions.customBlocklist" string pref as a JSON
 * array of `RegExp#toString()` values (i.e. "/source/flags").
 */
class _InteractionsBlocklist {
  constructor() {
    // Load custom blocklist items from pref.
    try {
      const customBlocklist = JSON.parse(
        Services.prefs.getStringPref(
          "places.interactions.customBlocklist",
          "[]"
        )
      );
      if (!Array.isArray(customBlocklist)) {
        throw new Error("Custom blocklist pref is not a JSON array.");
      }
      HOST_BLOCKLIST["*"] = customBlocklist.map(stored => {
        // Entries are written with RegExp#toString(), i.e. "/source/flags".
        // Passing that text straight to `new RegExp` would match the literal
        // slashes and drop the flags, so split it back apart first. Anything
        // not in that form is treated as a bare pattern.
        const literal = /^\/(.+)\/([a-z]*)$/s.exec(stored);
        return literal
          ? new RegExp(literal[1], literal[2])
          : new RegExp(stored);
      });
    } catch (ex) {
      lazy.logConsole.warn("places.interactions.customBlocklist is corrupted.");
    }
  }

  /**
   * Only certain urls can be added as Interactions, either manually or
   * automatically.
   *
   * @returns {Map} A Map keyed by protocol, for each protocol an object may
   *   define stricter requirements, like extension.
   */
  get urlRequirements() {
    return new Map([
      ["http:", {}],
      ["https:", {}],
      ["file:", { extension: "pdf" }],
    ]);
  }

  /**
   * Whether to record interactions for a given URL.
   * The rules are defined in InteractionsBlocklist.urlRequirements.
   *
   * @param {string|URL|nsIURI} url The URL to check. A string is parsed with
   *   `new URL`, which throws if the string is not a valid URL.
   * @returns {boolean} whether the url can be recorded.
   */
  canRecordUrl(url) {
    if (typeof url == "string") {
      url = new URL(url);
    }
    let protocol, pathname;
    if (url instanceof Ci.nsIURI) {
      protocol = url.scheme + ":";
      pathname = url.filePath;
    } else {
      protocol = url.protocol;
      pathname = url.pathname;
    }
    const requirements = InteractionsBlocklist.urlRequirements.get(protocol);
    if (!requirements) {
      // Unsupported protocol. Return a real boolean, as documented.
      return false;
    }
    return (
      !requirements.extension || pathname.endsWith(requirements.extension)
    );
  }

  /**
   * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
   * should not record an interaction.
   *
   * @param {string} urlToCheck
   *   The URL we are looking for on the blocklist.
   * @returns {boolean}
   *   True if `url` is on a blocklist. False otherwise.
   */
  isUrlBlocklisted(urlToCheck) {
    if (lazy.FilterAdult.isAdultUrl(urlToCheck)) {
      return true;
    }
    if (!this.canRecordUrl(urlToCheck)) {
      return true;
    }
    // First, find the URL's base host: the hostname without any subdomains or a
    // public suffix.
    let url;
    try {
      url = new URL(urlToCheck);
    } catch (ex) {
      // `url` is still unassigned here, so report the caller's input instead.
      lazy.logConsole.warn(
        `Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
      );
      return false;
    }
    if (url.protocol === "file:") {
      return false;
    }
    const hostWithoutSuffix = lazy.UrlbarUtils.stripPublicSuffixFromHost(
      url.host
    );
    const [hostWithSubdomains] = lazy.UrlbarUtils.stripPrefixAndTrim(
      hostWithoutSuffix,
      {
        stripWww: true,
        trimTrailingDot: true,
      }
    );
    const baseHost = hostWithSubdomains.substring(
      hostWithSubdomains.lastIndexOf(".") + 1
    );
    // Then fetch blocked regexes for that baseHost and compare them to the full
    // URL. Also check the URL against the custom blocklist. The two lists are
    // tested separately: appending the custom entries to the per-host array
    // would mutate the shared table, growing it on every call and keeping
    // removed custom entries alive.
    const hostRegexes = HOST_BLOCKLIST[baseHost.toLocaleLowerCase()] || [];
    const customRegexes = HOST_BLOCKLIST["*"] || [];
    const matchesUrl = regex => regex.test(url.href);
    return hostRegexes.some(matchesUrl) || customRegexes.some(matchesUrl);
  }

  /**
   * Adds a regex to HOST_BLOCKLIST. Since we can't parse the base host from
   * the regex, we add it to a list of wildcard regexes. All URLs are checked
   * against these wildcard regexes. Currently only exposed for tests and use in
   * the console. In the future we could hook this up to a UI component.
   *
   * @param {string|RegExp} regexToAdd
   *   The regular expression to add to our blocklist. It is always compiled
   *   with the "i" flag. An invalid pattern is logged and ignored.
   */
  addRegexToBlocklist(regexToAdd) {
    let regex;
    try {
      regex = new RegExp(regexToAdd, "i");
    } catch (ex) {
      lazy.logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
      return;
    }
    if (!HOST_BLOCKLIST["*"]) {
      HOST_BLOCKLIST["*"] = [];
    }
    HOST_BLOCKLIST["*"].push(regex);
    Services.prefs.setStringPref(
      "places.interactions.customBlocklist",
      JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
    );
  }

  /**
   * Removes a regex from HOST_BLOCKLIST. If `regexToRemove` is not in the
   * blocklist, this is a no-op. Currently only exposed for tests and use in the
   * console. In the future we could hook this up to a UI component.
   *
   * @param {string|RegExp} regexToRemove
   *   The regular expression to remove from our blocklist. Entries are matched
   *   by their `source`. An invalid pattern is logged and ignored.
   */
  removeRegexFromBlocklist(regexToRemove) {
    let regex;
    try {
      regex = new RegExp(regexToRemove, "i");
    } catch (ex) {
      lazy.logConsole.warn(
        "Invalid regex passed to removeRegexFromBlocklist."
      );
      return;
    }
    if (!Array.isArray(HOST_BLOCKLIST["*"])) {
      return;
    }
    HOST_BLOCKLIST["*"] = HOST_BLOCKLIST["*"].filter(
      curr => curr.source !== regex.source
    );
    Services.prefs.setStringPref(
      "places.interactions.customBlocklist",
      JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
    );
  }
}
// Module-level instance of the blocklist. Constructing it runs the
// constructor above, which loads the custom blocklist from prefs.
export const InteractionsBlocklist = new _InteractionsBlocklist();