Source code

Revision control

Copy as Markdown

Other Tools

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import abc
import re
import sys
from pathlib import Path
from threading import Thread
import sentry_sdk
from mozversioncontrol import (
InvalidRepoPath,
MissingUpstreamRepo,
MissingVCSTool,
get_repository_object,
)
from six import string_types
from mach.telemetry import is_telemetry_enabled
from mach.util import get_state_dir
# Value handed to sentry_sdk.init() by register_sentry().
# NOTE(review): the parentheses are empty in this view, which makes this an
# empty tuple; presumably the DSN string literal belongs between them - confirm
# against the canonical copy of this file.
_SENTRY_DSN = (
)
class ErrorReporter(abc.ABC):
    """Interface for objects that report exceptions to an error tracker.

    The class derives from ``abc.ABC`` so that ``@abc.abstractmethod`` is
    actually enforced: without an ABC metaclass the decorator is inert, and
    the base class (or a subclass that forgot to implement
    ``report_exception``) could be instantiated by mistake.
    """

    @abc.abstractmethod
    def report_exception(self, exception):
        """Report the exception to remote error-tracking software."""
class SentryErrorReporter(ErrorReporter):
    """Error reporter backed by the Sentry SDK."""

    def report_exception(self, exception):
        """Hand ``exception`` to Sentry and return whatever the SDK returns."""
        capture_result = sentry_sdk.capture_exception(exception)
        return capture_result
class NoopErrorReporter(ErrorReporter):
    """Error reporter that discards everything it is given.

    Used when error reporting is specifically switched off, for example
    because telemetry hasn't been allowed.
    """

    def report_exception(self, exception):
        """Ignore ``exception``; always returns ``None``."""
        return
def register_sentry(argv, settings, topsrcdir: Path):
    """Initialise Sentry reporting when telemetry is enabled.

    If ``is_telemetry_enabled(settings)`` is false, nothing is initialised and
    a ``NoopErrorReporter`` is returned. Otherwise a daemon thread is started
    to run ``_is_unmodified_mach_core(topsrcdir)``, the Sentry SDK is
    initialised with a ``before_send`` hook that routes every event through
    ``_process_event``, the invoked command line is recorded as a breadcrumb,
    and a ``SentryErrorReporter`` is returned.
    """
    global _is_unmodified_mach_core_thread

    if not is_telemetry_enabled(settings):
        return NoopErrorReporter()

    def _before_send(event, _hint):
        # The second argument supplied by Sentry is not needed here.
        return _process_event(event, topsrcdir)

    # The worker is started before Sentry is initialised and is stored in the
    # module-level variable so that _process_event() can join() it later.
    checker = Thread(target=_is_unmodified_mach_core, args=[topsrcdir], daemon=True)
    _is_unmodified_mach_core_thread = checker
    checker.start()

    sentry_sdk.init(_SENTRY_DSN, before_send=_before_send)

    command_line = " ".join(argv)
    sentry_sdk.add_breadcrumb(message="./mach {}".format(command_line))
    return SentryErrorReporter()
def _process_event(sentry_event, topsrcdir: Path):
    """Decide whether a Sentry event may be sent and scrub it if so.

    Invoked through the ``before_send`` hook installed by ``register_sentry``.
    Returning ``None`` causes the event to be dropped; that happens when the
    repository cannot be opened, when no base revision is known, or when the
    background check did not conclude that mach is unmodified.

    Otherwise the event is passed through each scrubbing function in turn,
    tagged with a ``release`` derived from the base revision, and returned.
    """
    repo = _get_repository_object(topsrcdir)
    if repo is None:
        # Unknown repo state: we cannot tell whether mach files are unmodified.
        return None

    base_ref = repo.base_ref_as_hg()
    if not base_ref:
        # An exception that cannot be tied to a revision is not worth sending.
        return None

    # Wait for the check started by register_sentry() to finish before
    # reading the verdict it stores.
    _is_unmodified_mach_core_thread.join()
    if not _is_unmodified_mach_core_result:
        return None

    scrubbers = (_settle_mach_module_id, _patch_absolute_paths, _delete_server_name)
    scrubbed_event = sentry_event
    for scrub in scrubbers:
        scrubbed_event = scrub(scrubbed_event, topsrcdir)

    scrubbed_event["release"] = "hg-rev-{}".format(base_ref)
    return scrubbed_event
def _settle_mach_module_id(sentry_event, _):
    """Replace generated ``mach.commands.<id>`` module names with a stable one.

    Sentry groups issues according to the stack frames and their associated
    "module" properties. However, one of the modules is reported like
    "mach.commands.26a828ef5164403eaff4305ab4cb0fab" (with a generated id).
    The generated id is replaced with the static string "<generated>" so that
    grouping behaves as expected.

    Every entry of ``exception.values`` is processed (chained exceptions carry
    more than one), and events that lack exception or stacktrace data are
    returned unchanged instead of raising ``KeyError``/``IndexError``.

    Args:
        sentry_event: The Sentry event dict; frames are updated in place.
        _: Unused; present so all event-mapping functions share a signature.

    Returns:
        The same ``sentry_event`` object.
    """
    generated_id = re.compile(r"mach\.commands\.[a-f0-9]{32}")

    exception_values = (sentry_event.get("exception") or {}).get("values") or []
    for exception_value in exception_values:
        frames = (exception_value.get("stacktrace") or {}).get("frames") or []
        for frame in frames:
            module = frame.get("module")
            if not module:
                continue
            frame["module"] = generated_id.sub("mach.commands.<generated>", module)
    return sentry_event
def _patch_absolute_paths(sentry_event, topsrcdir: Path):
    """Scrub well-known local absolute paths out of a Sentry event.

    Usernames are removed from file names on a best-effort basis. The most
    likely place for usernames to manifest in Sentry information is within
    absolute paths, such as: "/home/mitch/dev/firefox/mach". The state
    directory, ``topsrcdir`` and the home directory are replaced, in that
    order, with "<statedir>", "<topsrcdir>" and "~" in every string value and
    every string dict key of the event. Matching is case-insensitive and
    accepts either slash direction.

    Args:
        sentry_event: The Sentry event. Dicts are rewritten in place; lists
            are rebuilt.
        topsrcdir: Root of the source checkout.

    Returns:
        The patched event.
    """
    # Regex source text that matches a single path separator, "/" or "\".
    any_slash = r"[\/\\]"
    separator = re.compile(any_slash)

    def build_needle(path_text):
        """Compile a search regex for ``path_text``, or return None to skip it."""
        segments = separator.split(path_text)
        if not any(segments):
            # Empty path, or one made only of separators (e.g. a home
            # directory of "/"). A pattern built from it would match the
            # empty string, or every slash, and corrupt the whole event.
            return None
        prefix = ""
        if path_text.startswith("/"):
            # Not all absolute paths in the event start with a leading
            # forward slash, so the initial separator is optional.
            prefix = any_slash + "?"
            segments = segments[1:]
        # Path segments are literal text: escape them so characters such as
        # ".", "+" or "(" are not interpreted as regex syntax. The separators
        # between segments match both slash variants, because paths in the
        # event are not consistent (Windows mostly uses backslashes, yet
        # `.extra."sys.argv"` uses forward slashes, and a single path may mix
        # both, e.g. "C:\Users\<user>/.mozbuild").
        pattern = prefix + any_slash.join(re.escape(segment) for segment in segments)
        return re.compile(pattern, re.IGNORECASE)

    def recursive_patch(value, needle, replacement):
        """Apply ``needle`` -> ``replacement`` throughout a nested structure."""
        if isinstance(value, list):
            return [recursive_patch(v, needle, replacement) for v in value]
        if isinstance(value, dict):
            for key in list(value):
                next_value = value.pop(key)
                # Only string keys can be searched; leave other keys as-is.
                if isinstance(key, str):
                    key = needle.sub(replacement, key)
                value[key] = recursive_patch(next_value, needle, replacement)
            return value
        if isinstance(value, str):
            return needle.sub(replacement, value)
        return value

    for target_path, replacement in (
        # NOTE(review): get_state_dir()'s return type is not visible here;
        # str() keeps the handling below uniform if it returns a path object.
        (str(get_state_dir()), "<statedir>"),
        (str(topsrcdir), "<topsrcdir>"),
        (str(Path.home()), "~"),
    ):
        # Sentry converts "vars" to their "representations". When paths are in
        # local variables on Windows, "C:\Users\MozillaUser\Desktop" becomes
        # "'C:\\Users\\MozillaUser\\Desktop'". To still catch this case, we
        # "repr" the path and scrub the beginning and end quotes, then
        # find-and-replace on that too.
        repr_path = repr(target_path)[1:-1]
        # dict.fromkeys() drops the duplicate when both spellings are equal.
        for target in dict.fromkeys((target_path, repr_path)):
            needle_regex = build_needle(target)
            if needle_regex is None:
                continue
            sentry_event = recursive_patch(sentry_event, needle_regex, replacement)
    return sentry_event
def _delete_server_name(sentry_event, _):
    """Remove the ``server_name`` entry from a Sentry event, if present.

    A default is passed to ``pop`` so that an event without the key is
    returned untouched instead of raising ``KeyError``.

    Args:
        sentry_event: The Sentry event dict; modified in place.
        _: Unused; present so all event-mapping functions share a signature.

    Returns:
        The same ``sentry_event`` object.
    """
    sentry_event.pop("server_name", None)
    return sentry_event
def _get_repository_object(topsrcdir: Path):
    """Return the repository object for ``topsrcdir``, or ``None`` on failure.

    ``InvalidRepoPath`` and ``MissingVCSTool`` are not propagated: a warning
    is written to stderr and ``None`` is returned instead.
    """
    repo_path = str(topsrcdir)
    try:
        repo = get_repository_object(repo_path)
    except (InvalidRepoPath, MissingVCSTool) as e:
        print(f"Warning: {e}", file=sys.stderr)
        repo = None
    return repo
def _is_unmodified_mach_core(topsrcdir: Path):
    """Record whether mach is unmodified compared to the public tree.

    To avoid submitting Sentry events for errors caused by the user's local
    changes, we attempt to detect if mach (or code affecting mach) has been
    modified in the user's local state:

    * In a revision off of an "ancestor to central" revision, or:
    * In the working, uncommitted state.

    If "$topsrcdir/mach" and "*.py" haven't been touched, then we can be
    pretty confident that the Mach behaviour that caused the exception also
    exists in the public tree.

    This function returns nothing. The verdict is stored in the module-level
    ``_is_unmodified_mach_core_result``: ``True`` when no outgoing or changed
    file is "mach" or ends with ".py", ``False`` otherwise. It is also
    ``False`` when the repository cannot be opened or has no known upstream,
    because in those cases the state of the mach files is unknown.
    """
    global _is_unmodified_mach_core_result

    repo = _get_repository_object(topsrcdir)
    if repo is None:
        # _get_repository_object() already printed a warning. Without a
        # repository we cannot tell whether mach files are unmodified, and
        # calling methods on None would only crash this worker thread.
        _is_unmodified_mach_core_result = False
        return

    try:
        files = set(repo.get_outgoing_files()) | set(repo.get_changed_files())
    except MissingUpstreamRepo:
        # If we don't know the upstream state, we don't know if the mach files
        # have been unmodified.
        _is_unmodified_mach_core_result = False
        return

    _is_unmodified_mach_core_result = not any(
        file == "mach" or file.endswith(".py") for file in files
    )
# Module-level state shared by register_sentry(), _is_unmodified_mach_core()
# and _process_event().

# Verdict stored by _is_unmodified_mach_core(); remains None until that
# function has assigned it. _process_event() drops the event when it is falsy.
_is_unmodified_mach_core_result = None
# Thread object created and started by register_sentry() to run
# _is_unmodified_mach_core(); None until register_sentry() has done so.
# _process_event() join()s it before reading the verdict above.
_is_unmodified_mach_core_thread = None