Source code

Revision control

Copy as Markdown

Other Tools

# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Classes for each of the high-level metric types.
"""
import enum
from typing import Any, Dict, List, Optional, Type, Union # noqa
from . import pings
from . import tags
from . import util
# Important: if the values are ever changing here, make sure
# to also fix mozilla/glean. Otherwise language bindings may
# break there.
class Lifetime(enum.Enum):
ping = 0
application = 1
user = 2
class DataSensitivity(enum.Enum):
technical = 1
interaction = 2
stored_content = 3
web_activity = 3 # Old, deprecated name
highly_sensitive = 4
class Metric:
typename: str = "ERROR"
glean_internal_metric_cat: str = "glean.internal.metrics"
metric_types: Dict[str, Any] = {}
default_store_names: List[str] = ["metrics"]
def __init__(
self,
type: str,
category: str,
name: str,
bugs: List[str],
description: str,
notification_emails: List[str],
expires: Any,
metadata: Optional[Dict] = None,
data_reviews: Optional[List[str]] = None,
version: int = 0,
disabled: bool = False,
lifetime: str = "ping",
send_in_pings: Optional[List[str]] = None,
unit: Optional[str] = None,
gecko_datapoint: str = "",
no_lint: Optional[List[str]] = None,
data_sensitivity: Optional[List[str]] = None,
defined_in: Optional[Dict] = None,
telemetry_mirror: Optional[str] = None,
_config: Optional[Dict[str, Any]] = None,
_validated: bool = False,
):
# Avoid cyclical import
from . import parser
self.type = type
self.category = category
self.name = name
self.bugs = bugs
self.description = description
self.notification_emails = notification_emails
self.expires = expires
if metadata is None:
metadata = {}
self.metadata = metadata
if data_reviews is None:
data_reviews = []
self.data_reviews = data_reviews
self.version = version
self.disabled = disabled
self.lifetime = getattr(Lifetime, lifetime)
if send_in_pings is None:
send_in_pings = ["default"]
self.send_in_pings = send_in_pings
self.unit = unit
self.gecko_datapoint = gecko_datapoint
if no_lint is None:
no_lint = []
self.no_lint = no_lint
if data_sensitivity is not None:
self.data_sensitivity = [
getattr(DataSensitivity, x) for x in data_sensitivity
]
self.defined_in = defined_in
if telemetry_mirror is not None:
self.telemetry_mirror = telemetry_mirror
# _validated indicates whether this metric has already been jsonschema
# validated (but not any of the Python-level validation).
if not _validated:
data = {
"$schema": parser.METRICS_ID,
self.category: {self.name: self._serialize_input()},
} # type: Dict[str, util.JSONType]
for error in parser.validate(data):
raise ValueError(error)
# Store the config, but only after validation.
if _config is None:
_config = {}
self._config = _config
# Metrics in the special category "glean.internal.metrics" need to have
# an empty category string when identifying the metrics in the ping.
if self.category == Metric.glean_internal_metric_cat:
self.category = ""
def __init_subclass__(cls, **kwargs):
# Create a mapping of all of the subclasses of this class
if cls not in Metric.metric_types and hasattr(cls, "typename"):
Metric.metric_types[cls.typename] = cls
super().__init_subclass__(**kwargs)
@classmethod
def make_metric(
cls,
category: str,
name: str,
metric_info: Dict[str, util.JSONType],
config: Optional[Dict[str, Any]] = None,
validated: bool = False,
):
"""
Given a metric_info dictionary from metrics.yaml, return a metric
instance.
:param: category The category the metric lives in
:param: name The name of the metric
:param: metric_info A dictionary of the remaining metric parameters
:param: config A dictionary containing commandline configuration
parameters
:param: validated True if the metric has already gone through
jsonschema validation
:return: A new Metric instance.
"""
if config is None:
config = {}
metric_type = metric_info["type"]
if not isinstance(metric_type, str):
raise TypeError(f"Unknown metric type {metric_type}")
return cls.metric_types[metric_type](
category=category,
name=name,
defined_in=getattr(metric_info, "defined_in", None),
_validated=validated,
_config=config,
**metric_info,
)
def serialize(self) -> Dict[str, util.JSONType]:
"""
Serialize the metric back to JSON object model.
"""
d = self.__dict__.copy()
# Convert enum fields back to strings
for key, val in d.items():
if isinstance(val, enum.Enum):
d[key] = d[key].name
if isinstance(val, set):
d[key] = sorted(list(val))
if isinstance(val, list) and len(val) and isinstance(val[0], enum.Enum):
d[key] = [x.name for x in val]
del d["name"]
del d["category"]
if not d["unit"]:
d.pop("unit")
d.pop("_config", None)
d.pop("_generate_enums", None)
d.pop("_generate_structure", None)
return d
def _serialize_input(self) -> Dict[str, util.JSONType]:
d = self.serialize()
modified_dict = util.remove_output_params(d, "defined_in")
return modified_dict
def identifier(self) -> str:
"""
Create an identifier unique for this metric.
Generally, category.name; however, Glean internal
metrics only use name.
"""
if not self.category:
return self.name
return ".".join((self.category, self.name))
def is_disabled(self) -> bool:
return self.disabled or self.is_expired()
def is_expired(self) -> bool:
def default_handler(expires) -> bool:
return util.is_expired(expires, self._config.get("expire_by_version"))
return self._config.get("custom_is_expired", default_handler)(self.expires)
def validate_expires(self):
def default_handler(expires):
return util.validate_expires(expires, self._config.get("expire_by_version"))
return self._config.get("custom_validate_expires", default_handler)(
self.expires
)
def is_internal_metric(self) -> bool:
return self.category in (Metric.glean_internal_metric_cat, "")
class Boolean(Metric):
typename = "boolean"
class String(Metric):
typename = "string"
class StringList(Metric):
typename = "string_list"
class Counter(Metric):
typename = "counter"
class Quantity(Metric):
typename = "quantity"
class TimeUnit(enum.Enum):
nanosecond = 0
microsecond = 1
millisecond = 2
second = 3
minute = 4
hour = 5
day = 6
class TimeBase(Metric):
def __init__(self, *args, **kwargs):
self.time_unit = getattr(TimeUnit, kwargs.pop("time_unit", "millisecond"))
super().__init__(*args, **kwargs)
class Timespan(TimeBase):
typename = "timespan"
class TimingDistribution(TimeBase):
typename = "timing_distribution"
def __init__(self, *args, **kwargs):
self.time_unit = getattr(TimeUnit, kwargs.pop("time_unit", "nanosecond"))
Metric.__init__(self, *args, **kwargs)
class MemoryUnit(enum.Enum):
byte = 0
kilobyte = 1
megabyte = 2
gigabyte = 3
class MemoryDistribution(Metric):
typename = "memory_distribution"
def __init__(self, *args, **kwargs):
self.memory_unit = getattr(MemoryUnit, kwargs.pop("memory_unit", "byte"))
super().__init__(*args, **kwargs)
class HistogramType(enum.Enum):
linear = 0
exponential = 1
class CustomDistribution(Metric):
typename = "custom_distribution"
def __init__(self, *args, **kwargs):
self.range_min = kwargs.pop("range_min", 1)
self.range_max = kwargs.pop("range_max")
self.bucket_count = kwargs.pop("bucket_count")
self.histogram_type = getattr(
HistogramType, kwargs.pop("histogram_type", "exponential")
)
super().__init__(*args, **kwargs)
class Datetime(TimeBase):
typename = "datetime"
class Event(Metric):
typename = "event"
default_store_names = ["events"]
def __init__(self, *args, **kwargs):
self.extra_keys = kwargs.pop("extra_keys", {})
self.validate_extra_keys(self.extra_keys, kwargs.get("_config", {}))
super().__init__(*args, **kwargs)
self._generate_enums = [("allowed_extra_keys_with_types", "Extra")]
@property
def allowed_extra_keys(self):
# Sort keys so that output is deterministic
return sorted(list(self.extra_keys.keys()))
@property
def allowed_extra_keys_with_types(self):
# Sort keys so that output is deterministic
return sorted(
[(k, v.get("type", "string")) for (k, v) in self.extra_keys.items()],
key=lambda x: x[0],
)
@staticmethod
def validate_extra_keys(extra_keys: Dict[str, str], config: Dict[str, Any]) -> None:
if not config.get("allow_reserved") and any(
k.startswith("glean.") for k in extra_keys.keys()
):
raise ValueError(
"Extra keys beginning with 'glean.' are reserved for "
"Glean internal use."
)
class Uuid(Metric):
typename = "uuid"
class Url(Metric):
typename = "url"
class Jwe(Metric):
typename = "jwe"
def __init__(self, *args, **kwargs):
raise ValueError(
"JWE support was removed. "
"If you require this send an email to glean-team@mozilla.com."
)
class CowString(str):
"""
Wrapper class for strings that should be represented
as a `Cow<'static, str>` in Rust,
or `String` in other target languages.
This wraps `str`, so unless `CowString` is specifically
handled it acts (and serializes)
as a string.
"""
def __init__(self, val: str):
self.inner: str = val
def __eq__(self, other):
return self.inner == other.inner
def __hash__(self):
return self.inner.__hash__()
def __lt__(self, other):
return self.inner.__lt__(other.inner)
class Labeled(Metric):
labeled = True
def __init__(self, *args, **kwargs):
labels = kwargs.pop("labels", None)
if labels is not None:
self.ordered_labels = labels
self.labels = set([CowString(label) for label in labels])
else:
self.ordered_labels = None
self.labels = None
super().__init__(*args, **kwargs)
def serialize(self) -> Dict[str, util.JSONType]:
"""
Serialize the metric back to JSON object model.
"""
d = super().serialize()
d["labels"] = self.ordered_labels
del d["ordered_labels"]
return d
class LabeledBoolean(Labeled, Boolean):
typename = "labeled_boolean"
class LabeledString(Labeled, String):
typename = "labeled_string"
class LabeledCounter(Labeled, Counter):
typename = "labeled_counter"
class Rate(Metric):
typename = "rate"
def __init__(self, *args, **kwargs):
self.denominator_metric = kwargs.pop("denominator_metric", None)
super().__init__(*args, **kwargs)
class Denominator(Counter):
typename = "denominator"
# A denominator is a counter with an additional list of numerators.
numerators: List[Rate] = []
class Text(Metric):
typename = "text"
class Object(Metric):
typename = "object"
def __init__(self, *args, **kwargs):
structure = kwargs.pop("structure", None)
if not structure:
raise ValueError("`object` is missing required parameter `structure`")
self._generate_structure = self.validate_structure(structure)
super().__init__(*args, **kwargs)
ALLOWED_TOPLEVEL = {"type", "properties", "items"}
ALLOWED_TYPES = ["object", "array", "number", "string", "boolean"]
@staticmethod
def _validate_substructure(structure):
extra = set(structure.keys()) - Object.ALLOWED_TOPLEVEL
if extra:
extra = ", ".join(extra)
allowed = ", ".join(Object.ALLOWED_TOPLEVEL)
raise ValueError(
f"Found additional fields: {extra}. Only allowed: {allowed}"
)
if "type" not in structure or structure["type"] not in Object.ALLOWED_TYPES:
raise ValueError("invalid or missing `type` in object structure")
if structure["type"] == "object":
if "items" in structure:
raise ValueError("`items` not allowed in object structure")
if "properties" not in structure:
raise ValueError("`properties` missing for type `object`")
for key in structure["properties"]:
value = structure["properties"][key]
structure["properties"][key] = Object._validate_substructure(value)
if structure["type"] == "array":
if "properties" in structure:
raise ValueError("`properties` not allowed in array structure")
if "items" not in structure:
raise ValueError("`items` missing for type `array`")
value = structure["items"]
structure["items"] = Object._validate_substructure(value)
return structure
@staticmethod
def validate_structure(structure):
if None:
raise ValueError("`structure` needed for object metric.")
structure = Object._validate_substructure(structure)
return structure
ObjectTree = Dict[str, Dict[str, Union[Metric, pings.Ping, tags.Tag]]]