From 7ed68a8bd8d732a565c8e0d348c4b448ec43b598 Mon Sep 17 00:00:00 2001
From: Anna Scholtz
Date: Wed, 25 Oct 2023 16:16:56 +0200
Subject: [PATCH] WIP event monitoring

---
 bqetl_project.yaml                            |   5 +
 sql_generators/event_monitoring/__init__.py   | 113 ++++++++++++++++++
 .../templates/event_monitoring.view.sql       |  17 +++
 .../templates/event_monitoring_live.init.sql  |  70 +++++++++++
 .../event_monitoring/templates/metadata.yaml  |   8 +
 5 files changed, 213 insertions(+)
 create mode 100644 sql_generators/event_monitoring/__init__.py
 create mode 100644 sql_generators/event_monitoring/templates/event_monitoring.view.sql
 create mode 100644 sql_generators/event_monitoring/templates/event_monitoring_live.init.sql
 create mode 100644 sql_generators/event_monitoring/templates/metadata.yaml

diff --git a/bqetl_project.yaml b/bqetl_project.yaml
index 0f2cf4fd95f..0298f495a47 100644
--- a/bqetl_project.yaml
+++ b/bqetl_project.yaml
@@ -426,3 +426,8 @@ generate:
     - sql/moz-fx-data-shared-prod/mozilla_vpn/events/**
     - sql/moz-fx-data-shared-prod/mozilla_vpn/main/**
     - sql/moz-fx-data-shared-prod/fenix/client_deduplication/**
+  event_monitoring:
+    skip_apps:
+    - mlhackweek_search
+    - regrets_reporter
+    - regrets_reporter_ucs
diff --git a/sql_generators/event_monitoring/__init__.py b/sql_generators/event_monitoring/__init__.py
new file mode 100644
index 00000000000..ec29ebbf0bb
--- /dev/null
+++ b/sql_generators/event_monitoring/__init__.py
@@ -0,0 +1,113 @@
+"""Event monitoring view generation."""
+import os
+from pathlib import Path
+
+import click
+import requests
+import yaml
+from jinja2 import Environment, FileSystemLoader
+
+from bigquery_etl.cli.utils import use_cloud_function_option
+from bigquery_etl.config import ConfigLoader
+from bigquery_etl.format_sql.formatter import reformat
+from bigquery_etl.util.common import write_sql
+
+FILE_PATH = Path(os.path.dirname(__file__))
+BASE_DIR = Path(FILE_PATH).parent.parent
+APP_LISTINGS_URL = "https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings"
+TABLE_NAME = "event_monitoring_live"
+
+
+def get_app_info():
+    """Return the probeinfo app listings grouped by application name.
+
+    The result maps each `app_name` to the list of app listing entries
+    sharing that name.
+
+    Raises `requests.HTTPError` if the probeinfo API responds with an error.
+    """
+    resp = requests.get(APP_LISTINGS_URL)
+    resp.raise_for_status()
+
+    app_info = {}
+    for app in resp.json():
+        app_info.setdefault(app["app_name"], []).append(app)
+
+    return app_info
+
+
+def generate_queries(project, path, write_dir):
+    """Generate the `event_monitoring_live` materialized view of each app.
+
+    Renders the init SQL and metadata templates for every app listing whose
+    app name is not in the `skip_apps` config and writes them to
+    `{write_dir}/{project}/{dataset}_derived/event_monitoring_live/`.
+
+    :param project: ID of the project the views are generated for
+    :param path: unused; templates are always loaded from `templates/`
+        next to this module
+    :param write_dir: `Path` of the directory the files are written to
+    """
+    skip_apps = ConfigLoader.get(
+        "generate", "event_monitoring", "skip_apps", fallback=[]
+    )
+
+    env = Environment(
+        loader=FileSystemLoader(FILE_PATH / "templates"),
+        keep_trailing_newline=True,
+    )
+    sql_template = env.get_template("event_monitoring_live.init.sql")
+    metadata_template = env.get_template("metadata.yaml")
+    # TODO: event_monitoring.view.sql is not rendered yet
+
+    for app_name, app_listings in get_app_info().items():
+        if app_name in skip_apps:
+            continue
+
+        for app in app_listings:
+            # presumably `bq_dataset_family` is the prefix of the listing's
+            # `_live` and `_derived` datasets; verify against probeinfo
+            dataset_id = app["bq_dataset_family"]
+            args = {"project_id": project, "dataset_id": dataset_id}
+
+            write_sql(
+                write_dir / project,
+                f"{project}.{dataset_id}_derived.{TABLE_NAME}",
+                "init.sql",
+                reformat(sql_template.render(**args)),
+            )
+
+            write_path = (
+                Path(write_dir) / project / f"{dataset_id}_derived" / TABLE_NAME
+            )
+            (write_path / "metadata.yaml").write_text(
+                metadata_template.render(**args)
+            )
+
+
+@click.command("generate")
+@click.option(
+    "--target-project",
+    "--target_project",
+    help="Which project the queries should be written to.",
+    default="moz-fx-data-shared-prod",
+)
+@click.option(
+    "--path",
+    help="Where query directories will be searched for.",
+    default="sql_generators/event_monitoring/templates",
+    required=False,
+    type=click.Path(file_okay=False),
+)
+@click.option(
+    "--output-dir",
+    "--output_dir",
+    help="The location to write to. Defaults to sql/.",
+    default=Path("sql"),
+    type=click.Path(file_okay=False),
+)
+@use_cloud_function_option
+def generate(target_project, path, output_dir, use_cloud_function):
+    """Generate the event monitoring views."""
+    output_dir = Path(output_dir)
+    generate_queries(target_project, path, output_dir)
diff --git a/sql_generators/event_monitoring/templates/event_monitoring.view.sql b/sql_generators/event_monitoring/templates/event_monitoring.view.sql
new file mode 100644
index 00000000000..31f7cd8faf5
--- /dev/null
+++ b/sql_generators/event_monitoring/templates/event_monitoring.view.sql
@@ -0,0 +1,17 @@
+{#- Rendered with `project_id` and `apps` (mappings with app_id, app_name, bq_dataset_family). -#}
+{% for app in apps %}
+SELECT
+  "{{ app['app_id'] }}" AS app_id,
+  "{{ app['app_name'] }}" AS app_name,
+  window_start,
+  window_end,
+  event_category,
+  event_name,
+  event_extra_key,
+  normalized_channel,
+  version,
+  total_events
+FROM
+  `{{ project_id }}.{{ app['bq_dataset_family'] }}_derived.event_monitoring_live`
+{% if not loop.last %} UNION ALL{% endif %}
+{% endfor %}
\ No newline at end of file
diff --git a/sql_generators/event_monitoring/templates/event_monitoring_live.init.sql b/sql_generators/event_monitoring/templates/event_monitoring_live.init.sql
new file mode 100644
index 00000000000..992b903d63a
--- /dev/null
+++ b/sql_generators/event_monitoring/templates/event_monitoring_live.init.sql
@@ -0,0 +1,70 @@
+{#- Rendered with `project_id` and `dataset_id`; counts events per time window. -#}
+CREATE MATERIALIZED VIEW
+IF
+  NOT EXISTS {{ project_id }}.{{ dataset_id }}_derived.event_monitoring_live
+  OPTIONS
+    (enable_refresh = TRUE, refresh_interval_minutes = 60)
+  AS
+{% if dataset_id != "telemetry" %}
+SELECT
+  TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND), HOUR),
+  -- Aggregates event counts over 60-minute intervals
+  INTERVAL(DIV(EXTRACT(MINUTE
+    FROM
+    TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND)), 60) * 60) MINUTE ) AS window_start,
+  TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND), HOUR), INTERVAL((DIV(EXTRACT(MINUTE
+    FROM
+    TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND)), 60) + 1) * 60) MINUTE ) AS window_end,
+  event.category AS event_category,
+  event.name AS event_name,
+  event_extra.key AS event_extra_key,
+  normalized_channel,
+  client_info.app_display_version AS version,
+  COUNT(*) AS total_events
+FROM
+  `{{ project_id }}.{{ dataset_id }}_live.events_v1`
+CROSS JOIN
+  UNNEST(events) AS event,
+  UNNEST(event.extra) AS event_extra
+{% else %}
+SELECT
+  TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND), HOUR),
+  -- Aggregates event counts over 30-minute intervals
+  INTERVAL(DIV(EXTRACT(MINUTE
+    FROM
+    TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND)), 30) * 30) MINUTE ) AS window_start,
+  TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND), HOUR), INTERVAL((DIV(EXTRACT(MINUTE
+    FROM
+    TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND)), 30) + 1) * 30) MINUTE ) AS window_end,
+
+  event.f2_ AS event_name,
+  event.f1_ AS event_category,
+  event_map_value.key AS event_extra_key,
+  normalized_channel,
+  application.version AS version,
+  COUNT(*) AS total_events
+FROM
+  `moz-fx-data-shared-prod.telemetry_live.event_v4`
+CROSS JOIN
+  UNNEST(
+    ARRAY_CONCAT(
+      payload.events.parent,
+      payload.events.content,
+      payload.events.dynamic,
+      payload.events.extension,
+      payload.events.gpu
+    )
+  ) AS event
+CROSS JOIN
+  UNNEST(event.f5_) AS event_map_value
+{% endif %}
+WHERE
+  DATE(submission_timestamp) > "2023-10-23"
+GROUP BY
+  window_start,
+  window_end,
+  event_category,
+  event_name,
+  event_extra_key,
+  normalized_channel,
+  version
\ No newline at end of file
diff --git a/sql_generators/event_monitoring/templates/metadata.yaml b/sql_generators/event_monitoring/templates/metadata.yaml
new file mode 100644
index 00000000000..b3707c31e85
--- /dev/null
+++ b/sql_generators/event_monitoring/templates/metadata.yaml
@@ -0,0 +1,8 @@
+friendly_name: Event Monitoring Live
+description: |-
+  Materialized view of experimentation related events
+  coming from {{ dataset_id }}.
+owners:
+- ascholtz@mozilla.com
+labels:
+  materialized_view: true