Skip to content

Commit

Permalink
WIP event monitoring
Browse files Browse the repository at this point in the history
  • Loading branch information
scholtzan committed Oct 31, 2023
1 parent b592df7 commit 7ed68a8
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 0 deletions.
5 changes: 5 additions & 0 deletions bqetl_project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -426,3 +426,8 @@ generate:
- sql/moz-fx-data-shared-prod/mozilla_vpn/events/**
- sql/moz-fx-data-shared-prod/mozilla_vpn/main/**
- sql/moz-fx-data-shared-prod/fenix/client_deduplication/**
event_monitoring:
skip_apps:
- mlhackweek_search
- regrets_reporter
- regrets_reporter_ucs
137 changes: 137 additions & 0 deletions sql_generators/event_monitoring/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""Event monitoring view generation."""
import os
from pathlib import Path

import requests
import click
import yaml
from jinja2 import Environment, FileSystemLoader

from bigquery_etl.cli.utils import use_cloud_function_option
from bigquery_etl.format_sql.formatter import reformat
from bigquery_etl.util.common import write_sql
from bigquery_etl.config import ConfigLoader

FILE_PATH = Path(os.path.dirname(__file__))
BASE_DIR = Path(FILE_PATH).parent.parent
APP_LISTINGS_URL = "https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings"

def get_app_info():
"""Return a list of applications from the probeinfo API."""
resp = requests.get(APP_LISTINGS_URL)
resp.raise_for_status()
apps_json = resp.json()
app_info = {}

for app in apps_json:
if app["app_name"] not in app_info:
app_info[app["app_name"]] = [app]
else:
app_info[app["app_name"]].append(app)

return app_info


def generate_queries(project, path, write_dir):
"""Generate event monitoring views."""
app_info = get_app_info()

app_info = [info for name, info in app_info.items() if name not in ConfigLoader.get(
"generate", "event_monitoring", "skip_apps", fallback=[]
)]

template_query_dir = FILE_PATH / "templates"
env = Environment(
loader=FileSystemLoader(template_query_dir),
keep_trailing_newline=True,
)
sql_template = env.get_template("event_monitoring_live.init.sql")
metadata_template = env.get_template("metadata.yaml")

for info in app_info:


write_sql(
write_dir / project,
f"{project}.{info['']}.{query}",
sql_template_file,
reformat(sql_template.render(**args)),
)

write_path = Path(write_dir) / project / "telemetry_derived" / query
(write_path / "metadata.yaml").write_text(metadata_template.render(**args))


for query, args in template_config["queries"].items():
template_query_dir = FILE_PATH / "templates" / query
env = Environment(
loader=FileSystemLoader(template_query_dir),
keep_trailing_newline=True,
)
sql_templates = list(template_query_dir.glob("*.sql"))
sql_template_file = sql_templates[0].name
sql_template = env.get_template(sql_template_file)
metadata_template = env.get_template("metadata.yaml")

args["destination_table"] = query
args["search_metrics"] = template_config["search_metrics"]

if args["per_app"]:
# generate a separate query for each application dataset
for dataset in template_config["applications"]:
args["dataset"] = dataset

write_sql(
write_dir / project,
f"{project}.{dataset}_derived.{query}",
sql_template_file,
reformat(sql_template.render(**args)),
)

write_path = Path(write_dir) / project / (dataset + "_derived") / query
(write_path / "metadata.yaml").write_text(
metadata_template.render(**args)
)
else:
# generate a single query that UNIONs application datasets
# these queries are written to `telemetry`
args["applications"] = template_config["applications"]

write_sql(
write_dir / project,
f"{project}.telemetry_derived.{query}",
sql_template_file,
reformat(sql_template.render(**args)),
)

write_path = Path(write_dir) / project / "telemetry_derived" / query
(write_path / "metadata.yaml").write_text(metadata_template.render(**args))


@click.command("generate")
@click.option(
"--target-project",
"--target_project",
help="Which project the queries should be written to.",
default="moz-fx-data-shared-prod",
)
@click.option(
"--path",
help="Where query directories will be searched for.",
default="sql_generators/event_monitoring/templates",
required=False,
type=click.Path(file_okay=False),
)
@click.option(
"--output-dir",
"--output_dir",
help="The location to write to. Defaults to sql/.",
default=Path("sql"),
type=click.Path(file_okay=False),
)
@use_cloud_function_option
def generate(target_project, path, output_dir, use_cloud_function):
"""Generate the event monitoring views."""
output_dir = Path(output_dir)
generate_queries(target_project, path, output_dir)

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{% for app in apps %}
SELECT
"{{ app['app_id'] }}" AS app_id,
"{{ app['app_name'] }}" AS app_name,
window_start,
window_end,
event_category,
event_name,
event_extra_key,
normalized_channel,
version,
total_events
FROM
`{{ project_id }}.{{ app['app_id'] }}_derived.event_monitoring_live`
{% if not loop.last %} UNION ALL{% endif %}
{% endfor %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
CREATE MATERIALIZED VIEW
IF
NOT EXISTS {{ project_id }}.{{ dataset_id }}_derived.event_monitoring_live
OPTIONS
(enable_refresh = TRUE, refresh_interval_minutes = 60)
AS
{% if dataset_id != "telemetry" %}
SELECT
TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND), HOUR),
-- Aggregates event counts over 30-minute intervals
INTERVAL(DIV(EXTRACT(MINUTE
FROM
TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND)), 60) * 60) MINUTE ) AS window_start,
TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND), HOUR), INTERVAL((DIV(EXTRACT(MINUTE
FROM
TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND)), 60) + 1) * 60) MINUTE ) AS window_end,
event.category AS event_category,
event.name AS event_name,
event_extra.key AS event_extra_key,
normalized_channel,
client_info.app_display_version AS version,
COUNT(*) AS total_events
FROM
`{{ project_id }}.{{ dataset_id }}_live.events_v1`
CROSS JOIN
UNNEST(events) AS event,
UNNEST(event.extra) AS event_extra
{% else %}
TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND), HOUR),
-- Aggregates event counts over 30-minute intervals
INTERVAL(DIV(EXTRACT(MINUTE
FROM
TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND)), 30) * 30) MINUTE ) AS window_start,
TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND), HOUR), INTERVAL((DIV(EXTRACT(MINUTE
FROM
TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND)), 30) + 1) * 30) MINUTE ) AS window_end,

event.f2_ AS event_name,
event.f1_ AS event_category,
event_map_value.key = 'branch' AS event_extra_key,
normalized_channel,
application.version AS version,
COUNT(*) AS total_events
FROM
`moz-fx-data-shared-prod.telemetry_live.event_v4`
CROSS JOIN
UNNEST(
ARRAY_CONCAT(
payload.events.parent,
payload.events.content,
payload.events.dynamic,
payload.events.extension,
payload.events.gpu
)
) AS event
CROSS JOIN
UNNEST(event.f5_) AS event_map_value
{% endif %}
WHERE
DATE(submission_timestamp) > "2023-10-23"
GROUP BY
window_start,
window_end,
event_category,
event_name,
event_extra_key,
normalized_channel,
version
8 changes: 8 additions & 0 deletions sql_generators/event_monitoring/templates/metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
friendly_name: Event Monitoring Live
description: |-
Materialized view of experimentation related events
coming from {{ dataset_id }}.
owners:
- ascholtz@mozilla.com
labels:
materialized_view: true

0 comments on commit 7ed68a8

Please sign in to comment.