-
Notifications
You must be signed in to change notification settings - Fork 100
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
234 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
"""Event monitoring view generation.""" | ||
import os | ||
from pathlib import Path | ||
|
||
import requests | ||
import click | ||
import yaml | ||
from jinja2 import Environment, FileSystemLoader | ||
|
||
from bigquery_etl.cli.utils import use_cloud_function_option | ||
from bigquery_etl.format_sql.formatter import reformat | ||
from bigquery_etl.util.common import write_sql | ||
from bigquery_etl.config import ConfigLoader | ||
|
||
FILE_PATH = Path(os.path.dirname(__file__)) | ||
BASE_DIR = Path(FILE_PATH).parent.parent | ||
APP_LISTINGS_URL = "https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings" | ||
|
||
def get_app_info(): | ||
"""Return a list of applications from the probeinfo API.""" | ||
resp = requests.get(APP_LISTINGS_URL) | ||
resp.raise_for_status() | ||
apps_json = resp.json() | ||
app_info = {} | ||
|
||
for app in apps_json: | ||
if app["app_name"] not in app_info: | ||
app_info[app["app_name"]] = [app] | ||
else: | ||
app_info[app["app_name"]].append(app) | ||
|
||
return app_info | ||
|
||
|
||
def generate_queries(project, path, write_dir): | ||
"""Generate event monitoring views.""" | ||
app_info = get_app_info() | ||
|
||
app_info = [info for name, info in app_info.items() if name not in ConfigLoader.get( | ||
"generate", "event_monitoring", "skip_apps", fallback=[] | ||
)] | ||
|
||
template_query_dir = FILE_PATH / "templates" | ||
env = Environment( | ||
loader=FileSystemLoader(template_query_dir), | ||
keep_trailing_newline=True, | ||
) | ||
sql_template = env.get_template("event_monitoring_live.init.sql") | ||
metadata_template = env.get_template("metadata.yaml") | ||
|
||
for info in app_info: | ||
|
||
|
||
write_sql( | ||
write_dir / project, | ||
f"{project}.{info['']}.{query}", | ||
sql_template_file, | ||
reformat(sql_template.render(**args)), | ||
) | ||
|
||
write_path = Path(write_dir) / project / "telemetry_derived" / query | ||
(write_path / "metadata.yaml").write_text(metadata_template.render(**args)) | ||
|
||
|
||
for query, args in template_config["queries"].items(): | ||
template_query_dir = FILE_PATH / "templates" / query | ||
env = Environment( | ||
loader=FileSystemLoader(template_query_dir), | ||
keep_trailing_newline=True, | ||
) | ||
sql_templates = list(template_query_dir.glob("*.sql")) | ||
sql_template_file = sql_templates[0].name | ||
sql_template = env.get_template(sql_template_file) | ||
metadata_template = env.get_template("metadata.yaml") | ||
|
||
args["destination_table"] = query | ||
args["search_metrics"] = template_config["search_metrics"] | ||
|
||
if args["per_app"]: | ||
# generate a separate query for each application dataset | ||
for dataset in template_config["applications"]: | ||
args["dataset"] = dataset | ||
|
||
write_sql( | ||
write_dir / project, | ||
f"{project}.{dataset}_derived.{query}", | ||
sql_template_file, | ||
reformat(sql_template.render(**args)), | ||
) | ||
|
||
write_path = Path(write_dir) / project / (dataset + "_derived") / query | ||
(write_path / "metadata.yaml").write_text( | ||
metadata_template.render(**args) | ||
) | ||
else: | ||
# generate a single query that UNIONs application datasets | ||
# these queries are written to `telemetry` | ||
args["applications"] = template_config["applications"] | ||
|
||
write_sql( | ||
write_dir / project, | ||
f"{project}.telemetry_derived.{query}", | ||
sql_template_file, | ||
reformat(sql_template.render(**args)), | ||
) | ||
|
||
write_path = Path(write_dir) / project / "telemetry_derived" / query | ||
(write_path / "metadata.yaml").write_text(metadata_template.render(**args)) | ||
|
||
|
||
@click.command("generate") | ||
@click.option( | ||
"--target-project", | ||
"--target_project", | ||
help="Which project the queries should be written to.", | ||
default="moz-fx-data-shared-prod", | ||
) | ||
@click.option( | ||
"--path", | ||
help="Where query directories will be searched for.", | ||
default="sql_generators/event_monitoring/templates", | ||
required=False, | ||
type=click.Path(file_okay=False), | ||
) | ||
@click.option( | ||
"--output-dir", | ||
"--output_dir", | ||
help="The location to write to. Defaults to sql/.", | ||
default=Path("sql"), | ||
type=click.Path(file_okay=False), | ||
) | ||
@use_cloud_function_option | ||
def generate(target_project, path, output_dir, use_cloud_function): | ||
"""Generate the event monitoring views.""" | ||
output_dir = Path(output_dir) | ||
generate_queries(target_project, path, output_dir) | ||
|
16 changes: 16 additions & 0 deletions
16
sql_generators/event_monitoring/templates/event_monitoring.view.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{% for app in apps %} | ||
SELECT | ||
"{{ app['app_id'] }}" AS app_id, | ||
"{{ app['app_name'] }}" AS app_name, | ||
window_start, | ||
window_end, | ||
event_category, | ||
event_name, | ||
event_extra_key, | ||
normalized_channel, | ||
version, | ||
total_events | ||
FROM | ||
`{{ project_id }}.{{ app['app_id'] }}_derived.event_monitoring_live` | ||
{% if not loop.last %} UNION ALL{% endif %} | ||
{% endfor %} |
68 changes: 68 additions & 0 deletions
68
sql_generators/event_monitoring/templates/event_monitoring_live.init.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
CREATE MATERIALIZED VIEW | ||
IF | ||
NOT EXISTS {{ project_id }}.{{ dataset_id }}_derived.event_monitoring_live | ||
OPTIONS | ||
(enable_refresh = TRUE, refresh_interval_minutes = 60) | ||
AS | ||
{% if dataset_id != "telemetry" %} | ||
SELECT | ||
TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND), HOUR), | ||
-- Aggregates event counts over 30-minute intervals | ||
INTERVAL(DIV(EXTRACT(MINUTE | ||
FROM | ||
TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND)), 60) * 60) MINUTE ) AS window_start, | ||
TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND), HOUR), INTERVAL((DIV(EXTRACT(MINUTE | ||
FROM | ||
TIMESTAMP_ADD(SAFE.PARSE_TIMESTAMP('%FT%H:%M%Ez', ping_info.start_time), INTERVAL event.timestamp MILLISECOND)), 60) + 1) * 60) MINUTE ) AS window_end, | ||
event.category AS event_category, | ||
event.name AS event_name, | ||
event_extra.key AS event_extra_key, | ||
normalized_channel, | ||
client_info.app_display_version AS version, | ||
COUNT(*) AS total_events | ||
FROM | ||
`{{ project_id }}.{{ dataset_id }}_live.events_v1` | ||
CROSS JOIN | ||
UNNEST(events) AS event, | ||
UNNEST(event.extra) AS event_extra | ||
{% else %} | ||
TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND), HOUR), | ||
-- Aggregates event counts over 30-minute intervals | ||
INTERVAL(DIV(EXTRACT(MINUTE | ||
FROM | ||
TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND)), 30) * 30) MINUTE ) AS window_start, | ||
TIMESTAMP_ADD( TIMESTAMP_TRUNC(TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND), HOUR), INTERVAL((DIV(EXTRACT(MINUTE | ||
FROM | ||
TIMESTAMP_ADD(submission_timestamp, INTERVAL event.f0_ MILLISECOND)), 30) + 1) * 30) MINUTE ) AS window_end, | ||
|
||
event.f2_ AS event_name, | ||
event.f1_ AS event_category, | ||
event_map_value.key = 'branch' AS event_extra_key, | ||
normalized_channel, | ||
application.version AS version, | ||
COUNT(*) AS total_events | ||
FROM | ||
`moz-fx-data-shared-prod.telemetry_live.event_v4` | ||
CROSS JOIN | ||
UNNEST( | ||
ARRAY_CONCAT( | ||
payload.events.parent, | ||
payload.events.content, | ||
payload.events.dynamic, | ||
payload.events.extension, | ||
payload.events.gpu | ||
) | ||
) AS event | ||
CROSS JOIN | ||
UNNEST(event.f5_) AS event_map_value | ||
{% endif %} | ||
WHERE | ||
DATE(submission_timestamp) > "2023-10-23" | ||
GROUP BY | ||
window_start, | ||
window_end, | ||
event_category, | ||
event_name, | ||
event_extra_key, | ||
normalized_channel, | ||
version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
friendly_name: Event Monitoring Live | ||
description: |- | ||
Materialized view of experimentation related events | ||
coming from {{ dataset_id }}. | ||
owners: | ||
- ascholtz@mozilla.com | ||
labels: | ||
materialized_view: true |