Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mobile internet outages - rough draft #5975

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#warn
{{ min_row_count(500, where="DATE(`datetime`) = @submission_date") }}

#warn
{{ is_unique(columns=["country","city","geo_subdivision1","geo_subdivision2", "isp_name", "datetime"], where="DATE(`datetime`) = @submission_date") }}

#warn
{{ not_null(columns=[
"country",
"city",
"datetime",
], where="DATE(`datetime`) = @submission_date") }}

#warn
{{ value_length(column="country", expected_length=2, where="DATE(`datetime`) = @submission_date") }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
friendly_name: Internet Outages (Mobile)
# yamllint disable rule:line-length
description: |-
This contains a set aggregated metrics that correlate to internet
outages for different countries in the world specifically build using mobile data.

owners:
- aplacitelli@mozilla.com
labels:
incremental: true
scheduling:
dag_name: bqetl_internet_outages
bigquery:
time_partitioning:
type: day
field: datetime
require_partition_filter: null
clustering:
fields:
- country
- city
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
-- combine baseline pings FROM different products
WITH product_union AS (
-- Firefox for Android
SELECT
metadata.geo.country,
metadata.geo.city,
metadata.geo.subdivision1 AS geo_subdivision1,
metadata.geo.subdivision2 AS geo_subdivision2,
TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`,
metadata.isp.name AS isp_name,
ping_info.end_time,
submission_timestamp,
FROM
`moz-fx-data-shared-prod.fenix.baseline`
WHERE
DATE(submission_timestamp)
BETWEEN "2024-06-20"
AND "2024-06-29"
AND (metadata.geo.country IS NOT NULL OR metadata.geo.country <> "??")
UNION ALL
-- Firefox for iOS
SELECT
metadata.geo.country,
metadata.geo.city,
metadata.geo.subdivision1 AS geo_subdivision1,
metadata.geo.subdivision2 AS geo_subdivision2,
TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`,
metadata.isp.name AS isp_name,
ping_info.end_time,
submission_timestamp,
FROM
`moz-fx-data-shared-prod.firefox_ios.baseline`
WHERE
DATE(submission_timestamp)
BETWEEN "2024-06-20"
AND "2024-06-29"
AND (metadata.geo.country IS NOT NULL OR metadata.geo.country <> "??")
UNION ALL
-- Firefox Focus for Android
SELECT
metadata.geo.country,
metadata.geo.city,
metadata.geo.subdivision1 AS geo_subdivision1,
metadata.geo.subdivision2 AS geo_subdivision2,
TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`,
metadata.isp.name AS isp_name,
ping_info.end_time,
submission_timestamp,
FROM
`moz-fx-data-shared-prod.focus_android.baseline`
WHERE
DATE(submission_timestamp)
BETWEEN "2024-06-20"
AND "2024-06-29"
AND (metadata.geo.country IS NOT NULL OR metadata.geo.country <> "??")
UNION ALL
-- Firefox Focus for iOS
SELECT
metadata.geo.country,
metadata.geo.city,
metadata.geo.subdivision1 AS geo_subdivision1,
metadata.geo.subdivision2 AS geo_subdivision2,
TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`,
metadata.isp.name AS isp_name,
ping_info.end_time,
submission_timestamp,
FROM
`moz-fx-data-shared-prod.focus_ios.baseline`
WHERE
DATE(submission_timestamp)
BETWEEN "2024-06-20"
AND "2024-06-29"
AND (metadata.geo.country IS NOT NULL OR metadata.geo.country <> "??")
UNION ALL
-- Firefox Klar for Android
SELECT
metadata.geo.country,
metadata.geo.city,
metadata.geo.subdivision1 AS geo_subdivision1,
metadata.geo.subdivision2 AS geo_subdivision2,
TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS submission_hour,
metadata.isp.name AS isp_name,
ping_info.end_time,
submission_timestamp,
FROM
`moz-fx-data-shared-prod.klar_android.baseline`
WHERE
DATE(submission_timestamp)
BETWEEN "2024-06-20"
AND "2024-06-29"
AND (metadata.geo.country IS NOT NULL OR metadata.geo.country <> "??")
UNION ALL
-- Firefox Klar for iOS
SELECT
metadata.geo.country,
metadata.geo.city,
metadata.geo.subdivision1 AS geo_subdivision1,
metadata.geo.subdivision2 AS geo_subdivision2,
TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`,
metadata.isp.name AS isp_name,
ping_info.end_time,
submission_timestamp,
FROM
`moz-fx-data-shared-prod.klar_ios.baseline`
WHERE
DATE(submission_timestamp)
BETWEEN "2024-06-20"
AND "2024-06-29"
AND (metadata.geo.country IS NOT NULL OR metadata.geo.country <> "??")
UNION ALL
-- Mozilla VPN
SELECT
metadata.geo.country,
metadata.geo.city,
metadata.geo.subdivision1 AS geo_subdivision1,
metadata.geo.subdivision2 AS geo_subdivision2,
TIMESTAMP_TRUNC(submission_timestamp, HOUR) `datetime`,
metadata.isp.name AS isp_name,
ping_info.end_time,
submission_timestamp,
FROM
`moz-fx-data-shared-prod.mozilla_vpn.baseline`
WHERE
DATE(submission_timestamp)
BETWEEN "2024-06-20"
AND "2024-06-29"
AND (metadata.geo.country IS NOT NULL OR metadata.geo.country <> "??")
),
update_values AS (
SELECT
* REPLACE (
IF(city = "??" OR city IS NULL, "unknown", city) AS city,
NULLIF(geo_subdivision1, "??") AS geo_subdivision1,
NULLIF(geo_subdivision2, "??") AS geo_subdivision2
),
CASE
WHEN REGEXP_CONTAINS(
end_time,
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}[-|+]\\d{2}:\\d{2}"
)
THEN TIMESTAMP(end_time)
WHEN REGEXP_CONTAINS(end_time, "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}[-|+]\\d{2}:\\d{2}")
THEN PARSE_TIMESTAMP("%Y-%m-%dT%H:%M%Ez", end_time)
ELSE NULL
END AS parsed_end_time,
FROM
product_union
)
SELECT
country,
city,
geo_subdivision1,
geo_subdivision2,
isp_name,
`datetime`,
COUNT(*) AS ping_count,
AVG(
ABS(TIMESTAMP_DIFF(submission_timestamp, parsed_end_time, MINUTE))
) AS avg_ping_arrival_delay_in_minutes,
FROM
update_values
GROUP BY
country,
city,
geo_subdivision1,
geo_subdivision2,
isp_name,
`datetime`
HAVING
ping_count > 50
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
fields:
- mode: NULLABLE
name: country
type: STRING
- mode: NULLABLE
name: city
type: STRING
- mode: NULLABLE
name: geo_subdivision1
type: STRING
- mode: NULLABLE
name: geo_subdivision2
type: STRING
- mode: NULLABLE
name: isp_name
type: STRING
- mode: NULLABLE
name: datetime
type: TIMESTAMP
- mode: NULLABLE
name: INTEGER
type: ping_count
- mode: NULLABLE
name: avg_ping_arrival_delay_in_minutes
type: FLOAT