Skip to content

Commit

Permalink
Add ios baseline_clients_yearly (#4506)
Browse files Browse the repository at this point in the history
  • Loading branch information
fbertsch authored and kik-kik committed Nov 14, 2023
1 parent ad74568 commit 4cd34d5
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
friendly_name: Baseline Clients Yearly
description: |-
Captures activity history of each client in 1-Year
windows for each submission date based on baseline pings.
Clustering fields: `normalized_channel`, `sample_id`
See also: `baseline_clients_daily`
owners:
- frank@mozilla.com
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{% set usage_types = [("seen", "TRUE"), ("active", "durations > 0")] %}

CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.firefox_ios.baseline_clients_yearly`
AS
SELECT
{% for usage_type, _ in usage_types %}
`moz-fx-data-shared-prod`.udf.bits_to_days_since_seen(days_{{ usage_type }}_bytes) AS days_since_{{ usage_type }},
`moz-fx-data-shared-prod`.udf.bits_to_days_since_seen(~days_{{ usage_type }}_bytes) AS consecutive_days_{{ usage_type }},
`moz-fx-data-shared-prod`.udf.bits_to_days_seen(days_{{ usage_type }}_bytes) AS days_{{ usage_type }}_in_past_year,
{% endfor %}
DATE_DIFF(submission_date, first_seen_date, DAY) AS days_since_first_seen,
EXTRACT(DAYOFWEEK FROM submission_date) AS day_of_week,
*
FROM
`moz-fx-data-shared-prod.firefox_ios_derived.baseline_clients_yearly_v1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#fail
{{ is_unique(["client_id"], "submission_date = @submission_date") }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
friendly_name: Baseline Clients Yearly
description: |-
Captures activity history of each client in 1-Year
windows for each submission date based on baseline pings.
Clustering fields: `normalized_channel`, `sample_id`
See also: `baseline_clients_daily`
owners:
- frank@mozilla.com
labels:
incremental: true
owner1: frank@mozilla.com
scheduling:
dag_name: bqetl_firefox_ios
depends_on_past: true
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: null
clustering:
fields: [normalized_channel, sample_id]
deprecated: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
{% set usage_types = [("seen", "TRUE"), ("active", "durations > 0")] %}

{% if is_init() %}

CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod`.firefox_ios_derived.baseline_clients_yearly_v1
PARTITION BY
submission_date
CLUSTER BY
normalized_channel,
sample_id
OPTIONS
(require_partition_filter = TRUE)
AS
SELECT
{% for usage_type, _ in usage_types %}
CAST(NULL AS BYTES) AS days_{{ usage_type }}_bytes,
{% endfor %}
-- We make sure to delay * until the end so that as new columns are added
-- to the daily table we can add those columns in the same order to the end
-- of this schema, which may be necessary for the daily join query between
-- the two tables to validate.
*
FROM
`moz-fx-data-shared-prod`.firefox_ios.baseline_clients_daily
WHERE
-- Output empty table and read no input rows
FALSE

{% else %}

WITH _current AS (
SELECT
-- In this raw table, we capture the history of activity over the past
-- 365 days for each usage criterion as an array of bytes. The
-- rightmost bit represents whether the user was active in the current day.
{% for usage_type, criterion in usage_types %}
udf.bool_to_365_bits({{ criterion }}) AS days_{{ usage_type }}_bytes,
{% endfor %}
* EXCEPT (submission_date),
FROM
`moz-fx-data-shared-prod`.firefox_ios.baseline_clients_daily
WHERE
submission_date = @submission_date
AND sample_id IS NOT NULL
),
--
_previous AS (
SELECT
* EXCEPT (submission_date)
FROM
`moz-fx-data-shared-prod`.firefox_ios_derived.baseline_clients_yearly_v1
WHERE
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
-- Filter out rows from yesterday that have now fallen outside the 365-day window.
AND BIT_COUNT(udf.shift_365_bits_one_day(days_seen_bytes)) > 0
AND sample_id IS NOT NULL
)
--
SELECT
@submission_date AS submission_date,
IF(_current.client_id IS NOT NULL, _current, _previous).* REPLACE (
{% for usage_type, _ in usage_types %}
udf.combine_adjacent_days_365_bits(
_previous.days_{{ usage_type }}_bytes,
_current.days_{{ usage_type }}_bytes
) AS days_{{ usage_type }}_bytes
{{ "," if not loop.last }}
{% endfor %}
)
FROM
_current
FULL JOIN
_previous
USING
(client_id)

{% endif %}

0 comments on commit 4cd34d5

Please sign in to comment.