Skip to content

Commit

Permalink
Add new macros for diff calculation, and unit tests (#99)
Browse files Browse the repository at this point in the history
* Add macro for new hash-based comparison strategy

* split out SF-focused version of macro

* Fix change to complex object

* Fix overuse of star

* switch from compare rels to compare queries

* provide wrapping parens

* switch to array of columns for PK

* split unit tests into own files, change unit tests to array pk

* tidy up get_comp_bounds

* fix arg rename

* add quick_are_queries_identical and unit tests

* Move data tests into own directory

* Add test for multiple PKs

* fix incorrect unit test configs

* make data types for id and id_2 big enough nums

* Mock event_time response

* fix hardcoded value in quick_are_qs_identical

* Add unit tests for null handling (still broken)

* Rename columns to be more unique

* Steal surrogate key macro from utils

* Use generated surrogate key across the board in place of PK

* rm my profile reference

* Update quick_are_queries_identical.sql

* Add diagram explaining comparison bounds

* Add comments explaining warehouse-specific optimisations

* cross-db support

* subq

* no postgres or redshift for a sec

* add default var values for compare wrappers

* avoid lateral alias reference for BQ

* BQ doesn't support count(arg1, arg2)

* re-enable redshift

* Alias subq for redshift

* remove extra comma

* add row status of nonunique_pk

* remove redundant test and wrapper model

* Create json-y tests for snowflake

* Add workaround for redshift to support count num rows in status

* skip incompatible tests

* Fix redshift lack of bool_or support in window funcs

* add skip exclusions for everything else

* fix incorrect skip tag application

* Move user configs to project.yml from profiles

* Temporarily disable unpassable redshift tests

* add temp skip to circle's config.yml

* forgot tag: method

* Temporarily skip reworked_compare_all_statuses_different_column_set

* Skip another test redshift

* disable unsupported tests BQ

* postgres too?

* Fixes for postgres

* namespace macros

* It's a postgres problem, not a redshift problem

* Handle postgres 63 char limit

* Add databricks

* Rename tests to data_tests

* Found a better workaround for missing count distinct window

* actually call the macro

* disable syntax-failing tests on dbx
  • Loading branch information
joellabes authored May 27, 2024
1 parent 8473293 commit 9da3c51
Show file tree
Hide file tree
Showing 45 changed files with 997 additions and 36 deletions.
30 changes: 21 additions & 9 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
. dbt_venv/bin/activate
python -m pip install --upgrade pip setuptools
python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery
python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery dbt-databricks
mkdir -p ~/.dbt
cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml
Expand All @@ -52,8 +52,8 @@ jobs:
dbt deps --target postgres
dbt seed --target postgres --full-refresh
dbt compile --target postgres
dbt run --target postgres
dbt test --target postgres
dbt run --target postgres --exclude tag:skip+ tag:temporary_skip+
dbt test --target postgres --exclude tag:skip+ tag:temporary_skip+
- run:
name: "Run Tests - Redshift"
Expand All @@ -64,8 +64,8 @@ jobs:
dbt deps --target redshift
dbt seed --target redshift --full-refresh
dbt compile --target redshift
dbt run --target redshift
dbt test --target redshift
dbt run --target redshift --exclude tag:skip+ tag:temporary_skip+
dbt test --target redshift --exclude tag:skip+ tag:temporary_skip+
- run:
name: "Run Tests - Snowflake"
Expand All @@ -76,8 +76,8 @@ jobs:
dbt deps --target snowflake
dbt seed --target snowflake --full-refresh
dbt compile --target snowflake
dbt run --target snowflake
dbt test --target snowflake
dbt run --target snowflake --exclude tag:skip+ tag:temporary_skip+
dbt test --target snowflake --exclude tag:skip+ tag:temporary_skip+
- run:
name: "Run Tests - BigQuery"
Expand All @@ -91,9 +91,20 @@ jobs:
dbt deps --target bigquery
dbt seed --target bigquery --full-refresh
dbt compile --target bigquery
dbt run --target bigquery --full-refresh
dbt test --target bigquery
dbt run --target bigquery --full-refresh --exclude tag:skip+ tag:temporary_skip+
dbt test --target bigquery --exclude tag:skip+ tag:temporary_skip+
- run:
name: "Run Tests - Databricks"
command: |
. dbt_venv/bin/activate
echo `pwd`
cd integration_tests
dbt deps --target databricks
dbt seed --target databricks --full-refresh
dbt compile --target databricks
dbt run --target databricks --exclude tag:skip+ tag:temporary_skip+
dbt test --target databricks --exclude tag:skip+ tag:temporary_skip+
- save_cache:
key: deps1-{{ .Branch }}
Expand All @@ -115,3 +126,4 @@ workflows:
- profile-redshift
- profile-snowflake
- profile-bigquery
- profile-databricks
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
target/
dbt_packages/
logs/
logfile
logfile
.DS_Store
21 changes: 21 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"yaml.schemas": {
"https://github.com/raw/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_yml_files-latest.json": [
"/**/*.yml",
"!profiles.yml",
"!dbt_project.yml",
"!packages.yml",
"!selectors.yml",
"!profile_template.yml"
],
"https://github.com/raw/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_project-latest.json": [
"dbt_project.yml"
],
"https://github.com/raw/dbt-labs/dbt-jsonschema/main/schemas/latest/selectors-latest.json": [
"selectors.yml"
],
"https://github.com/raw/dbt-labs/dbt-jsonschema/main/schemas/latest/packages-latest.json": [
"packages.yml"
]
},
}
18 changes: 11 additions & 7 deletions integration_tests/ci/sample.profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
# HEY! This file is used in the dbt-audit-helper integrations tests with CircleCI.
# You should __NEVER__ check credentials into version control. Thanks for reading :)

config:
send_anonymous_usage_stats: False
use_colors: True

integration_tests:
target: postgres
outputs:
Expand All @@ -27,15 +23,15 @@ integration_tests:
dbname: "{{ env_var('REDSHIFT_TEST_DBNAME') }}"
port: "{{ env_var('REDSHIFT_TEST_PORT') | as_number }}"
schema: audit_helper_integration_tests_redshift
threads: 1
threads: 8

bigquery:
type: bigquery
method: service-account
keyfile: "{{ env_var('BIGQUERY_SERVICE_KEY_PATH') }}"
project: "{{ env_var('BIGQUERY_TEST_DATABASE') }}"
schema: audit_helper_integration_tests_bigquery
threads: 1
threads: 8

snowflake:
type: snowflake
Expand All @@ -46,4 +42,12 @@ integration_tests:
database: "{{ env_var('SNOWFLAKE_TEST_DATABASE') }}"
warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}"
schema: audit_helper_integration_tests_snowflake
threads: 1
threads: 8

# Databricks target for the integration tests. Host, HTTP path and token are
# read from environment variables so no credentials live in version control.
databricks:
  type: databricks
  # Follows the audit_helper_integration_tests_<target> naming used by the
  # redshift, bigquery and snowflake outputs in this profile.
  schema: audit_helper_integration_tests_databricks
  host: "{{ env_var('DATABRICKS_TEST_HOST') }}"
  http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}"
  token: "{{ env_var('DATABRICKS_TEST_ACCESS_TOKEN') }}"
  threads: 10
12 changes: 12 additions & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,15 @@ clean-targets: # directories to be removed by `dbt clean`

seeds:
+quote_columns: false

# Default values for the vars that the comparison wrapper models read via
# var(). Unit tests can replace them per test case through `overrides.vars`.
vars:
  compare_queries_summarize: true
  reworked_compare__primary_key_columns: ['col1']
  reworked_compare__columns: ['col1']
  # Explicit null: no event_time by default.
  reworked_compare__event_time: null
  quick_are_queries_identical_cols: ['col1']
  quick_are_queries_identical_event_time: null

# User-level settings kept in the project file rather than in profiles.yml.
flags:
  send_anonymous_usage_stats: false
  use_colors: true
Original file line number Diff line number Diff line change
Expand Up @@ -2,96 +2,96 @@ version: 2

models:
- name: compare_queries
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_relations_without_exclude')

- name: compare_queries_concat_pk_without_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_without_summary')

- name: compare_queries_with_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_with_summary')

- name: compare_queries_without_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_without_summary')

- name: compare_relations_with_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_with_summary')

- name: compare_relations_without_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_without_summary')

- name: compare_relations_with_exclude
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_relations_with_exclude')

- name: compare_relations_without_exclude
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_relations_without_exclude')

- name: compare_all_columns_with_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_all_columns_with_summary')

- name: compare_all_columns_without_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_all_columns_without_summary')

- name: compare_all_columns_concat_pk_with_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_all_columns_concat_pk_with_summary')

- name: compare_all_columns_concat_pk_without_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_all_columns_concat_pk_without_summary')

- name: compare_all_columns_with_summary_and_exclude
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_all_columns_with_summary_and_exclude')

- name: compare_all_columns_where_clause
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_all_columns_where_clause')

- name: compare_relation_columns
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_relation_columns')

- name: compare_relations_concat_pk_without_summary
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_without_summary')

- name: compare_which_columns_differ
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_which_columns_differ')

- name: compare_which_columns_differ_exclude_cols
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_which_columns_differ_exclude_cols')

- name: compare_row_counts
tests:
data_tests:
- dbt_utils.equality:
compare_model: ref('expected_results__compare_row_counts')
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{# Emits a single hard-coded row; created_at is the warehouse's current timestamp. #}
select
    12 as id,
    22 as id_2,
    'xyz' as col1,
    'tuv' as col2,
    123 as col3,
    {{ dbt.current_timestamp() }} as created_at
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{# Emits a single hard-coded row; created_at is the warehouse's current timestamp. #}
select
    12 as id,
    22 as id_2,
    'xyz' as col1,
    'tuv' as col2,
    123 as col3,
    {{ dbt.current_timestamp() }} as created_at
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{#
    Tagged 'skip' on redshift, bigquery, postgres and databricks, and
    'runnable' on every other target. The model builds an object column with
    object_construct.
#}
{% set unsupported_targets = ['redshift', 'bigquery', 'postgres', 'databricks'] %}
{{ config(tags=['skip' if (target.type in unsupported_targets) else 'runnable']) }}

select
    1 as id,
    'John Doe' as col1,
    object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{#
    Tagged 'skip' on redshift, bigquery, postgres and databricks, and
    'runnable' on every other target. The model builds an object column with
    object_construct.
#}
{% set unsupported_targets = ['redshift', 'bigquery', 'postgres', 'databricks'] %}
{{ config(tags=['skip' if (target.type in unsupported_targets) else 'runnable']) }}

select
    1 as id,
    'John Doe' as col1,
    object_construct('street', '123 Main St', 'city', 'Anytown', 'state', 'CA') as col2
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

{#
    Wrapper model around audit_helper.compare_queries so the macro can be
    exercised by unit tests. Both sides select everything from a mockable
    model; the `compare_queries_summarize` var controls the summarize argument.
#}
{% set query_a = "select * from " ~ ref('unit_test_model_a') %}
{% set query_b = "select * from " ~ ref('unit_test_model_b') %}

{{ audit_helper.compare_queries(query_a, query_b, summarize = var('compare_queries_summarize')) }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Unit tests for the unit_compare_queries wrapper model. Both cases feed the
# same three rows to each side of the comparison and differ only in the
# `compare_queries_summarize` var.
unit_tests:
  # Summarized output: identical inputs are expected to collapse into a single
  # row flagged as present on both sides.
  - name: identical_records_compare_queries
    model: unit_compare_queries
    description: The world's most basic unit test.

    given:
      - input: ref('unit_test_model_a')
        rows:
          - { "id": 1, "col1": "abc", "col2": "def" }
          - { "id": 2, "col1": "hij", "col2": "klm" }
          - { "id": 3, "col1": "nop", "col2": "qrs" }
      - input: ref('unit_test_model_b')
        rows:
          - { "id": 1, "col1": "abc", "col2": "def" }
          - { "id": 2, "col1": "hij", "col2": "klm" }
          - { "id": 3, "col1": "nop", "col2": "qrs" }

    expect:
      rows:
        - {"in_a": true, "in_b": true}

    overrides:
      vars:
        compare_queries_summarize: true

  # Non-summarized output: identical inputs are expected to produce no rows.
  - name: identical_records_compare_queries_no_summarize
    model: unit_compare_queries
    description: The world's second most basic unit test.

    given:
      - input: ref('unit_test_model_a')
        rows:
          - { "id": 1, "col1": "abc", "col2": "def" }
          - { "id": 2, "col1": "hij", "col2": "klm" }
          - { "id": 3, "col1": "nop", "col2": "qrs" }
      - input: ref('unit_test_model_b')
        rows:
          - { "id": 1, "col1": "abc", "col2": "def" }
          - { "id": 2, "col1": "hij", "col2": "klm" }
          - { "id": 3, "col1": "nop", "col2": "qrs" }

    expect:
      rows: []

    overrides:
      vars:
        compare_queries_summarize: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{#
    Wrapper model around audit_helper.quick_are_queries_identical. It is
    tagged 'skip' on redshift, bigquery, postgres and databricks, and
    'runnable' on every other target. The compared columns and event_time
    come from project vars.
#}
{% set unsupported_targets = ['redshift', 'bigquery', 'postgres', 'databricks'] %}
{{ config(tags=['skip' if (target.type in unsupported_targets) else 'runnable']) }}

{% set query_a = "select * from " ~ ref('unit_test_model_a') %}
{% set query_b = "select * from " ~ ref('unit_test_model_b') %}

{{
    audit_helper.quick_are_queries_identical(
        query_a,
        query_b,
        columns=var('quick_are_queries_identical_cols'),
        event_time=var('quick_are_queries_identical_event_time')
    )
}}
Loading

0 comments on commit 9da3c51

Please sign in to comment.