From ec40d466b5fa51522edfa8c1e02580316aa36a2c Mon Sep 17 00:00:00 2001 From: Vitor Avila Date: Thu, 14 Mar 2024 10:25:49 -0300 Subject: [PATCH 1/2] feat(dbt): Supporting sync columns with --preserve-metadata and --merge-metadata --- src/preset_cli/api/clients/superset.py | 12 + .../cli/superset/sync/dbt/command.py | 18 +- .../cli/superset/sync/dbt/datasets.py | 358 ++- tests/api/clients/superset_test.py | 51 + tests/cli/superset/sync/dbt/command_test.py | 74 + tests/cli/superset/sync/dbt/datasets_test.py | 2077 +++++++++-------- 6 files changed, 1496 insertions(+), 1094 deletions(-) diff --git a/src/preset_cli/api/clients/superset.py b/src/preset_cli/api/clients/superset.py index c04736a6..af5936c7 100644 --- a/src/preset_cli/api/clients/superset.py +++ b/src/preset_cli/api/clients/superset.py @@ -570,6 +570,18 @@ def get_dataset(self, dataset_id: int) -> Any: """ return self.get_resource("dataset", dataset_id) + def get_refreshed_dataset_columns(self, dataset_id: int) -> List[Any]: + """ + Return dataset columns. + """ + url = self.baseurl / "datasource/external_metadata/table" / str(dataset_id) + _logger.debug("GET %s", url) + response = self.session.get(url) + validate_response(response) + + resource = response.json() + return resource + def get_datasets(self, **kwargs: str) -> List[Any]: """ Return datasets, possibly filtered. 
diff --git a/src/preset_cli/cli/superset/sync/dbt/command.py b/src/preset_cli/cli/superset/sync/dbt/command.py index 7bce4969..2c647109 100644 --- a/src/preset_cli/cli/superset/sync/dbt/command.py +++ b/src/preset_cli/cli/superset/sync/dbt/command.py @@ -106,6 +106,12 @@ default=False, help="End the execution with an error if a model fails to sync or a deprecated feature is used", ) +@click.option( + "--sync-columns", + is_flag=True, + default=False, + help="Sync columns from source.", +) @raise_cli_errors @click.pass_context def dbt_core( # pylint: disable=too-many-arguments, too-many-branches, too-many-locals ,too-many-statements @@ -125,6 +131,7 @@ def dbt_core( # pylint: disable=too-many-arguments, too-many-branches, too-many preserve_metadata: bool = False, merge_metadata: bool = False, raise_failures: bool = False, + sync_columns: bool = False, ) -> None: """ Sync models/metrics from dbt Core to Superset and charts/dashboards to dbt exposures. @@ -245,6 +252,7 @@ def dbt_core( # pylint: disable=too-many-arguments, too-many-branches, too-many external_url_prefix, reload_columns=reload_columns, merge_metadata=merge_metadata, + sync_columns=sync_columns, ) if exposures: @@ -505,6 +513,12 @@ def fetch_sl_metrics( default=False, help="End the execution with an error if a model fails to sync or a deprecated feature is used", ) +@click.option( + "--sync-columns", + is_flag=True, + default=False, + help="Sync columns from source.", +) @click.pass_context @raise_cli_errors def dbt_cloud( # pylint: disable=too-many-arguments, too-many-locals @@ -522,8 +536,9 @@ def dbt_cloud( # pylint: disable=too-many-arguments, too-many-locals preserve_columns: bool = False, preserve_metadata: bool = False, merge_metadata: bool = False, - raise_failures: bool = False, access_url: Optional[str] = None, + raise_failures: bool = False, + sync_columns: bool = False, ) -> None: """ Sync models/metrics from dbt Cloud to Superset. 
@@ -589,6 +604,7 @@ def dbt_cloud( # pylint: disable=too-many-arguments, too-many-locals external_url_prefix, reload_columns=reload_columns, merge_metadata=merge_metadata, + sync_columns=sync_columns, ) if exposures: diff --git a/src/preset_cli/cli/superset/sync/dbt/datasets.py b/src/preset_cli/cli/superset/sync/dbt/datasets.py index a5dc4786..c09d793b 100644 --- a/src/preset_cli/cli/superset/sync/dbt/datasets.py +++ b/src/preset_cli/cli/superset/sync/dbt/datasets.py @@ -16,7 +16,7 @@ from preset_cli.api.clients.dbt import ModelSchema from preset_cli.api.clients.superset import SupersetClient, SupersetMetricDefinition from preset_cli.api.operators import OneToMany -from preset_cli.exceptions import SupersetError +from preset_cli.exceptions import CLIError, SupersetError DEFAULT_CERTIFICATION = {"details": "This table is produced by dbt"} @@ -36,9 +36,18 @@ def model_in_database(model: ModelSchema, url: SQLAlchemyURL) -> bool: def clean_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]: """ Remove incompatbile columns from metatada. - When updating an existing column/metric we need to remove some fields from the payload. + When creating/updating an column/metric we need to remove some fields from the payload. """ - for key in ("changed_on", "created_on", "type_generic"): + for key in ( + "autoincrement", + "changed_on", + "comment", + "created_on", + "default", + "name", + "nullable", + "type_generic", + ): if key in metadata: del metadata[key] @@ -77,7 +86,199 @@ def create_dataset( return client.create_dataset(**kwargs) -def sync_datasets( # pylint: disable=too-many-locals, too-many-branches, too-many-arguments, too-many-statements # noqa:C901 +def get_or_create_dataset( + client: SupersetClient, + model: ModelSchema, + database: Any, +) -> Dict[str, Any]: + """ + Returns the existing dataset or creates a new one. 
+ """ + filters = { + "database": OneToMany(database["id"]), + "schema": model["schema"], + "table_name": model.get("alias") or model["name"], + } + existing = client.get_datasets(**filters) + + if len(existing) > 1: + raise CLIError("More than one dataset found", 1) + + if existing: + dataset = existing[0] + _logger.info("Updating dataset %s", model["unique_id"]) + return client.get_dataset(dataset["id"]) + + _logger.info("Creating dataset %s", model["unique_id"]) + try: + dataset = create_dataset(client, database, model) + return dataset + except Exception as excinfo: + _logger.exception("Unable to create dataset") + raise CLIError("Unable to create dataset", 1) from excinfo + + +def get_certification_info( + model_kwargs: Dict[str, Any], + certification: Optional[Dict[str, Any]] = None, +) -> Optional[Dict[str, Any]]: + """ + Returns the certification information for a dataset. + """ + try: + certification_details = model_kwargs["extra"].pop("certification") + except KeyError: + certification_details = certification or DEFAULT_CERTIFICATION + return certification_details + + +def compute_metrics( + dataset_metrics: List[Any], + dbt_metrics: List[Any], + reload_columns: bool, + merge_metadata: bool, +) -> List[Any]: + """ + Compute the final list of metrics that should be used to update the dataset + + reload_columns (default): dbt data synced & Preset-only metadata deleted + merge_metadata: dbt data synced & Preset-only metadata preserved + if both are false: Preset metadata preserved & dbt-only metadata synced + """ + current_dataset_metrics = { + metric["metric_name"]: metric for metric in dataset_metrics + } + model_metrics = {metric["metric_name"]: metric for metric in dbt_metrics} + final_dataset_metrics = [] + + for name, metric_definition in model_metrics.items(): + if reload_columns or merge_metadata or name not in current_dataset_metrics: + if name in current_dataset_metrics: + metric_definition["id"] = current_dataset_metrics[name]["id"] + 
final_dataset_metrics.append(metric_definition) + + # Preserving Superset metadata + if not reload_columns: + for name, metric in current_dataset_metrics.items(): + if not merge_metadata or name not in model_metrics: + # remove data that is not part of the update payload + metric = clean_metadata(metric) + final_dataset_metrics.append(metric) + + return final_dataset_metrics + + +def compute_columns( + dataset_columns: List[Any], + refreshed_columns_list: List[Any], +) -> List[Any]: + """ + Refresh the list of columns preserving Preset configurations + """ + final_dataset_columns = [] + + current_dataset_columns = { + column["column_name"]: column for column in dataset_columns + } + refreshed_columns = { + column["column_name"]: column for column in refreshed_columns_list + } + for name, column in refreshed_columns.items(): + if name in current_dataset_columns: + cleaned_column = clean_metadata(current_dataset_columns[name]) + final_dataset_columns.append(cleaned_column) + else: + cleaned_column = clean_metadata(column) + final_dataset_columns.append(cleaned_column) + + return final_dataset_columns + + +def compute_columns_metadata( + dbt_columns: List[Any], + dataset_columns: List[Any], + reload_columns: bool, + merge_metadata: bool, +) -> List[Any]: + """ + Adds dbt metadata to dataset columns. 
+ + reload_columns (default): dbt data synced & Preset-only metadata deleted + merge_metadata: dbt data synced & Preset-only metadata preserved + if both are false: Preset metadata preserved & dbt-only metadata synced + """ + dbt_metadata = { + column["name"]: { + key: column[key] for key in ("description", "meta") if key in column + } + for column in dbt_columns + } + for column, definition in dbt_metadata.items(): + dbt_metadata[column]["verbose_name"] = column + for key, value in definition.pop("meta", {}).get("superset", {}).items(): + dbt_metadata[column][key] = value + + for column in dataset_columns: + name = column["column_name"] + if name in dbt_metadata: + for key, value in dbt_metadata[name].items(): + if reload_columns or merge_metadata or not column.get(key): + column[key] = value + + # remove data that is not part of the update payload + column = clean_metadata(column) + + # for some reason this is being sent as null sometimes + # https://github.com/preset-io/backend-sdk/issues/163 + if "is_active" in column and column["is_active"] is None: + del column["is_active"] + + return dataset_columns + + +def compute_dataset_metadata( # pylint: disable=too-many-arguments + model: Dict[str, Any], + certification: Optional[Dict[str, Any]], + disallow_edits: bool, + final_dataset_metrics: List[Any], + base_url: Optional[URL], + final_dataset_columns: List[Any], +) -> Dict[str, Any]: + """ + Returns the dataset metadata based on the model information + """ + # load Superset-specific metadata from dbt model definition (model.meta.superset) + model_kwargs = model.get("meta", {}).pop("superset", {}) + certification_details = get_certification_info(model_kwargs, certification) + extra = { + "unique_id": model["unique_id"], + "depends_on": "ref('{name}')".format(**model), + **model_kwargs.pop( + "extra", + {}, + ), + } + if certification_details: + extra["certification"] = certification_details + + # update dataset metadata + update = { + "description": 
model.get("description", ""), + "extra": json.dumps(extra), + "is_managed_externally": disallow_edits, + "metrics": final_dataset_metrics, + **model_kwargs, # include additional model metadata defined in model.meta.superset + } + if base_url: + fragment = "!/model/{unique_id}".format(**model) + update["external_url"] = str(base_url.with_fragment(fragment)) + if final_dataset_columns: + update["columns"] = final_dataset_columns + + return update + + +def sync_datasets( # pylint: disable=too-many-arguments, too-many-locals # noqa:C901 client: SupersetClient, models: List[ModelSchema], metrics: Dict[str, List[SupersetMetricDefinition]], @@ -87,96 +288,50 @@ def sync_datasets( # pylint: disable=too-many-locals, too-many-branches, too-ma certification: Optional[Dict[str, Any]] = None, reload_columns: bool = True, merge_metadata: bool = False, + sync_columns: bool = False, ) -> Tuple[List[Any], List[str]]: """ Read the dbt manifest and import models as datasets with metrics. """ base_url = URL(external_url_prefix) if external_url_prefix else None - - # add datasets datasets = [] failed_datasets = [] - for model in models: - # load additional metadata from dbt model definition - model_kwargs = model.get("meta", {}).pop("superset", {}) + for model in models: + # get corresponding dataset try: - certification_details = model_kwargs["extra"].pop("certification") - except KeyError: - certification_details = certification or DEFAULT_CERTIFICATION - - certification_info = {"certification": certification_details} - - filters = { - "database": OneToMany(database["id"]), - "schema": model["schema"], - "table_name": model.get("alias") or model["name"], - } - existing = client.get_datasets(**filters) - if len(existing) > 1: - raise Exception("More than one dataset found") - - if existing: - dataset = existing[0] - _logger.info("Updating dataset %s", model["unique_id"]) - else: - _logger.info("Creating dataset %s", model["unique_id"]) - try: - dataset = create_dataset(client, 
database, model) - except Exception: # pylint: disable=broad-except - _logger.exception("Unable to create dataset") - failed_datasets.append(model["unique_id"]) - continue + dataset = get_or_create_dataset(client, model, database) + except CLIError: + failed_datasets.append(model["unique_id"]) + continue - extra = { - "unique_id": model["unique_id"], - "depends_on": "ref('{name}')".format(**model), - **( - certification_info - if certification_info["certification"] is not None - else {} - ), - **model_kwargs.pop( - "extra", - {}, - ), # include any additional or custom field specified in model.meta.superset.extra - } + # compute metrics + final_dataset_metrics = compute_metrics( + dataset["metrics"], + metrics.get(model["unique_id"], []), + reload_columns, + merge_metadata, + ) + + # compute columns + final_dataset_columns = [] + if not reload_columns and sync_columns: + refreshed_columns_list = client.get_refreshed_dataset_columns(dataset["id"]) + final_dataset_columns = compute_columns( + dataset["columns"], + refreshed_columns_list, + ) - dataset_metrics = [] - current_metrics = {} - model_metrics = { - metric["metric_name"]: metric - for metric in metrics.get(model["unique_id"], []) - } + # compute update payload + update = compute_dataset_metadata( + model, + certification, + disallow_edits, + final_dataset_metrics, + base_url, + final_dataset_columns, + ) - if not reload_columns: - current_metrics = { - metric["metric_name"]: metric - for metric in client.get_dataset(dataset["id"])["metrics"] - } - for name, metric in current_metrics.items(): - # remove data that is not part of the update payload - metric = clean_metadata(metric) - if not merge_metadata or name not in model_metrics: - dataset_metrics.append(metric) - - for name, metric_definition in model_metrics.items(): - if reload_columns or name not in current_metrics or merge_metadata: - if merge_metadata and name in current_metrics: - metric_definition["id"] = current_metrics[name]["id"] - 
dataset_metrics.append(metric_definition) - - # update dataset metadata from dbt and clearing metrics - update = { - "description": model.get("description", ""), - "extra": json.dumps(extra), - "is_managed_externally": disallow_edits, - "metrics": [] if reload_columns else dataset_metrics, - **model_kwargs, # include additional model metadata defined in model.meta.superset - } - if base_url: - fragment = "!/model/{unique_id}".format(**model) - update["external_url"] = str(base_url.with_fragment(fragment)) try: client.update_dataset( dataset["id"], override_columns=reload_columns, **update @@ -185,41 +340,24 @@ def sync_datasets( # pylint: disable=too-many-locals, too-many-branches, too-ma failed_datasets.append(model["unique_id"]) continue - if reload_columns and dataset_metrics: - update = { - "metrics": dataset_metrics, - } - try: - client.update_dataset(dataset["id"], override_columns=False, **update) - except SupersetError: - failed_datasets.append(model["unique_id"]) - continue - - # update column descriptions - if columns := model.get("columns"): - column_metadata = {column["name"]: column for column in columns} - current_columns = client.get_dataset(dataset["id"])["columns"] - for column in current_columns: - name = column["column_name"] - if name in column_metadata: - column["description"] = column_metadata[name].get("description", "") - column["verbose_name"] = column_metadata[name].get("name", "") - - # remove data that is not part of the update payload - column = clean_metadata(column) - - # for some reason this is being sent as null sometimes - # https://github.com/preset-io/backend-sdk/issues/163 - if "is_active" in column and column["is_active"] is None: - del column["is_active"] + # update column metadata + if dbt_columns := model.get("columns"): + current_dataset_columns = client.get_dataset(dataset["id"])["columns"] + dataset_columns = compute_columns_metadata( + dbt_columns, + current_dataset_columns, + reload_columns, + merge_metadata, + ) try: 
client.update_dataset( dataset["id"], override_columns=reload_columns, - columns=current_columns, + columns=dataset_columns, ) except SupersetError: failed_datasets.append(model["unique_id"]) + continue datasets.append(dataset) diff --git a/tests/api/clients/superset_test.py b/tests/api/clients/superset_test.py index c5bd1db6..1e024d1b 100644 --- a/tests/api/clients/superset_test.py +++ b/tests/api/clients/superset_test.py @@ -1422,6 +1422,57 @@ def test_get_dataset(mocker: MockerFixture) -> None: get_resource.assert_called_with("dataset", 1) +def test_get_refreshed_dataset_columns(requests_mock: Mocker) -> None: + """ + Test the ``get_refreshed_dataset_columns`` method. + """ + expected_response = [ + { + "column_name": "order_number", + "name": "order_number", + "type": "BIGINT", + "nullable": True, + "default": None, + "autoincrement": False, + "comment": None, + "type_generic": 0, + "is_dttm": False, + }, + { + "column_name": "quantity_ordered", + "name": "quantity_ordered", + "type": "BIGINT", + "nullable": True, + "default": None, + "autoincrement": False, + "comment": None, + "type_generic": 0, + "is_dttm": False, + }, + { + "column_name": "price_each", + "name": "price_each", + "type": "DOUBLE PRECISION", + "nullable": True, + "default": None, + "autoincrement": False, + "comment": None, + "type_generic": 0, + "is_dttm": False, + }, + ] + requests_mock.get( + "https://superset.example.org/datasource/external_metadata/table/1", + json=expected_response, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.get_refreshed_dataset_columns(1) + assert response == expected_response + + def test_get_datasets(mocker: MockerFixture) -> None: """ Test the ``get_datasets`` method. 
diff --git a/tests/cli/superset/sync/dbt/command_test.py b/tests/cli/superset/sync/dbt/command_test.py index 05e9200d..f9f180b6 100644 --- a/tests/cli/superset/sync/dbt/command_test.py +++ b/tests/cli/superset/sync/dbt/command_test.py @@ -1434,6 +1434,7 @@ def test_dbt_core(mocker: MockerFixture, fs: FakeFilesystem) -> None: "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) sync_exposures.assert_called_with( client, @@ -1760,6 +1761,7 @@ def test_dbt_core_preserve_metadata( "", reload_columns=False, merge_metadata=False, + sync_columns=False, ) sync_exposures.assert_called_with( client, @@ -1843,6 +1845,7 @@ def test_dbt_core_preserve_columns( "", reload_columns=False, merge_metadata=False, + sync_columns=False, ) @@ -1927,6 +1930,7 @@ def test_dbt_core_merge_metadata( "", reload_columns=False, merge_metadata=True, + sync_columns=False, ) sync_exposures.assert_called_with( client, @@ -2013,6 +2017,7 @@ def test_dbt_core_raise_failures_flag_no_failures( "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) list_failed_models.assert_not_called() @@ -2094,6 +2099,7 @@ def test_dbt_core_raise_failures_flag_deprecation_warning( "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) list_failed_models.assert_not_called() @@ -2177,6 +2183,7 @@ def test_dbt_core_raise_failures_flag_with_failures( "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) @@ -2258,6 +2265,7 @@ def test_dbt_core_raise_failures_flag_with_failures_and_deprecation( "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) @@ -2549,6 +2557,7 @@ def test_dbt(mocker: MockerFixture, fs: FakeFilesystem) -> None: "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) sync_exposures.assert_called_with( client, @@ -2856,6 +2865,7 @@ def test_dbt_cloud(mocker: MockerFixture) -> None: "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) @@ -2912,6 +2922,7 @@ def 
test_dbt_cloud_preserve_metadata(mocker: MockerFixture) -> None: "", reload_columns=False, merge_metadata=False, + sync_columns=False, ) @@ -2968,6 +2979,7 @@ def test_dbt_cloud_preserve_columns(mocker: MockerFixture) -> None: "", reload_columns=False, merge_metadata=False, + sync_columns=False, ) @@ -3024,6 +3036,7 @@ def test_dbt_cloud_merge_metadata(mocker: MockerFixture) -> None: "", reload_columns=False, merge_metadata=True, + sync_columns=False, ) @@ -3083,6 +3096,7 @@ def test_dbt_cloud_raise_failures_flag_no_failures(mocker: MockerFixture) -> Non "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) list_failed_models.assert_not_called() @@ -3145,6 +3159,7 @@ def test_dbt_cloud_raise_failures_flag_with_failures(mocker: MockerFixture) -> N "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) @@ -3204,6 +3219,64 @@ def test_dbt_cloud_preserve_and_merge(mocker: MockerFixture) -> None: assert "can't be combined. Please include only one to the command." in result.output +def test_dbt_cloud_sync_columns(mocker: MockerFixture) -> None: + """ + Test the ``dbt-cloud`` command with the ``--sync_columns`` flag. 
+ """ + SupersetClient = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.SupersetClient", + ) + superset_client = SupersetClient() + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + DBTClient = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.DBTClient", + ) + dbt_client = DBTClient() + sync_datasets = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.sync_datasets", + return_value=([], []), + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.get_job", + return_value={"id": 123, "name": "My job", "environment_id": 456}, + ) + + dbt_client.get_models.return_value = dbt_cloud_models + dbt_client.get_og_metrics.return_value = dbt_cloud_metrics + database = mocker.MagicMock() + superset_client.get_databases.return_value = [database] + superset_client.get_database.return_value = database + + runner = CliRunner() + result = runner.invoke( + superset_cli, + [ + "https://superset.example.org/", + "sync", + "dbt-cloud", + "XXX", + "1", + "2", + "123", + "--preserve-metadata", + "--sync-columns", + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + sync_datasets.assert_called_with( + superset_client, + dbt_cloud_models, + superset_metrics, + database, + False, + "", + reload_columns=False, + merge_metadata=False, + sync_columns=True, + ) + + def test_dbt_cloud_no_job_id(mocker: MockerFixture) -> None: """ Test the ``dbt-cloud`` command when no job ID is specified. 
@@ -3257,6 +3330,7 @@ def test_dbt_cloud_no_job_id(mocker: MockerFixture) -> None: "", reload_columns=True, merge_metadata=False, + sync_columns=False, ) diff --git a/tests/cli/superset/sync/dbt/datasets_test.py b/tests/cli/superset/sync/dbt/datasets_test.py index adb27b47..08d955e1 100644 --- a/tests/cli/superset/sync/dbt/datasets_test.py +++ b/tests/cli/superset/sync/dbt/datasets_test.py @@ -3,23 +3,31 @@ """ # pylint: disable=invalid-name, too-many-lines -import copy import json -from typing import Dict, List, cast +from typing import Any, Dict, List, cast from unittest import mock import pytest from pytest_mock import MockerFixture from sqlalchemy.engine.url import make_url +from yarl import URL from preset_cli.api.clients.dbt import MetricSchema, ModelSchema from preset_cli.api.clients.superset import SupersetMetricDefinition from preset_cli.cli.superset.sync.dbt.datasets import ( + DEFAULT_CERTIFICATION, + clean_metadata, + compute_columns, + compute_columns_metadata, + compute_dataset_metadata, + compute_metrics, create_dataset, + get_certification_info, + get_or_create_dataset, model_in_database, sync_datasets, ) -from preset_cli.exceptions import ErrorLevel, ErrorPayload, SupersetError +from preset_cli.exceptions import CLIError, ErrorLevel, ErrorPayload, SupersetError metric_schema = MetricSchema() @@ -36,6 +44,64 @@ ], } +superset_metrics: List[SupersetMetricDefinition] = [ + { + "description": "Superset desc", + "expression": "COUNT(*)", + "metric_name": "cnt", + "id": 1, + "verbose_name": "Count from Superset", + }, + { + "description": "Superset desc", + "expression": "SUM(price_each)", + "metric_name": "revenue", + "id": 2, + "verbose_name": "SUM from Superset", + }, +] + +dataset_columns = [ + { + "advanced_data_type": None, + "changed_on": "2024-01-23T20:29:33.945074", + "column_name": "product_line", + "created_on": "2024-01-23T20:29:33.945070", + "description": "Description for Product Line", + "expression": None, + "extra": "{}", + "filterable": 
True, + "groupby": True, + "id": 1, + "is_active": True, + "is_dttm": False, + "python_date_format": None, + "type": None, + "type_generic": None, + "uuid": "5b0dc14a-8c1a-4fcb-8791-3a955d8609d3", + "verbose_name": None, + }, + { + "advanced_data_type": None, + "changed_on": "2024-01-03T13:30:19.139128", + "column_name": "id", + "created_on": "2021-12-22T16:59:38.825689", + "description": "Description for ID", + "expression": None, + "extra": "{}", + "filterable": True, + "groupby": False, + "id": 2, + "is_active": None, + "is_dttm": False, + "python_date_format": None, + "type": "INTEGER", + "type_generic": None, + "uuid": "a2952680-2671-4a97-b608-3483cf7f11d2", + "verbose_name": None, + }, +] + model_schema = ModelSchema() models: List[ModelSchema] = [ model_schema.load( @@ -60,14 +126,13 @@ def test_sync_datasets_new(mocker: MockerFixture) -> None: """ client = mocker.MagicMock() client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], - } - + client.create_dataset.return_value = {"id": 1, "metrics": [], "columns": []} + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) sync_datasets( client=client, models=models, @@ -76,56 +141,22 @@ def test_sync_datasets_new(mocker: MockerFixture) -> None: disallow_edits=False, external_url_prefix="", ) - client.create_dataset.assert_has_calls( - [ - mock.call(database=1, schema="public", table_name="messages_channels"), - ], + client.create_dataset.assert_called_with( + database=1, + schema="public", + table_name="messages_channels", ) client.update_dataset.assert_has_calls( [ 
mock.call( 1, override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], + **compute_dataset_metadata_mock(), ), mock.call( 1, override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - { - "column_name": "ts", - "is_dttm": True, - }, - ], + columns=compute_columns_metadata_mock(), ), ], ) @@ -136,43 +167,36 @@ def test_sync_datasets_first_update_fails(mocker: MockerFixture) -> None: Test ``sync_datasets`` with ``update_dataset`` failing in the first execution. """ client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}] client.update_dataset.side_effect = [SupersetError([error])] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], - } - - sync_datasets( + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + working, failed = sync_datasets( client=client, models=models, metrics=metrics, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, + 
database={"id": 1}, disallow_edits=False, external_url_prefix="", ) - client.create_dataset.assert_called_with( - database=1, - schema="public", - table_name="messages_channels", - ) + client.update_dataset.assert_called_with( 1, override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], + **compute_dataset_metadata_mock(), ) + assert working == [] + assert failed == [models[0]["unique_id"]] def test_sync_datasets_second_update_fails(mocker: MockerFixture) -> None: @@ -180,159 +204,67 @@ def test_sync_datasets_second_update_fails(mocker: MockerFixture) -> None: Test ``sync_datasets`` with ``update_dataset`` failing in the second execution. """ client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}] client.update_dataset.side_effect = [mocker.MagicMock(), SupersetError([error])] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], - } - - sync_datasets( - client=client, - models=models, - metrics=metrics, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, - disallow_edits=False, - external_url_prefix="", + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, ) - client.create_dataset.assert_called_with( - database=1, - schema="public", - table_name="messages_channels", + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", ) - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": 
"model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - ), - ], + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", ) - - -def test_sync_datasets_third_update_fails(mocker: MockerFixture) -> None: - """ - Test ``sync_datasets`` with ``update_dataset`` failing in the third execution. - """ - client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}] - client.update_dataset.side_effect = [ - mocker.MagicMock(), - mocker.MagicMock(), - SupersetError([error]), - ] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], - } - - sync_datasets( + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) + working, failed = sync_datasets( client=client, models=models, metrics=metrics, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, + database={"id": 1}, disallow_edits=False, external_url_prefix="", ) - client.create_dataset.assert_called_with( - database=1, - schema="public", - table_name="messages_channels", - ) + client.update_dataset.assert_has_calls( [ mock.call( 1, override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This 
table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], + **compute_dataset_metadata_mock(), ), mock.call( 1, override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - { - "column_name": "ts", - "is_dttm": True, - }, - ], + columns=compute_columns_metadata_mock(), ), ], ) + assert working == [] + assert failed == [models[0]["unique_id"]] def test_sync_datasets_with_alias(mocker: MockerFixture) -> None: """ - Test ``sync_datasets`` when datasets has an alias. + Test ``sync_datasets`` when the model has an alias. """ client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], - } - + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) models_with_alias: List[ModelSchema] = [ model_schema.load( { @@ -355,171 +287,83 @@ def test_sync_datasets_with_alias(mocker: MockerFixture) -> None: disallow_edits=False, external_url_prefix="", ) - client.create_dataset.assert_has_calls( - [ - mock.call(database=1, schema="public", table_name="model_alias"), - ], - ) - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=True, - description="", - extra=json.dumps( - { - 
"unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - ), - mock.call( - 1, - override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - { - "column_name": "ts", - "is_dttm": True, - }, - ], - ), - ], + client.get_datasets.assert_called_with( + database=mock.ANY, + schema="public", + table_name="model_alias", ) + client.get_datasets.return_value = [{"id": 1}] + client.get_dataset.return_value = {"id": 1} -def test_sync_datasets_no_metrics(mocker: MockerFixture) -> None: +def test_sync_datasets_custom_certification(mocker: MockerFixture) -> None: """ - Test ``sync_datasets`` when no datasets exist yet. + Test ``sync_datasets`` with a custom certification. 
""" client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] - client.get_dataset.return_value = { - "columns": [{"column_name": "id", "is_dttm": False}], - } - - sync_datasets( - client=client, - models=models, - metrics={}, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, - disallow_edits=False, - external_url_prefix="", + get_or_create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, ) - client.create_dataset.assert_has_calls( - [ - mock.call(database=1, schema="public", table_name="messages_channels"), - ], + compute_metrics_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", ) - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - ], - ), - ], + compute_columns_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", ) - - -def test_sync_datasets_custom_certification(mocker: MockerFixture) -> None: - """ - Test ``sync_datasets`` with a custom certification. 
- """ - client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] - client.get_dataset.return_value = { - "columns": [{"column_name": "id", "is_dttm": False}], - } + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) + model_id = models[0]["unique_id"] sync_datasets( client=client, models=models, - metrics={}, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, + metrics=metrics, + database={"id": 1}, disallow_edits=False, external_url_prefix="", certification={"details": "This dataset is synced from dbt Cloud"}, ) - client.create_dataset.assert_has_calls( - [ - mock.call(database=1, schema="public", table_name="messages_channels"), - ], + + get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) + compute_metrics_mock.assert_called_with( + get_or_create_dataset_mock()["metrics"], + metrics[model_id], + True, + False, + ) + compute_columns_metadata_mock.assert_called_with( + models[0]["columns"], + client.get_dataset()["columns"], + True, + False, + ) + compute_columns_mock.assert_not_called() + compute_dataset_metadata_mock.assert_called_with( + models[0], + {"details": "This dataset is synced from dbt Cloud"}, + False, + compute_metrics_mock(), + None, + [], ) + client.create_dataset.assert_not_called() + client.get_refreshed_dataset_columns.assert_not_called() client.update_dataset.assert_has_calls( [ mock.call( 1, override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": { - "details": "This dataset is synced from dbt Cloud", - }, - }, - ), - is_managed_externally=False, - metrics=[], + 
**compute_dataset_metadata_mock(), ), mock.call( 1, override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - ], + columns=compute_columns_metadata_mock(), ), ], ) @@ -558,10 +402,18 @@ def test_sync_datasets_existing(mocker: MockerFixture) -> None: Test ``sync_datasets`` when datasets already exist. """ client = mocker.MagicMock() - client.get_datasets.side_effect = [[{"id": 1}], [{"id": 2}], [{"id": 3}]] - client.get_dataset.return_value = { - "columns": [{"column_name": "id", "is_dttm": False, "is_active": None}], - } + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) sync_datasets( client=client, @@ -572,50 +424,6 @@ def test_sync_datasets_existing(mocker: MockerFixture) -> None: external_url_prefix="", ) client.create_dataset.assert_not_called() - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - ), - mock.call( - 1, - override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - ], - ), - ], - ) def test_sync_datasets_multiple_existing(mocker: MockerFixture) -> None: @@ -625,16 +433,16 @@ 
def test_sync_datasets_multiple_existing(mocker: MockerFixture) -> None: client = mocker.MagicMock() client.get_datasets.return_value = [{"id": 1}, {"id": 2}] - with pytest.raises(Exception) as excinfo: - sync_datasets( - client=client, - models=models, - metrics=metrics, - database={"id": 1}, - disallow_edits=False, - external_url_prefix="", - ) - assert str(excinfo.value) == "More than one dataset found" + working, failed = sync_datasets( + client=client, + models=models, + metrics=metrics, + database={"id": 1}, + disallow_edits=False, + external_url_prefix="", + ) + assert working == [] + assert failed == [models[0]["unique_id"]] def test_sync_datasets_external_url_disallow_edits(mocker: MockerFixture) -> None: @@ -642,10 +450,23 @@ def test_sync_datasets_external_url_disallow_edits(mocker: MockerFixture) -> Non Test ``sync_datasets`` when passing external URL prefix and disallow-edits. """ client = mocker.MagicMock() - client.get_datasets.side_effect = [[{"id": 1}], [{"id": 2}], [{"id": 3}]] - client.get_dataset.return_value = { - "columns": [{"column_name": "id", "is_dttm": False}], - } + get_or_create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, + ) + compute_metrics_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + compute_columns_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) + model_id = models[0]["unique_id"] sync_datasets( client=client, @@ -655,52 +476,42 @@ def test_sync_datasets_external_url_disallow_edits(mocker: MockerFixture) -> Non disallow_edits=True, external_url_prefix="https://dbt.example.org/", ) + + 
get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) + compute_metrics_mock.assert_called_with( + get_or_create_dataset_mock()["metrics"], + metrics[model_id], + True, + False, + ) + compute_columns_metadata_mock.assert_called_with( + models[0]["columns"], + client.get_dataset()["columns"], + True, + False, + ) + compute_columns_mock.assert_not_called() + compute_dataset_metadata_mock.assert_called_with( + models[0], + None, + True, + compute_metrics_mock(), + URL("https://dbt.example.org/"), + [], + ) client.create_dataset.assert_not_called() + client.get_refreshed_dataset_columns.assert_not_called() client.update_dataset.assert_has_calls( [ mock.call( 1, override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=True, - metrics=[], - external_url=( - "https://dbt.example.org/" - "#!/model/model.superset_examples.messages_channels" - ), - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], + **compute_dataset_metadata_mock(), ), mock.call( 1, override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - ], + columns=compute_columns_metadata_mock(), ), ], ) @@ -709,291 +520,372 @@ def test_sync_datasets_external_url_disallow_edits(mocker: MockerFixture) -> Non def test_sync_datasets_preserve_metadata(mocker: MockerFixture) -> None: """ Test ``sync_datasets`` with preserve_metadata set to True. - Metrics should be merged (Preset as the source of truth). 
""" client = mocker.MagicMock() - metrics_ = copy.deepcopy(metrics) - metrics_["model.superset_examples.messages_channels"].append( - metric_schema.load( - { - "description": "", - "expression": "MAX(id)", - "extra": "{}", - "metric_name": "max_id", - "metric_type": "max", - "verbose_name": "", - }, - ), + get_or_create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, ) - client.get_datasets.side_effect = [[{"id": 1}], [{"id": 2}], [{"id": 3}]] - client.get_dataset.return_value = { - "columns": [ - { - "column_name": "id", - "is_dttm": False, - "filterable": False, - "groupby": False, - }, - ], - "metrics": [ - { - "changed_on": "2023-08-28T18:01:24.190211", - "created_on": "2023-08-28T18:01:24.190208", - "expression": "count(*)/1", - "id": 190, - "metric_name": "cnt", - }, - { - "changed_on": "2023-08-27T18:01:24.190211", - "created_on": "2023-08-27T18:01:24.190208", - "expression": "COUNT (DISTINCT id)", - "id": 191, - "metric_name": "unique_ids", - }, - ], - "id": 1, - } + compute_metrics_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + compute_columns_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) + model_id = models[0]["unique_id"] sync_datasets( client=client, models=models, - metrics=metrics_, + metrics=metrics, database={"id": 1}, disallow_edits=False, - external_url_prefix="https://dbt.example.org/", + external_url_prefix="", reload_columns=False, + merge_metadata=False, + ) + + get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) + compute_metrics_mock.assert_called_with( + 
get_or_create_dataset_mock()["metrics"], + metrics[model_id], + False, + False, + ) + compute_columns_metadata_mock.assert_called_with( + models[0]["columns"], + client.get_dataset()["columns"], + False, + False, + ) + compute_columns_mock.assert_not_called() + compute_dataset_metadata_mock.assert_called_with( + models[0], + None, + False, + compute_metrics_mock(), + None, + [], ) client.create_dataset.assert_not_called() + client.get_refreshed_dataset_columns.assert_not_called() client.update_dataset.assert_has_calls( [ mock.call( 1, override_columns=False, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[ - { - "expression": "count(*)/1", - "id": 190, - "metric_name": "cnt", - }, - { - "expression": "COUNT (DISTINCT id)", - "id": 191, - "metric_name": "unique_ids", - }, - { - "expression": "MAX(id)", - "metric_name": "max_id", - "metric_type": "max", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - external_url=( - "https://dbt.example.org/" - "#!/model/model.superset_examples.messages_channels" - ), + **compute_dataset_metadata_mock(), ), mock.call( 1, override_columns=False, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "filterable": False, - "groupby": False, - "is_dttm": False, - "verbose_name": "id", - }, - ], + columns=compute_columns_metadata_mock(), ), ], ) -def test_sync_datasets_merge_metadata(mocker: MockerFixture) -> None: +def test_sync_datasets_preserve_metadata_sync_columns(mocker: MockerFixture) -> None: """ - Test ``sync_datasets`` with merge_metadata set to True. - Metrics should be merged (dbt as the source of truth). + Test ``sync_datasets`` with preserve_metadata & sync_columns set to True. 
""" client = mocker.MagicMock() - metrics_ = copy.deepcopy(metrics) - metrics_["model.superset_examples.messages_channels"].append( - metric_schema.load( - { - "description": "", - "expression": "MAX(id)", - "extra": "{}", - "metric_name": "max_id", - "metric_type": "max", - "verbose_name": "", - }, - ), + get_or_create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, ) - client.get_datasets.side_effect = [[{"id": 1}], [{"id": 2}], [{"id": 3}]] - client.get_dataset.return_value = { - "columns": [ - { - "column_name": "id", - "is_dttm": False, - "filterable": False, - "groupby": False, - }, - ], - "metrics": [ - { - "changed_on": "2023-08-28T18:01:24.190211", - "created_on": "2023-08-28T18:01:24.190208", - "expression": "count(*)/1", - "id": 190, - "metric_name": "cnt", - }, - { - "changed_on": "2023-08-27T18:01:24.190211", - "created_on": "2023-08-27T18:01:24.190208", - "expression": "COUNT (DISTINCT id)", - "id": 191, - "metric_name": "unique_ids", - }, - ], - "id": 1, - } + compute_metrics_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + compute_columns_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) + model_id = models[0]["unique_id"] sync_datasets( client=client, models=models, - metrics=metrics_, + metrics=metrics, database={"id": 1}, disallow_edits=False, - external_url_prefix="https://dbt.example.org/", + external_url_prefix="", reload_columns=False, - merge_metadata=True, + merge_metadata=False, + sync_columns=True, + ) + + get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) + 
compute_metrics_mock.assert_called_with( + get_or_create_dataset_mock()["metrics"], + metrics[model_id], + False, + False, + ) + compute_columns_metadata_mock.assert_called_with( + models[0]["columns"], + client.get_dataset()["columns"], + False, + False, + ) + compute_columns_mock.assert_called_with( + get_or_create_dataset_mock()["columns"], + client.get_refreshed_dataset_columns(get_or_create_dataset_mock()["id"]), + ) + compute_dataset_metadata_mock.assert_called_with( + models[0], + None, + False, + compute_metrics_mock(), + None, + compute_columns_mock(), ) client.create_dataset.assert_not_called() + client.get_refreshed_dataset_columns.assert_called_with( + get_or_create_dataset_mock()["id"], + ) client.update_dataset.assert_has_calls( [ mock.call( 1, override_columns=False, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[ - { - "expression": "COUNT (DISTINCT id)", - "id": 191, - "metric_name": "unique_ids", - }, - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - "id": 190, - }, - { - "expression": "MAX(id)", - "metric_name": "max_id", - "metric_type": "max", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - external_url=( - "https://dbt.example.org/" - "#!/model/model.superset_examples.messages_channels" - ), + **compute_dataset_metadata_mock(), ), mock.call( 1, override_columns=False, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "filterable": False, - "groupby": False, - "is_dttm": False, - "verbose_name": "id", - }, - ], + columns=compute_columns_metadata_mock(), ), ], ) -def test_sync_datasets_no_columns(mocker: MockerFixture) -> None: +def test_sync_datasets_merge_metadata(mocker: MockerFixture) -> 
None: """ - Test ``sync_datasets`` when passing external URL prefix. + Test ``sync_datasets`` with merge_metadata set to True. """ client = mocker.MagicMock() - client.get_datasets.side_effect = [[{"id": 1}], [{"id": 2}], [{"id": 3}]] + get_or_create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, + ) + compute_metrics_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + compute_columns_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) + model_id = models[0]["unique_id"] - modified_models = [models[0].copy()] - modified_models[0]["columns"] = {} + sync_datasets( + client=client, + models=models, + metrics=metrics, + database={"id": 1}, + disallow_edits=False, + external_url_prefix="", + reload_columns=False, + merge_metadata=True, + ) + + get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) + compute_metrics_mock.assert_called_with( + get_or_create_dataset_mock()["metrics"], + metrics[model_id], + False, + True, + ) + compute_columns_metadata_mock.assert_called_with( + models[0]["columns"], + client.get_dataset()["columns"], + False, + True, + ) + compute_columns_mock.assert_not_called() + compute_dataset_metadata_mock.assert_called_with( + models[0], + None, + False, + compute_metrics_mock(), + None, + [], + ) + client.create_dataset.assert_not_called() + client.get_refreshed_dataset_columns.assert_not_called() + client.update_dataset.assert_has_calls( + [ + mock.call( + 1, + override_columns=False, + **compute_dataset_metadata_mock(), + ), + mock.call( + 1, + override_columns=False, + 
columns=compute_columns_metadata_mock(), + ), + ], + ) + + +def test_sync_datasets_merge_metadata_sync_columns(mocker: MockerFixture) -> None: + """ + Test ``sync_datasets`` with merge_metadata & sync_columns set to True. + """ + client = mocker.MagicMock() + get_or_create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, + ) + compute_metrics_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + compute_columns_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) + model_id = models[0]["unique_id"] sync_datasets( client=client, - models=modified_models, + models=models, metrics=metrics, database={"id": 1}, disallow_edits=False, external_url_prefix="", + reload_columns=False, + merge_metadata=True, + sync_columns=True, + ) + + get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) + compute_metrics_mock.assert_called_with( + get_or_create_dataset_mock()["metrics"], + metrics[model_id], + False, + True, + ) + compute_columns_metadata_mock.assert_called_with( + models[0]["columns"], + client.get_dataset()["columns"], + False, + True, + ) + compute_columns_mock.assert_called_with( + get_or_create_dataset_mock()["columns"], + client.get_refreshed_dataset_columns(get_or_create_dataset_mock()["id"]), + ) + compute_dataset_metadata_mock.assert_called_with( + models[0], + None, + False, + compute_metrics_mock(), + None, + compute_columns_mock(), ) client.create_dataset.assert_not_called() + client.get_refreshed_dataset_columns.assert_called_with( + get_or_create_dataset_mock()["id"], + ) client.update_dataset.assert_has_calls( [ 
mock.call( 1, - override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], + override_columns=False, + **compute_dataset_metadata_mock(), ), mock.call( 1, override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], + columns=compute_columns_metadata_mock(), ), ], ) +def test_sync_datasets_no_columns(mocker: MockerFixture) -> None: + """ + Test ``sync_datasets`` when there's no dbt metadata for columns. + """ + client = mocker.MagicMock() + modified_models = [models[0].copy()] + modified_models[0]["columns"] = {} + model_id = modified_models[0]["unique_id"] + + get_or_create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", + return_value={"id": 1, "metrics": [], "columns": []}, + ) + compute_metrics_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", + ) + compute_columns_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", + ) + compute_dataset_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", + ) + compute_columns_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", + ) + + sync_datasets( + client=client, + models=modified_models, + metrics=metrics, + database={"id": 1}, + disallow_edits=False, + external_url_prefix="", + ) + + get_or_create_dataset_mock.assert_called_with(client, modified_models[0], {"id": 1}) + compute_metrics_mock.assert_called_with( + get_or_create_dataset_mock()["metrics"], + metrics[model_id], + True, + False, + ) + compute_columns_mock.assert_not_called() + 
compute_dataset_metadata_mock.assert_called_with( + modified_models[0], + None, + False, + compute_metrics_mock(), + None, + [], + ) + compute_columns_metadata_mock.assert_not_called() + client.create_dataset.assert_not_called() + client.get_refreshed_dataset_columns.assert_not_called() + client.update_dataset.assert_called_with( + 1, + override_columns=True, + **compute_dataset_metadata_mock, + ) + + def test_create_dataset_physical(mocker: MockerFixture) -> None: """ Test ``create_dataset`` for physical datasets. @@ -1045,389 +937,608 @@ def test_create_dataset_virtual(mocker: MockerFixture) -> None: ) -def test_model_in_database() -> None: +def test_get_or_create_dataset_existing_dataset(mocker: MockerFixture) -> None: """ - Test the ``model_in_database`` helper. + Test the ``get_or_create_dataset`` helper when it finds a dataset. """ - url = make_url("bigquery://project-1") - assert model_in_database(cast(ModelSchema, {"database": "project-1"}), url) - assert not model_in_database(cast(ModelSchema, {"database": "project-2"}), url) + client = mocker.MagicMock() + # create_dataset = mocker.patch("preset_cli.cli.superset.sync.dbt.datasets.create_engine") + client.get_datasets.return_value = [{"id": 1}] + client.get_dataset.return_value = {"id": 1} + database = {"id": 1} + result = get_or_create_dataset(client, models[0], database) + assert result == client.get_dataset.return_value + client.create_dataset.assert_not_called() - url = make_url("snowflake://user:password@host/db1") - assert model_in_database(cast(ModelSchema, {"database": "db1"}), url) - assert not model_in_database(cast(ModelSchema, {"database": "db2"}), url) +def test_get_or_create_dataset_multiple_datasets(mocker: MockerFixture) -> None: + """ + Test the ``get_or_create_dataset`` helper when it finds many datasets. 
+ """ + client = mocker.MagicMock() + client.get_datasets.return_value = [{"id": 1}, {"id": 2}] + database = {"id": 1} + with pytest.raises(CLIError) as excinfo: + get_or_create_dataset(client, models[0], database) + assert excinfo.type == CLIError + assert excinfo.value.exit_code == 1 + assert "More than one dataset found" in str(excinfo.value) + client.get_dataset.assert_not_called() + client.create_dataset.assert_not_called() -def test_sync_datasets_null_certification(mocker: MockerFixture) -> None: + +def test_get_or_create_dataset_no_datasets(mocker: MockerFixture) -> None: """ - Test ``sync_datasets`` with no certification info + Test the ``get_or_create_dataset`` helper when the dataset doesn't exist. """ client = mocker.MagicMock() client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], - } + client.get_dataset.return_value = {"id": 1} + create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.create_dataset", + ) + database = {"id": 1} + result = get_or_create_dataset(client, models[0], database) + assert result == create_dataset_mock.return_value + create_dataset_mock.assert_called_with(client, database, models[0]) - models_with_null_certification: List[ModelSchema] = [ - model_schema.load( - { - "database": "examples_dev", - "schema": "public", - "description": "", - "meta": {"superset": {"extra": {"certification": None}}}, - "name": "messages_channels", - "unique_id": "model.superset_examples.messages_channels", - "columns": [{"name": "id", "description": "Primary key"}], + +def test_get_or_create_dataset_creation_failure(mocker: MockerFixture) -> None: + """ + Test the ``get_or_create_dataset`` helper when it fails to create a dataset. 
+ """ + client = mocker.MagicMock() + client.get_datasets.return_value = [] + create_dataset_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.create_dataset", + ) + create_dataset_mock.side_effect = Exception("Error") + database = {"id": 1} + with pytest.raises(CLIError) as excinfo: + get_or_create_dataset(client, models[0], database) + assert excinfo.type == CLIError + assert excinfo.value.exit_code == 1 + assert "Unable to create dataset" in str(excinfo.value) + client.get_dataset.assert_not_called() + create_dataset_mock.assert_called_with(client, database, models[0]) + + +def test_get_certification_info_from_model() -> None: + """ + Test the ``get_certification_info`` helper when the model contains certification info. + """ + model = models[0].copy() + model["meta"] = { + "extra": { + "certification": { + "details": "I declare it certified", + "certified_by": "Myself", }, - ), - ] - sync_datasets( - client=client, - models=models_with_null_certification, - metrics=metrics, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, - disallow_edits=False, - external_url_prefix="", + }, + } + result = get_certification_info(model["meta"], None) + assert result == {"details": "I declare it certified", "certified_by": "Myself"} + + +def test_get_certification_info_from_model_None() -> None: + """ + Test the ``get_certification_info`` helper when the model sets certification to None. + """ + model = models[0].copy() + model["meta"] = {"extra": {"certification": None}} + result = get_certification_info(model["meta"], None) + assert result is None + + +def test_get_certification_info_from_arg() -> None: + """ + Test the ``get_certification_info`` helper when certification is passed via parameter. 
+ """ + certification = {"details": "Trust it", "certified_by": "Me"} + result = get_certification_info(models[0], certification) + assert result == certification + + +def test_get_certification_info_default() -> None: + """ + Test the ``get_certification_info`` helper when using default certification value. + """ + result = get_certification_info(models[0], None) + assert result == DEFAULT_CERTIFICATION + + +def test_compute_metrics_reload_not_merge() -> None: + """ + Test the ``compute_metrics`` helper with the default flow. + """ + result = compute_metrics( + [], + metrics["model.superset_examples.messages_channels"], + True, + False, ) - client.create_dataset.assert_has_calls( - [ - mock.call(database=1, schema="public", table_name="messages_channels"), - ], + assert result == metrics["model.superset_examples.messages_channels"] + + +def test_compute_metrics_reload_not_merge_with_superset_metrics() -> None: + """ + Test the ``compute_metrics`` helper with the default flow + existing metrics. 
+ """ + result = compute_metrics( + superset_metrics, + metrics["model.superset_examples.messages_channels"], + True, + False, ) - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - ), - mock.call( - 1, - override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - { - "column_name": "ts", - "is_dttm": True, - }, - ], - ), - ], + assert result == metrics["model.superset_examples.messages_channels"] + + +def test_compute_metrics_reload_not_merge_with_no_dbt_metrics() -> None: + """ + Test the ``compute_metrics`` helper with the default flow + no dbt metrics. + """ + result = compute_metrics( + superset_metrics, + [], + True, + False, ) + assert result == [] -def test_sync_datasets_model_certification(mocker: MockerFixture) -> None: +def test_compute_metrics_reload_not_merge_with_conflict() -> None: """ - Test ``sync_datasets`` with certification from model definition + Test the ``compute_metrics`` helper with the default flow + conflict. 
""" - client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], - } + result = compute_metrics( + superset_metrics, + metrics["model.superset_examples.messages_channels"], + True, + False, + ) + assert result == metrics["model.superset_examples.messages_channels"] - models_with_model_certification: List[ModelSchema] = [ - model_schema.load( - { - "database": "examples_dev", - "schema": "public", - "description": "", - "meta": { - "superset": { - "extra": { - "certification": { - "details": "I declare this dataset certified", - "certified_by": "Myself", - }, - }, - }, - }, - "name": "messages_channels", - "unique_id": "model.superset_examples.messages_channels", - "columns": [{"name": "id", "description": "Primary key"}], - }, - ), - ] - sync_datasets( - client=client, - models=models_with_model_certification, - metrics=metrics, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, - disallow_edits=False, - external_url_prefix="", + +def test_compute_metrics_with_merge_without_reload() -> None: + """ + Test the ``compute_metrics`` helper with merge without reload. 
+ """ + result = compute_metrics( + superset_metrics, + metrics["model.superset_examples.messages_channels"], + False, + True, ) - client.create_dataset.assert_has_calls( + final = metrics["model.superset_examples.messages_channels"].copy() + + for dbt_metric in final: + for superset_metric in superset_metrics: + if dbt_metric["metric_name"] == superset_metric["metric_name"]: + dbt_metric["id"] = superset_metric["id"] + + for superset_metric in superset_metrics: + keep = True + for dbt_metric in final: + if superset_metric["metric_name"] == dbt_metric["metric_name"]: + keep = False + if keep: + final.append(superset_metric) + + assert result == final + + +def test_compute_metrics_without_merge_and_reload() -> None: + """ + Test the ``compute_metrics`` helper without reload and merge. + """ + result = compute_metrics( + superset_metrics, + metrics["model.superset_examples.messages_channels"], + False, + False, + ) + final = superset_metrics.copy() + additions = [] + for superset_metric in final: + for dbt_metric in metrics["model.superset_examples.messages_channels"]: + if superset_metric["metric_name"] == dbt_metric["metric_name"]: + keep = False + if keep: + additions.append(dbt_metric) + for add in additions: + final.append(add) + assert result == final + + +def test_compute_metrics_without_merge_and_reload_no_metrics() -> None: + """ + Test the ``compute_metrics`` helper without reload and merge without Superset metrics. + """ + result = compute_metrics( + [], + metrics["model.superset_examples.messages_channels"], + False, + False, + ) + assert result == metrics["model.superset_examples.messages_channels"] + + +def test_compute_columns_no_new_columns(mocker: MockerFixture) -> None: + """ + Test the ``compute_columns`` helper when no new columns are returned. 
+ """ + clean_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.clean_metadata", + ) + result = compute_columns(dataset_columns, dataset_columns) + assert result == [ + clean_metadata_mock(dataset_columns[0]), + clean_metadata_mock(dataset_columns[1]), + ] + clean_metadata_mock.assert_has_calls( [ - mock.call(database=1, schema="public", table_name="messages_channels"), + mock.call(dataset_columns[0]), + mock.call(dataset_columns[1]), ], ) - client.update_dataset.assert_has_calls( + + +def test_compute_columns_new_columns(mocker: MockerFixture) -> None: + """ + Test the ``compute_columns`` helper when new columns are returned. + """ + clean_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.clean_metadata", + ) + refreshed_list = dataset_columns.copy() + refreshed_list.append( + { + "advanced_data_type": None, + "changed_on": "2024-01-03T13:30:19.012896", + "column_name": "price_each", + "created_on": "2021-12-22T16:59:38.420192", + "description": None, + "expression": None, + "extra": "{}", + "filterable": True, + "groupby": True, + "id": 341, + "is_active": None, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "type_generic": 0, + "uuid": "309aed93-6b56-4519-b9f4-0271ec0d2d60", + "verbose_name": None, + }, + ) + result = compute_columns(dataset_columns, refreshed_list) + assert result == [ + clean_metadata_mock(dataset_columns[0]), + clean_metadata_mock(dataset_columns[1]), + clean_metadata_mock(refreshed_list[2]), + ] + clean_metadata_mock.assert_has_calls( [ - mock.call( - 1, - override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": { - "details": "I declare this dataset certified", - "certified_by": "Myself", - }, - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": 
"COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - ), - mock.call( - 1, - override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - { - "column_name": "ts", - "is_dttm": True, - }, - ], - ), + mock.call(dataset_columns[0]), + mock.call(dataset_columns[1]), + mock.call(refreshed_list[2]), ], ) -def test_sync_datasets_warning(mocker: MockerFixture) -> None: +def test_compute_columns_column_removed(mocker: MockerFixture) -> None: """ - Test ``sync_datasets`` with warning information + Test the ``compute_columns`` helper when a column is removed. """ - client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], - } + clean_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.clean_metadata", + ) + refreshed_list = dataset_columns.copy() + refreshed_list.pop(1) + result = compute_columns(dataset_columns, refreshed_list) + assert result == [ + clean_metadata_mock(dataset_columns[0]), + ] + clean_metadata_mock.assert_called_with(dataset_columns[0]) - models_with_warning: List[ModelSchema] = [ - model_schema.load( - { - "database": "examples_dev", - "schema": "public", - "description": "", - "meta": { - "superset": {"extra": {"warning_markdown": "Under Construction"}}, + +def test_compute_columns_no_columns(mocker: MockerFixture) -> None: + """ + Test the ``compute_columns`` helper when no column is returned. 
+ """ + clean_metadata_mock = mocker.patch( + "preset_cli.cli.superset.sync.dbt.datasets.clean_metadata", + ) + result = compute_columns(dataset_columns, []) + assert result == [] + clean_metadata_mock.assert_not_called() + + +def test_compute_columns_metadata_with_reload_no_merge() -> None: + """ + Test the ``compute_columns_metadata`` helper with reload without merge. + """ + result = compute_columns_metadata( + models[0]["columns"], + dataset_columns, + True, + False, + ) + + final = dataset_columns.copy() + + for column in final: + for dbt_column in models[0]["columns"]: + if column["column_name"] == dbt_column["name"]: + column["verbose_name"] = dbt_column["name"] + column["description"] = dbt_column["description"] + + assert result == final + + +def test_compute_columns_metadata_without_reload_with_merge() -> None: + """ + Test the ``compute_columns_metadata`` helper without reload with merge. + """ + result = compute_columns_metadata( + models[0]["columns"], + dataset_columns, + False, + True, + ) + + final = dataset_columns.copy() + + for column in final: + for dbt_column in models[0]["columns"]: + if column["column_name"] == dbt_column["name"]: + column["verbose_name"] = dbt_column["name"] + column["description"] = dbt_column["description"] + + assert result == final + + +def test_compute_columns_metadata_without_reload_and_merge() -> None: + """ + Test the ``compute_columns_metadata`` helper without reload and merge. + """ + result = compute_columns_metadata( + models[0]["columns"], + dataset_columns, + False, + False, + ) + assert result == dataset_columns + + +def test_compute_columns_metadata_without_reload_and_merge_other() -> None: + """ + Test the ``compute_columns_metadata`` helper without reload and merge + validating that dbt-specific metadata is still synced. 
+ """ + modified_columns = dataset_columns.copy() + modified_columns[1]["description"] = None + result = compute_columns_metadata( + models[0]["columns"], + modified_columns, + False, + False, + ) + modified_columns[1]["description"] = models[0]["columns"][0]["description"] + assert result == modified_columns + + +def test_compute_columns_metadata_dbt_column_meta() -> None: + """ + Test the ``compute_columns_metadata`` helper when inferring additional metadata + from dbt. + """ + modified_columns = models.copy() + modified_columns[0]["columns"] = [ + { + "name": "id", + "description": "Primary key", + "meta": { + "superset": { + "groupby": True, }, - "name": "messages_channels", - "unique_id": "model.superset_examples.messages_channels", - "columns": [{"name": "id", "description": "Primary key"}], }, - ), + }, ] - sync_datasets( - client=client, - models=models_with_warning, - metrics=metrics, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, - disallow_edits=False, - external_url_prefix="", - ) - client.create_dataset.assert_has_calls( - [ - mock.call(database=1, schema="public", table_name="messages_channels"), - ], - ) - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - "warning_markdown": "Under Construction", - }, - ), - is_managed_externally=False, - metrics=[], - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - ), - mock.call( - 1, - override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - { - "column_name": "ts", - "is_dttm": True, - }, 
- ], - ), - ], + result = compute_columns_metadata( + modified_columns[0]["columns"], + dataset_columns, + False, + False, ) + expected = dataset_columns.copy() + expected[1]["groupby"] = True + assert result == expected -def test_sync_datasets_meta_test(mocker: MockerFixture) -> None: +def test_compute_dataset_metadata_with_dbt_metadata() -> None: """ - Test ``sync_datasets`` with an additional field set through meta.superset + Test the ``compute_dataset_metadata`` helper with metadata from dbt. """ - client = mocker.MagicMock() - client.get_datasets.return_value = [] - client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] - client.get_dataset.return_value = { - "columns": [ - {"column_name": "id", "is_dttm": False, "type_generic": "INTEGER"}, - {"column_name": "ts", "is_dttm": True, "type_generic": "TIMESTAMP"}, - ], + model = { + "database": "examples_dev", + "schema": "public", + "description": "", + "meta": { + "superset": { + "extra": {"warning_markdown": "Under Construction"}, + "cache_timeout": 300, + }, + }, + "name": "messages_channels", + "unique_id": "model.superset_examples.messages_channels", + "columns": [{"name": "id", "description": "Primary key"}], } + model_name = model["name"] + result = compute_dataset_metadata( + model, + {}, + False, + superset_metrics, + None, + [], + ) - models_with_meta_info: List[ModelSchema] = [ - model_schema.load( + assert result == { + "description": model["description"], + "extra": json.dumps( { - "database": "examples_dev", - "schema": "public", - "description": "", - "meta": {"superset": {"cache_timeout": 250}}, - "name": "messages_channels", - "unique_id": "model.superset_examples.messages_channels", - "columns": [{"name": "id", "description": "Primary key"}], + "unique_id": model["unique_id"], + "depends_on": f"ref('{model_name}')", + "warning_markdown": "Under Construction", + "certification": DEFAULT_CERTIFICATION, }, ), - ] - sync_datasets( - client=client, - models=models_with_meta_info, - 
metrics=metrics, - database={"id": 1, "sqlalchemy_uri": "postgresql://user@host/examples_dev"}, - disallow_edits=False, - external_url_prefix="", + "is_managed_externally": False, + "metrics": superset_metrics, + "cache_timeout": 300, + } + + +def test_compute_dataset_metadata_with_certification_and_columns() -> None: + """ + Test the ``compute_dataset_metadata`` helper with certification info and columns. + """ + model: Dict[str, Any] = { + "database": "examples_dev", + "schema": "public", + "description": "", + "meta": {"superset": {"extra": {"certification": {"details": "Trust this"}}}}, + "name": "messages_channels", + "unique_id": "model.superset_examples.messages_channels", + "columns": [{"name": "id", "description": "Primary key"}], + } + model_name = model["name"] + certification_details = model["meta"]["superset"]["extra"]["certification"] + result = compute_dataset_metadata( + model, + {}, + False, + superset_metrics, + None, + dataset_columns, ) - client.create_dataset.assert_has_calls( - [ - mock.call(database=1, schema="public", table_name="messages_channels"), - ], + assert result == { + "description": model["description"], + "extra": json.dumps( + { + "unique_id": model["unique_id"], + "depends_on": f"ref('{model_name}')", + "certification": certification_details, + }, + ), + "is_managed_externally": False, + "metrics": superset_metrics, + "columns": dataset_columns, + } + + +def test_compute_dataset_metadata_with_url_disallow_edits_cert_arg() -> None: + """ + Test the ``compute_dataset_metadata`` helper with base URL, disallow edits and + a certification argument. 
+ """ + model = { + "database": "examples_dev", + "schema": "public", + "description": "", + "meta": {}, + "name": "messages_channels", + "unique_id": "model.superset_examples.messages_channels", + "columns": [{"name": "id", "description": "Primary key"}], + } + model_name = model["name"] + certification = {"details": "This dataset is synced from dbt Cloud"} + result = compute_dataset_metadata( + model, + certification, + True, + superset_metrics, + URL("https://dbt.example.org/"), + [], ) - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=True, - description="", - extra=json.dumps( - { - "unique_id": "model.superset_examples.messages_channels", - "depends_on": "ref('messages_channels')", - "certification": {"details": "This table is produced by dbt"}, - }, - ), - is_managed_externally=False, - metrics=[], - cache_timeout=250, - ), - mock.call( - 1, - override_columns=False, - metrics=[ - { - "expression": "COUNT(*)", - "metric_name": "cnt", - "metric_type": "count", - "verbose_name": "", - "description": "", - "extra": "{}", - }, - ], - ), - mock.call( - 1, - override_columns=True, - columns=[ - { - "column_name": "id", - "description": "Primary key", - "is_dttm": False, - "verbose_name": "id", - }, - { - "column_name": "ts", - "is_dttm": True, - }, - ], - ), - ], + assert result == { + "description": model["description"], + "extra": json.dumps( + { + "unique_id": model["unique_id"], + "depends_on": f"ref('{model_name}')", + "certification": certification, + }, + ), + "is_managed_externally": True, + "metrics": superset_metrics, + "external_url": ( + "https://dbt.example.org/#!/model/model.superset_examples.messages_channels" + ), + } + + +def test_compute_dataset_metadata_null_certification() -> None: + """ + Test the ``compute_dataset_metadata`` helper with certification set to `None`. 
+ """ + model = models[0].copy() + model["meta"] = {"superset": {"extra": {"certification": None}}} + model_name = model["name"] + result = compute_dataset_metadata( + model, + None, + False, + superset_metrics, + None, + [], ) + assert result == { + "description": model["description"], + "extra": json.dumps( + { + "unique_id": model["unique_id"], + "depends_on": f"ref('{model_name}')", + }, + ), + "is_managed_externally": False, + "metrics": superset_metrics, + } + + +def test_model_in_database() -> None: + """ + Test the ``model_in_database`` helper. + """ + url = make_url("bigquery://project-1") + assert model_in_database(cast(ModelSchema, {"database": "project-1"}), url) + assert not model_in_database(cast(ModelSchema, {"database": "project-2"}), url) + + url = make_url("snowflake://user:password@host/db1") + assert model_in_database(cast(ModelSchema, {"database": "db1"}), url) + assert not model_in_database(cast(ModelSchema, {"database": "db2"}), url) + + +def test_clean_metadata() -> None: + """ + Test the ``clean_metadata`` helper. 
+ """ + test_data = { + "autoincrement": "Auto Increment", + "changed_on": "Changed On", + "comment": "Comment", + "to_be_kept": "To Be Kept", + "created_on": "Created On", + "default": "Default", + "name": "Name", + "nullable": "Nullable", + "type_generic": "Type Generic", + "yeah": "sure", + } + result = clean_metadata(test_data) + assert result == { + "to_be_kept": "To Be Kept", + "yeah": "sure", + } From 60db50a4442ab18fa8c8b3862a37e806dede4c89 Mon Sep 17 00:00:00 2001 From: Vitor Avila Date: Thu, 14 Mar 2024 11:01:26 -0300 Subject: [PATCH 2/2] Making sync-columns default --- .../cli/superset/sync/dbt/command.py | 16 -- .../cli/superset/sync/dbt/datasets.py | 22 ++- tests/cli/superset/sync/dbt/command_test.py | 127 --------------- tests/cli/superset/sync/dbt/datasets_test.py | 150 ------------------ 4 files changed, 10 insertions(+), 305 deletions(-) diff --git a/src/preset_cli/cli/superset/sync/dbt/command.py b/src/preset_cli/cli/superset/sync/dbt/command.py index 2c647109..16378b23 100644 --- a/src/preset_cli/cli/superset/sync/dbt/command.py +++ b/src/preset_cli/cli/superset/sync/dbt/command.py @@ -106,12 +106,6 @@ default=False, help="End the execution with an error if a model fails to sync or a deprecated feature is used", ) -@click.option( - "--sync-columns", - is_flag=True, - default=False, - help="Sync columns from source.", -) @raise_cli_errors @click.pass_context def dbt_core( # pylint: disable=too-many-arguments, too-many-branches, too-many-locals ,too-many-statements @@ -131,7 +125,6 @@ def dbt_core( # pylint: disable=too-many-arguments, too-many-branches, too-many preserve_metadata: bool = False, merge_metadata: bool = False, raise_failures: bool = False, - sync_columns: bool = False, ) -> None: """ Sync models/metrics from dbt Core to Superset and charts/dashboards to dbt exposures. 
@@ -252,7 +245,6 @@ def dbt_core(  # pylint: disable=too-many-arguments, too-many-branches, too-many
         external_url_prefix,
         reload_columns=reload_columns,
         merge_metadata=merge_metadata,
-        sync_columns=sync_columns,
     )
 
     if exposures:
@@ -513,12 +505,6 @@ def fetch_sl_metrics(
     default=False,
     help="End the execution with an error if a model fails to sync or a deprecated feature is used",
 )
-@click.option(
-    "--sync-columns",
-    is_flag=True,
-    default=False,
-    help="Sync columns from source.",
-)
 @click.pass_context
 @raise_cli_errors
 def dbt_cloud(  # pylint: disable=too-many-arguments, too-many-locals
@@ -538,7 +524,6 @@ def dbt_cloud(  # pylint: disable=too-many-arguments, too-many-locals
     merge_metadata: bool = False,
     access_url: Optional[str] = None,
     raise_failures: bool = False,
-    sync_columns: bool = False,
 ) -> None:
     """
     Sync models/metrics from dbt Cloud to Superset.
@@ -604,7 +589,6 @@ def dbt_cloud(  # pylint: disable=too-many-arguments, too-many-locals
         external_url_prefix,
         reload_columns=reload_columns,
         merge_metadata=merge_metadata,
-        sync_columns=sync_columns,
     )
 
     if exposures:
diff --git a/src/preset_cli/cli/superset/sync/dbt/datasets.py b/src/preset_cli/cli/superset/sync/dbt/datasets.py
index c09d793b..6cc8d4ee 100644
--- a/src/preset_cli/cli/superset/sync/dbt/datasets.py
+++ b/src/preset_cli/cli/superset/sync/dbt/datasets.py
@@ -35,8 +35,7 @@ def model_in_database(model: ModelSchema, url: SQLAlchemyURL) -> bool:
 
 def clean_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]:
     """
-    Remove incompatbile columns from metatada.
-    When creating/updating an column/metric we need to remove some fields from the payload.
+    Remove incompatible columns from metadata to create/update a column/metric.
""" for key in ( "autoincrement", @@ -141,9 +140,9 @@ def compute_metrics( """ Compute the final list of metrics that should be used to update the dataset - reload_columns (default): dbt data synced & Preset-only metadata deleted - merge_metadata: dbt data synced & Preset-only metadata preserved - if both are false: Preset metadata preserved & dbt-only metadata synced + reload_columns (default): dbt data synced & Superset-only metadata deleted + merge_metadata: dbt data synced & Superset-only metadata preserved + if both are false: Superset metadata preserved & dbt-only metadata synced """ current_dataset_metrics = { metric["metric_name"]: metric for metric in dataset_metrics @@ -173,7 +172,7 @@ def compute_columns( refreshed_columns_list: List[Any], ) -> List[Any]: """ - Refresh the list of columns preserving Preset configurations + Refresh the list of columns preserving existing configurations """ final_dataset_columns = [] @@ -203,9 +202,9 @@ def compute_columns_metadata( """ Adds dbt metadata to dataset columns. 
- reload_columns (default): dbt data synced & Preset-only metadata deleted - merge_metadata: dbt data synced & Preset-only metadata preserved - if both are false: Preset metadata preserved & dbt-only metadata synced + reload_columns (default): dbt data synced & Superset-only metadata deleted + merge_metadata: dbt data synced & Superset-only metadata preserved + if both are false: Superset metadata preserved & dbt-only metadata synced """ dbt_metadata = { column["name"]: { @@ -278,7 +277,7 @@ def compute_dataset_metadata( # pylint: disable=too-many-arguments return update -def sync_datasets( # pylint: disable=too-many-arguments, too-many-locals # noqa:C901 +def sync_datasets( # pylint: disable=too-many-locals, too-many-arguments client: SupersetClient, models: List[ModelSchema], metrics: Dict[str, List[SupersetMetricDefinition]], @@ -288,7 +287,6 @@ def sync_datasets( # pylint: disable=too-many-arguments, too-many-locals # noqa certification: Optional[Dict[str, Any]] = None, reload_columns: bool = True, merge_metadata: bool = False, - sync_columns: bool = False, ) -> Tuple[List[Any], List[str]]: """ Read the dbt manifest and import models as datasets with metrics. 
@@ -315,7 +313,7 @@ def sync_datasets( # pylint: disable=too-many-arguments, too-many-locals # noqa # compute columns final_dataset_columns = [] - if not reload_columns and sync_columns: + if not reload_columns: refreshed_columns_list = client.get_refreshed_dataset_columns(dataset["id"]) final_dataset_columns = compute_columns( dataset["columns"], diff --git a/tests/cli/superset/sync/dbt/command_test.py b/tests/cli/superset/sync/dbt/command_test.py index f9f180b6..6a1f2667 100644 --- a/tests/cli/superset/sync/dbt/command_test.py +++ b/tests/cli/superset/sync/dbt/command_test.py @@ -1375,7 +1375,6 @@ def test_dbt_core(mocker: MockerFixture, fs: FakeFilesystem) -> None: ) client = SupersetClient() mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") - log_warning = mocker.patch("preset_cli.cli.superset.sync.dbt.command.log_warning") sync_database = mocker.patch( "preset_cli.cli.superset.sync.dbt.command.sync_database", ) @@ -1407,13 +1406,6 @@ def test_dbt_core(mocker: MockerFixture, fs: FakeFilesystem) -> None: catch_exceptions=False, ) assert result.exit_code == 0 - log_warning.assert_called_with( - ( - "Passing the manifest.json file is deprecated. " - "Please pass the dbt_project.yml file instead." 
- ), - DeprecationWarning, - ) sync_database.assert_called_with( client, profiles, @@ -1434,7 +1426,6 @@ def test_dbt_core(mocker: MockerFixture, fs: FakeFilesystem) -> None: "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) sync_exposures.assert_called_with( client, @@ -1700,7 +1691,6 @@ def test_dbt_core_preserve_metadata( "preset_cli.cli.superset.sync.dbt.command.SupersetClient", ) client = SupersetClient() - log_warning = mocker.patch("preset_cli.cli.superset.sync.dbt.command.log_warning") mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") sync_database = mocker.patch( "preset_cli.cli.superset.sync.dbt.command.sync_database", @@ -1734,13 +1724,6 @@ def test_dbt_core_preserve_metadata( catch_exceptions=False, ) assert result.exit_code == 0 - log_warning.assert_called_with( - ( - "Passing the manifest.json file is deprecated. " - "Please pass the dbt_project.yml file instead." - ), - DeprecationWarning, - ) sync_database.assert_called_with( client, profiles, @@ -1761,7 +1744,6 @@ def test_dbt_core_preserve_metadata( "", reload_columns=False, merge_metadata=False, - sync_columns=False, ) sync_exposures.assert_called_with( client, @@ -1789,7 +1771,6 @@ def test_dbt_core_preserve_columns( "preset_cli.cli.superset.sync.dbt.command.SupersetClient", ) client = SupersetClient() - log_warning = mocker.patch("preset_cli.cli.superset.sync.dbt.command.log_warning") mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") sync_database = mocker.patch( "preset_cli.cli.superset.sync.dbt.command.sync_database", @@ -1818,13 +1799,6 @@ def test_dbt_core_preserve_columns( catch_exceptions=False, ) assert result.exit_code == 0 - log_warning.assert_called_with( - ( - "Passing the manifest.json file is deprecated. " - "Please pass the dbt_project.yml file instead." 
- ), - DeprecationWarning, - ) sync_database.assert_called_with( client, profiles, @@ -1845,7 +1819,6 @@ def test_dbt_core_preserve_columns( "", reload_columns=False, merge_metadata=False, - sync_columns=False, ) @@ -1870,7 +1843,6 @@ def test_dbt_core_merge_metadata( ) client = SupersetClient() mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") - log_warning = mocker.patch("preset_cli.cli.superset.sync.dbt.command.log_warning") sync_database = mocker.patch( "preset_cli.cli.superset.sync.dbt.command.sync_database", ) @@ -1903,13 +1875,6 @@ def test_dbt_core_merge_metadata( catch_exceptions=False, ) assert result.exit_code == 0 - log_warning.assert_called_with( - ( - "Passing the manifest.json file is deprecated. " - "Please pass the dbt_project.yml file instead." - ), - DeprecationWarning, - ) sync_database.assert_called_with( client, profiles, @@ -1930,7 +1895,6 @@ def test_dbt_core_merge_metadata( "", reload_columns=False, merge_metadata=True, - sync_columns=False, ) sync_exposures.assert_called_with( client, @@ -2017,7 +1981,6 @@ def test_dbt_core_raise_failures_flag_no_failures( "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) list_failed_models.assert_not_called() @@ -2099,7 +2062,6 @@ def test_dbt_core_raise_failures_flag_deprecation_warning( "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) list_failed_models.assert_not_called() @@ -2183,7 +2145,6 @@ def test_dbt_core_raise_failures_flag_with_failures( "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) @@ -2265,7 +2226,6 @@ def test_dbt_core_raise_failures_flag_with_failures_and_deprecation( "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) @@ -2425,9 +2385,6 @@ def test_dbt(mocker: MockerFixture, fs: FakeFilesystem) -> None: ) client = SupersetClient() mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") - log_warning = mocker.patch( - "preset_cli.cli.superset.sync.dbt.command.log_warning", - ) 
sync_database = mocker.patch( "preset_cli.cli.superset.sync.dbt.command.sync_database", ) @@ -2459,13 +2416,6 @@ def test_dbt(mocker: MockerFixture, fs: FakeFilesystem) -> None: catch_exceptions=False, ) assert result.exit_code == 0 - log_warning.assert_called_with( - ( - "Passing the manifest.json file is deprecated. " - "Please pass the dbt_project.yml file instead." - ), - DeprecationWarning, - ) sync_database.assert_called_with( client, profiles, @@ -2557,7 +2507,6 @@ def test_dbt(mocker: MockerFixture, fs: FakeFilesystem) -> None: "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) sync_exposures.assert_called_with( client, @@ -2743,9 +2692,6 @@ def test_dbt_core_disallow_edits_superset( ) client = SupersetClient() mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") - log_warning = mocker.patch( - "preset_cli.cli.superset.sync.dbt.command.log_warning", - ) sync_database = mocker.patch( "preset_cli.cli.superset.sync.dbt.command.sync_database", ) @@ -2768,14 +2714,6 @@ def test_dbt_core_disallow_edits_superset( ) assert result.exit_code == 0 - log_warning.assert_called_with( - ( - "The managed externally feature was only introduced in Superset v1.5." - "Make sure you are running a compatible version." 
- ), - UserWarning, - ) - sync_database.assert_called_with( client, profiles, @@ -2865,7 +2803,6 @@ def test_dbt_cloud(mocker: MockerFixture) -> None: "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) @@ -2922,7 +2859,6 @@ def test_dbt_cloud_preserve_metadata(mocker: MockerFixture) -> None: "", reload_columns=False, merge_metadata=False, - sync_columns=False, ) @@ -2979,7 +2915,6 @@ def test_dbt_cloud_preserve_columns(mocker: MockerFixture) -> None: "", reload_columns=False, merge_metadata=False, - sync_columns=False, ) @@ -3036,7 +2971,6 @@ def test_dbt_cloud_merge_metadata(mocker: MockerFixture) -> None: "", reload_columns=False, merge_metadata=True, - sync_columns=False, ) @@ -3096,7 +3030,6 @@ def test_dbt_cloud_raise_failures_flag_no_failures(mocker: MockerFixture) -> Non "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) list_failed_models.assert_not_called() @@ -3159,7 +3092,6 @@ def test_dbt_cloud_raise_failures_flag_with_failures(mocker: MockerFixture) -> N "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) @@ -3219,64 +3151,6 @@ def test_dbt_cloud_preserve_and_merge(mocker: MockerFixture) -> None: assert "can't be combined. Please include only one to the command." in result.output -def test_dbt_cloud_sync_columns(mocker: MockerFixture) -> None: - """ - Test the ``dbt-cloud`` command with the ``--sync_columns`` flag. 
- """ - SupersetClient = mocker.patch( - "preset_cli.cli.superset.sync.dbt.command.SupersetClient", - ) - superset_client = SupersetClient() - mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") - DBTClient = mocker.patch( - "preset_cli.cli.superset.sync.dbt.command.DBTClient", - ) - dbt_client = DBTClient() - sync_datasets = mocker.patch( - "preset_cli.cli.superset.sync.dbt.command.sync_datasets", - return_value=([], []), - ) - mocker.patch( - "preset_cli.cli.superset.sync.dbt.command.get_job", - return_value={"id": 123, "name": "My job", "environment_id": 456}, - ) - - dbt_client.get_models.return_value = dbt_cloud_models - dbt_client.get_og_metrics.return_value = dbt_cloud_metrics - database = mocker.MagicMock() - superset_client.get_databases.return_value = [database] - superset_client.get_database.return_value = database - - runner = CliRunner() - result = runner.invoke( - superset_cli, - [ - "https://superset.example.org/", - "sync", - "dbt-cloud", - "XXX", - "1", - "2", - "123", - "--preserve-metadata", - "--sync-columns", - ], - catch_exceptions=False, - ) - assert result.exit_code == 0 - sync_datasets.assert_called_with( - superset_client, - dbt_cloud_models, - superset_metrics, - database, - False, - "", - reload_columns=False, - merge_metadata=False, - sync_columns=True, - ) - - def test_dbt_cloud_no_job_id(mocker: MockerFixture) -> None: """ Test the ``dbt-cloud`` command when no job ID is specified. 
@@ -3330,7 +3204,6 @@ def test_dbt_cloud_no_job_id(mocker: MockerFixture) -> None: "", reload_columns=True, merge_metadata=False, - sync_columns=False, ) diff --git a/tests/cli/superset/sync/dbt/datasets_test.py b/tests/cli/superset/sync/dbt/datasets_test.py index 08d955e1..b2cd4bb2 100644 --- a/tests/cli/superset/sync/dbt/datasets_test.py +++ b/tests/cli/superset/sync/dbt/datasets_test.py @@ -551,81 +551,6 @@ def test_sync_datasets_preserve_metadata(mocker: MockerFixture) -> None: merge_metadata=False, ) - get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) - compute_metrics_mock.assert_called_with( - get_or_create_dataset_mock()["metrics"], - metrics[model_id], - False, - False, - ) - compute_columns_metadata_mock.assert_called_with( - models[0]["columns"], - client.get_dataset()["columns"], - False, - False, - ) - compute_columns_mock.assert_not_called() - compute_dataset_metadata_mock.assert_called_with( - models[0], - None, - False, - compute_metrics_mock(), - None, - [], - ) - client.create_dataset.assert_not_called() - client.get_refreshed_dataset_columns.assert_not_called() - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=False, - **compute_dataset_metadata_mock(), - ), - mock.call( - 1, - override_columns=False, - columns=compute_columns_metadata_mock(), - ), - ], - ) - - -def test_sync_datasets_preserve_metadata_sync_columns(mocker: MockerFixture) -> None: - """ - Test ``sync_datasets`` with preserve_metadata & sync_columns set to True. 
- """ - client = mocker.MagicMock() - get_or_create_dataset_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", - return_value={"id": 1, "metrics": [], "columns": []}, - ) - compute_metrics_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", - ) - compute_columns_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", - ) - compute_dataset_metadata_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", - ) - compute_columns_metadata_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", - ) - model_id = models[0]["unique_id"] - - sync_datasets( - client=client, - models=models, - metrics=metrics, - database={"id": 1}, - disallow_edits=False, - external_url_prefix="", - reload_columns=False, - merge_metadata=False, - sync_columns=True, - ) - get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) compute_metrics_mock.assert_called_with( get_or_create_dataset_mock()["metrics"], @@ -705,81 +630,6 @@ def test_sync_datasets_merge_metadata(mocker: MockerFixture) -> None: merge_metadata=True, ) - get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) - compute_metrics_mock.assert_called_with( - get_or_create_dataset_mock()["metrics"], - metrics[model_id], - False, - True, - ) - compute_columns_metadata_mock.assert_called_with( - models[0]["columns"], - client.get_dataset()["columns"], - False, - True, - ) - compute_columns_mock.assert_not_called() - compute_dataset_metadata_mock.assert_called_with( - models[0], - None, - False, - compute_metrics_mock(), - None, - [], - ) - client.create_dataset.assert_not_called() - client.get_refreshed_dataset_columns.assert_not_called() - client.update_dataset.assert_has_calls( - [ - mock.call( - 1, - override_columns=False, - **compute_dataset_metadata_mock(), - ), - mock.call( - 1, - override_columns=False, - 
columns=compute_columns_metadata_mock(), - ), - ], - ) - - -def test_sync_datasets_merge_metadata_sync_columns(mocker: MockerFixture) -> None: - """ - Test ``sync_datasets`` with merge_metadata & sync_columns set to True. - """ - client = mocker.MagicMock() - get_or_create_dataset_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.get_or_create_dataset", - return_value={"id": 1, "metrics": [], "columns": []}, - ) - compute_metrics_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.compute_metrics", - ) - compute_columns_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.compute_columns", - ) - compute_dataset_metadata_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.compute_dataset_metadata", - ) - compute_columns_metadata_mock = mocker.patch( - "preset_cli.cli.superset.sync.dbt.datasets.compute_columns_metadata", - ) - model_id = models[0]["unique_id"] - - sync_datasets( - client=client, - models=models, - metrics=metrics, - database={"id": 1}, - disallow_edits=False, - external_url_prefix="", - reload_columns=False, - merge_metadata=True, - sync_columns=True, - ) - get_or_create_dataset_mock.assert_called_with(client, models[0], {"id": 1}) compute_metrics_mock.assert_called_with( get_or_create_dataset_mock()["metrics"],