chore: remove virtual datasets
betodealmeida committed May 10, 2024
1 parent 7a0e761 commit e7b70c5
Showing 2 changed files with 21 additions and 95 deletions.
41 changes: 7 additions & 34 deletions src/preset_cli/cli/superset/sync/dbt/datasets.py
@@ -8,14 +8,11 @@
import logging
from typing import Any, Dict, List, Optional, Tuple

from sqlalchemy.engine.url import URL as SQLAlchemyURL
from sqlalchemy.engine.url import make_url
from yarl import URL

from preset_cli.api.clients.dbt import ModelSchema
from preset_cli.api.clients.superset import SupersetClient, SupersetMetricDefinition
from preset_cli.api.operators import OneToMany
from preset_cli.cli.superset.sync.dbt.lib import create_engine_with_check
from preset_cli.exceptions import CLIError, SupersetError
from preset_cli.lib import raise_cli_errors

Expand All @@ -24,16 +21,6 @@
_logger = logging.getLogger(__name__)


def model_in_database(model: ModelSchema, url: SQLAlchemyURL) -> bool:
"""
Return if a model is in the same database as a SQLAlchemy URI.
"""
if url.drivername == "bigquery":
return model["database"] == url.host

return model["database"] == url.database


def clean_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]:
"""
Remove incompatible columns from metadata to create/update a column/metric.
Expand Down Expand Up @@ -65,28 +52,14 @@ def create_dataset(
model: ModelSchema,
) -> Dict[str, Any]:
"""
Create a physical or virtual dataset.
Virtual datasets are created when the table database is different from the main
database, for systems that support cross-database queries (Trino, BigQuery, etc.)
Create a (physical) dataset.
"""
url = make_url(database["sqlalchemy_uri"])
if model_in_database(model, url):
kwargs = {
"database": database["id"],
"schema": model["schema"],
"table_name": model.get("alias") or model["name"],
}
else:
engine = create_engine_with_check(url)
quote = engine.dialect.identifier_preparer.quote
source = ".".join(quote(model[key]) for key in ("database", "schema", "name"))
kwargs = {
"database": database["id"],
"schema": model["schema"],
"table_name": model.get("alias") or model["name"],
"sql": f"SELECT * FROM {source}",
}
kwargs = {
"database": database["id"],
"catalog": model["database"],
"schema": model["schema"],
"table_name": model.get("alias") or model["name"],
}

return client.create_dataset(**kwargs)

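Pieced together from the hunks above, the simplified function now always creates a physical dataset and forwards the dbt model's database as the Superset catalog. A reconstructed sketch of ``create_dataset`` after this commit (the ``client`` and ``database`` parameter names are inferred from the test calls in the second file, since this hunk only shows the tail of the signature; ``SupersetClient`` and ``ModelSchema`` come from the imports shown in the first hunk):

def create_dataset(
    client: SupersetClient,
    database: Dict[str, Any],
    model: ModelSchema,
) -> Dict[str, Any]:
    """
    Create a (physical) dataset.
    """
    # The dbt model's database becomes the Superset catalog; no virtual
    # (SQL-based) dataset is created anymore.
    kwargs = {
        "database": database["id"],
        "catalog": model["database"],
        "schema": model["schema"],
        "table_name": model.get("alias") or model["name"],
    }

    return client.create_dataset(**kwargs)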
75 changes: 14 additions & 61 deletions tests/cli/superset/sync/dbt/datasets_test.py
@@ -4,12 +4,11 @@
# pylint: disable=invalid-name, too-many-lines

import json
from typing import Any, Dict, List, cast
from typing import Any, Dict, List
from unittest import mock

import pytest
from pytest_mock import MockerFixture
from sqlalchemy.engine.url import make_url
from yarl import URL

from preset_cli.api.clients.dbt import MetricSchema, ModelSchema
@@ -24,7 +23,6 @@
create_dataset,
get_certification_info,
get_or_create_dataset,
model_in_database,
sync_datasets,
)
from preset_cli.exceptions import CLIError, ErrorLevel, ErrorPayload, SupersetError
@@ -147,6 +145,7 @@ def test_sync_datasets_new(mocker: MockerFixture) -> None:
)
client.create_dataset.assert_called_with(
database=1,
catalog="examples_dev",
schema="public",
table_name="messages_channels",
)
Expand Down Expand Up @@ -396,7 +395,12 @@ def test_sync_datasets_new_bq_error(mocker: MockerFixture) -> None:
)
client.create_dataset.assert_has_calls(
[
mock.call(database=1, schema="public", table_name="messages_channels"),
mock.call(
database=1,
catalog="examples_dev",
schema="public",
table_name="messages_channels",
),
],
)
client.update_dataset.assert_has_calls([])
@@ -741,9 +745,9 @@ def test_sync_datasets_no_columns(mocker: MockerFixture) -> None:
)


def test_create_dataset_physical(mocker: MockerFixture) -> None:
def test_create_dataset(mocker: MockerFixture) -> None:
"""
Test ``create_dataset`` for physical datasets.
Test ``create_dataset``.
"""
client = mocker.MagicMock()

@@ -759,14 +763,15 @@ def test_create_dataset_physical(mocker: MockerFixture) -> None:
)
client.create_dataset.assert_called_with(
database=1,
catalog="examples_dev",
schema="public",
table_name="messages_channels",
)


def test_create_dataset_virtual(mocker: MockerFixture) -> None:
def test_create_dataset_other_catalog(mocker: MockerFixture) -> None:
"""
Test ``create_dataset`` for virtual datasets.
Test ``create_dataset`` for models in different databases.
"""
create_engine = mocker.patch(
"preset_cli.cli.superset.sync.dbt.lib.create_engine",
@@ -786,51 +791,12 @@ def test_create_dataset_virtual(mocker: MockerFixture) -> None:
)
client.create_dataset.assert_called_with(
database=1,
catalog="examples_dev",
schema="public",
table_name="messages_channels",
sql="SELECT * FROM examples_dev.public.messages_channels",
)


def test_create_dataset_virtual_missing_dependency(
capsys: pytest.CaptureFixture[str],
mocker: MockerFixture,
) -> None:
"""
Test ``create_dataset`` for virtual datasets when the DB connection requires
an additional package.
"""
client = mocker.MagicMock()

with pytest.raises(NotImplementedError):
create_dataset(
client,
{
"id": 1,
"schema": "public",
"name": "Database",
"sqlalchemy_uri": "blah://user@host/examples",
},
models[0],
)

with pytest.raises(SystemExit) as excinfo:
create_dataset(
client,
{
"id": 1,
"schema": "public",
"name": "other_db",
"sqlalchemy_uri": "snowflake://user@host/examples",
},
models[0],
)
output_content = capsys.readouterr()
assert excinfo.type == SystemExit
assert excinfo.value.code == 1
assert "preset-cli[snowflake]" in output_content.out


def test_get_or_create_dataset_existing_dataset(mocker: MockerFixture) -> None:
"""
Test the ``get_or_create_dataset`` helper when it finds a dataset.
@@ -1402,19 +1368,6 @@ def test_compute_dataset_metadata_null_certification() -> None:
}


def test_model_in_database() -> None:
"""
Test the ``model_in_database`` helper.
"""
url = make_url("bigquery://project-1")
assert model_in_database(cast(ModelSchema, {"database": "project-1"}), url)
assert not model_in_database(cast(ModelSchema, {"database": "project-2"}), url)

url = make_url("snowflake://user:password@host/db1")
assert model_in_database(cast(ModelSchema, {"database": "db1"}), url)
assert not model_in_database(cast(ModelSchema, {"database": "db2"}), url)


def test_clean_metadata() -> None:
"""
Test the ``clean_metadata`` helper.
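For orientation, this is a hedged reconstruction of the dbt model implied by the updated assertions; the real ``models[0]`` fixture is defined in an unchanged part of datasets_test.py and does not appear in this diff:

# Hypothetical model dict matching the assertions above (catalog="examples_dev",
# schema="public", table_name="messages_channels"); field names follow ModelSchema.
model = {
    "database": "examples_dev",   # forwarded as the Superset ``catalog``
    "schema": "public",           # forwarded as the dataset ``schema``
    "name": "messages_channels",  # becomes ``table_name`` unless ``alias`` is set
}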
