diff --git a/src/preset_cli/api/clients/superset.py b/src/preset_cli/api/clients/superset.py
index 4691c35c..fac8cac8 100644
--- a/src/preset_cli/api/clients/superset.py
+++ b/src/preset_cli/api/clients/superset.py
@@ -247,7 +247,7 @@ def _run_query(
         schema: Optional[str] = None,
         limit: int = 1000,
     ) -> Dict[str, Any]:
-        url = self.baseurl / "superset/sql_json/"
+        url = self.baseurl / "api/v1/sqllab/execute/"
         data = {
             "client_id": shortid()[:10],
             "database_id": database_id,
@@ -270,8 +270,14 @@ def _run_query(
 
         _logger.debug("POST %s\n%s", url, json.dumps(data, indent=4))
         response = self.session.post(url, json=data)
-        validate_response(response)
+        # Legacy Superset installations don't have the SQL API endpoint yet
+        if response.status_code == 404:
+            url = self.baseurl / "superset/sql_json/"
+            _logger.debug("POST %s\n%s", url, json.dumps(data, indent=4))
+            response = self.session.post(url, json=data)
+
+        validate_response(response)
 
         payload = response.json()
 
         return payload
@@ -473,6 +479,37 @@ def update_resource(
 
         return resource
 
+    def get_resource_endpoint_info(self, resource_name: str, **kwargs: Any) -> Any:
+        """
+        Get resource endpoint info (such as available columns), possibly filtered.
+        """
+        query = prison.dumps({"keys": list(kwargs["keys"])} if "keys" in kwargs else {})
+
+        url = self.baseurl / "api/v1" / resource_name / "_info" % {"q": query}
+        _logger.debug("GET %s", url)
+        response = self.session.get(url)
+        validate_response(response)
+
+        endpoint_info = response.json()
+
+        return endpoint_info
+
+    def validate_key_in_resource_schema(
+        self, resource_name: str, key_name: str, **kwargs: Any
+    ) -> Any:
+        """
+        Validate if a key is present in a resource schema.
+        """
+        schema_validation = {}
+
+        endpoint_info = self.get_resource_endpoint_info(resource_name, **kwargs)
+
+        for key in kwargs.get("keys", ["add_columns", "edit_columns"]):
+            schema_columns = [column["name"] for column in endpoint_info.get(key, [])]
+            schema_validation[key] = key_name in schema_columns
+
+        return schema_validation
+
     def get_database(self, database_id: int) -> Any:
         """
         Return a single database.
@@ -526,6 +563,16 @@ def create_dataset(self, **kwargs: Any) -> Any:
         if "sql" not in kwargs:
             return self.create_resource("dataset", **kwargs)
 
+        # Check if the dataset creation endpoint supports ``sql`` directly
+        not_legacy = self.validate_key_in_resource_schema(
+            "dataset",
+            "sql",
+            keys=["add_columns"],
+        )
+        not_legacy = not_legacy["add_columns"]
+        if not_legacy:
+            return self.create_resource("dataset", **kwargs)
+
         # run query to determine column types
         payload = self._run_query(
             database_id=kwargs["database"],
@@ -539,7 +586,8 @@ def create_dataset(self, **kwargs: Any) -> Any:
         for column in columns:
             column["column_name"] = column["name"]
             column["groupby"] = True
-            if column["is_dttm"]:
+            # Superset <= 1.4 returns ``is_date`` instead of ``is_dttm``
+            if column.get("is_dttm") or column.get("is_date"):
                 column["type_generic"] = 2
             elif column["type"] is None:
                 column["type"] = "UNKNOWN"
@@ -564,7 +612,8 @@ def create_dataset(self, **kwargs: Any) -> Any:
 
         payload = response.json()
 
-        return payload["data"]
+        # Superset <= 1.4 returns ``{"table_id": dataset_id}`` rather than the dataset payload
+        return payload["data"] if "data" in payload else {"id": payload["table_id"]}
 
     def update_dataset(
         self,
diff --git a/src/preset_cli/cli/superset/sync/dbt/command.py b/src/preset_cli/cli/superset/sync/dbt/command.py
index b7cd3b51..8546c4d2 100644
--- a/src/preset_cli/cli/superset/sync/dbt/command.py
+++ b/src/preset_cli/cli/superset/sync/dbt/command.py
@@ -84,7 +84,7 @@ def dbt_core(  # pylint: disable=too-many-arguments, too-many-locals
     profiles: Optional[str] = None,
     exposures: Optional[str] = None,
     import_db: bool = False,
-    disallow_edits: bool = True,
+    disallow_edits: bool = False,
     external_url_prefix: str = "",
     exposures_only: bool = False,
     preserve_columns: bool = False,
@@ -102,6 +102,17 @@ def dbt_core(  # pylint: disable=too-many-arguments, too-many-locals
         profiles = os.path.expanduser("~/.dbt/profiles.yml")
 
     file_path = Path(file)
+
+    if "MANAGER_URL" not in ctx.obj and disallow_edits:
+        warnings.warn(
+            (
+                "The managed externally feature was only introduced in Superset v1.5."
+                " Make sure you are running a compatible version."
+            ),
+            category=UserWarning,
+            stacklevel=2,
+        )
+
     if file_path.name == "manifest.json":
         warnings.warn(
             (
@@ -324,7 +335,7 @@ def dbt_cloud(  # pylint: disable=too-many-arguments, too-many-locals
     exclude: Tuple[str, ...],
     exposures: Optional[str] = None,
     job_id: Optional[int] = None,
-    disallow_edits: bool = True,
+    disallow_edits: bool = False,
     external_url_prefix: str = "",
     exposures_only: bool = False,
     preserve_columns: bool = False,
diff --git a/tests/api/clients/superset_test.py b/tests/api/clients/superset_test.py
index b8119eb2..c5bd1db6 100644
--- a/tests/api/clients/superset_test.py
+++ b/tests/api/clients/superset_test.py
@@ -36,6 +36,93 @@ def test_run_query(mocker: MockerFixture, requests_mock: Mocker) -> None:
     """
     _logger = mocker.patch("preset_cli.api.clients.superset._logger")
     mocker.patch("preset_cli.api.clients.superset.shortid", return_value="5b8f4d8c89")
+
+    requests_mock.post(
+        "https://superset.example.org/api/v1/sqllab/execute/",
+        json={
+            "query_id": 2,
+            "status": "success",
+            "data": [{"value": 1}],
+            "columns": [{"name": "value", "type": "INT", "is_date": False}],
+            "selected_columns": [{"name": "value", "type": "INT", "is_date": False}],
+            "expanded_columns": [],
+            "query": {
+                "changedOn": "2022-03-25T15:37:00.393660",
+                "changed_on": "2022-03-25T15:37:00.393660",
+                "dbId": 1,
+                "db": "examples",
+                "endDttm": 1648222620417.808,
+                "errorMessage": None,
+                "executedSql": "SELECT 1 AS value\nLIMIT 1001",
+                "id": "IrwwY8Ky14",
+                "queryId": 2,
+                "limit": 1000,
+                "limitingFactor": "NOT_LIMITED",
+                "progress": 100,
+                "rows": 1,
+                "schema": "public",
+                "ctas": False,
+                "serverId": 2,
+                "sql": "SELECT 1 AS value",
+                "sqlEditorId": "1",
+                "startDttm": 1648222620279.198000,
+                "state": "success",
+                "tab": "Untitled Query 1",
+                "tempSchema": None,
+                "tempTable": None,
+                "userId": 1,
+                "user": "admin admin",
+                "resultsKey": None,
+                "trackingUrl": None,
+                "extra": {"cancel_query": 35121, "progress": None},
+            },
+        },
+    )
+
+    auth = Auth()
+    client = SupersetClient("https://superset.example.org/", auth)
+
+    results = client.run_query(database_id=1, sql="SELECT 1 AS value", limit=10)
+    assert results.to_dict() == {"value": {0: 1}}
+
+    _logger.debug.assert_called_with(
+        "POST %s\n%s",
+        URL("https://superset.example.org/api/v1/sqllab/execute/"),
+        """{
+    "client_id": "5b8f4d8c89",
+    "database_id": 1,
+    "json": true,
+    "runAsync": false,
+    "schema": null,
+    "sql": "SELECT 1 AS value",
+    "sql_editor_id": "1",
+    "tab": "Untitled Query 2",
+    "tmp_table_name": "",
+    "select_as_cta": false,
+    "ctas_method": "TABLE",
+    "queryLimit": 10,
+    "expand_data": true
+}""",
+    )
+
+
+def test_run_query_legacy_endpoint(
+    mocker: MockerFixture,
+    requests_mock: Mocker,
+) -> None:
+    """
+    Test the ``run_query`` method for legacy Superset instances.
+ """ + _logger = mocker.patch("preset_cli.api.clients.superset._logger") + mocker.patch("preset_cli.api.clients.superset.shortid", return_value="5b8f4d8c89") + + # Mock POST request to new endpoint + requests_mock.post( + "https://superset.example.org/api/v1/sqllab/execute/", + status_code=404, + ) + + # Mock POST request to legacy endpoint requests_mock.post( "https://superset.example.org/superset/sql_json/", json={ @@ -83,6 +170,29 @@ def test_run_query(mocker: MockerFixture, requests_mock: Mocker) -> None: results = client.run_query(database_id=1, sql="SELECT 1 AS value", limit=10) assert results.to_dict() == {"value": {0: 1}} + + # Assert request to new endpoint was made first + _logger.debug.assert_any_call( + "POST %s\n%s", + URL("https://superset.example.org/api/v1/sqllab/execute/"), + """{ + "client_id": "5b8f4d8c89", + "database_id": 1, + "json": true, + "runAsync": false, + "schema": null, + "sql": "SELECT 1 AS value", + "sql_editor_id": "1", + "tab": "Untitled Query 2", + "tmp_table_name": "", + "select_as_cta": false, + "ctas_method": "TABLE", + "queryLimit": 10, + "expand_data": true +}""", + ) + + # Assert request to legacy endpoint then _logger.debug.assert_called_with( "POST %s\n%s", URL("https://superset.example.org/superset/sql_json/"), @@ -126,7 +236,7 @@ def test_run_query_error(requests_mock: Mocker) -> None: }, ] requests_mock.post( - "https://superset.example.org/superset/sql_json/", + "https://superset.example.org/api/v1/sqllab/execute/", json={"errors": errors}, headers={"Content-Type": "application/json"}, status_code=400, @@ -183,7 +293,7 @@ def test_convert_to_adhoc_column() -> None: def test_get_data(requests_mock: Mocker) -> None: """ - Test the ``run_query`` method. + Test the ``get_data`` method. """ requests_mock.get( "https://superset.example.org/api/v1/dataset/27", @@ -856,6 +966,365 @@ def test_update_resource(requests_mock: Mocker) -> None: } +def test_get_resource_endpoint_info(requests_mock: Mocker) -> None: + """ + Test the ``get_resource_endpoint_info`` method. 
+ """ + + expected_response = json.dumps( + { + "add_columns": [ + { + "description": "", + "label": "Database", + "name": "database", + "required": True, + "type": "Integer", + "unique": False, + }, + { + "description": "", + "label": "Schema", + "name": "schema", + "required": False, + "type": "String", + "unique": False, + "validate": [""], + }, + { + "description": "", + "label": "Table Name", + "name": "table_name", + "required": True, + "type": "String", + "unique": False, + "validate": [""], + }, + { + "description": "", + "label": "Sql", + "name": "sql", + "required": False, + "type": "String", + "unique": False, + }, + { + "description": "", + "label": "Owners", + "name": "owners", + "required": False, + "type": "List", + "unique": False, + }, + ], + "add_title": "Add Sqla Table", + "edit_columns": [ + { + "description": "", + "label": "Table Name", + "name": "table_name", + "required": False, + "type": "String", + "unique": False, + "validate": [""], + }, + { + "description": "", + "label": "Sql", + "name": "sql", + "required": False, + "type": "String", + "unique": False, + }, + { + "description": "", + "label": "Filter Select Enabled", + "name": "filter_select_enabled", + "required": False, + "type": "Boolean", + "unique": False, + }, + { + "description": "", + "label": "Fetch Values Predicate", + "name": "fetch_values_predicate", + "required": False, + "type": "String", + "unique": False, + "validate": [""], + }, + { + "description": "", + "label": "Schema", + "name": "schema", + "required": False, + "type": "String", + "unique": False, + "validate": [""], + }, + { + "description": "", + "label": "Description", + "name": "description", + "required": False, + "type": "String", + "unique": False, + }, + { + "description": "", + "label": "Main Dttm Col", + "name": "main_dttm_col", + "required": False, + "type": "String", + "unique": False, + }, + { + "description": "", + "label": "Offset", + "name": "offset", + "required": False, + "type": "Integer", + "unique": False, + }, + { + "description": "", + "label": "Default Endpoint", + "name": "default_endpoint", + "required": False, + "type": "String", + "unique": False, + }, + { + "description": "", + "label": "Cache Timeout", + "name": "cache_timeout", + "required": False, + "type": "Integer", + "unique": False, + }, + { + "description": "", + "label": "Is Sqllab View", + "name": "is_sqllab_view", + "required": False, + "type": "Boolean", + "unique": False, + }, + { + "description": "", + "label": "Template Params", + "name": "template_params", + "required": False, + "type": "String", + "unique": False, + }, + { + "description": "", + "label": "Owners", + "name": "owners", + "required": False, + "type": "List", + "unique": False, + }, + { + "description": "", + "label": "Columns", + "name": "columns", + "required": False, + "type": "List", + "unique": False, + }, + { + "description": "", + "label": "Metrics", + "name": "metrics", + "required": False, + "type": "List", + "unique": False, + }, + { + "description": "", + "label": "Extra", + "name": "extra", + "required": False, + "type": "String", + "unique": False, + }, + ], + "edit_title": "Edit Sqla Table", + "filters": { + "database": [ + {"name": "Relation", "operator": "rel_o_m"}, + {"name": "No Relation", "operator": "nrel_o_m"}, + ], + "id": [ + {"name": "Equal to", "operator": "eq"}, + {"name": "Greater than", "operator": "gt"}, + {"name": "Smaller than", "operator": "lt"}, + {"name": "Not Equal to", "operator": "neq"}, + {"name": "Is certified", "operator": 
"dataset_is_certified"}, + ], + "owners": [{"name": "Relation as Many", "operator": "rel_m_m"}], + "schema": [ + {"name": "Starts with", "operator": "sw"}, + {"name": "Ends with", "operator": "ew"}, + {"name": "Contains", "operator": "ct"}, + {"name": "Equal to", "operator": "eq"}, + {"name": "Not Starts with", "operator": "nsw"}, + {"name": "Not Ends with", "operator": "new"}, + {"name": "Not Contains", "operator": "nct"}, + {"name": "Not Equal to", "operator": "neq"}, + ], + "sql": [ + {"name": "Starts with", "operator": "sw"}, + {"name": "Ends with", "operator": "ew"}, + {"name": "Contains", "operator": "ct"}, + {"name": "Equal to", "operator": "eq"}, + {"name": "Not Starts with", "operator": "nsw"}, + {"name": "Not Ends with", "operator": "new"}, + {"name": "Not Contains", "operator": "nct"}, + {"name": "Not Equal to", "operator": "neq"}, + {"name": "Null or Empty", "operator": "dataset_is_null_or_empty"}, + ], + "table_name": [ + {"name": "Starts with", "operator": "sw"}, + {"name": "Ends with", "operator": "ew"}, + {"name": "Contains", "operator": "ct"}, + {"name": "Equal to", "operator": "eq"}, + {"name": "Not Starts with", "operator": "nsw"}, + {"name": "Not Ends with", "operator": "new"}, + {"name": "Not Contains", "operator": "nct"}, + {"name": "Not Equal to", "operator": "neq"}, + ], + }, + "permissions": [ + "can_warm_up_cache", + "can_export", + "can_get_or_create_dataset", + "can_duplicate", + "can_write", + "can_read", + ], + }, + ) + + requests_mock.get( + "https://superset.example.org/api/v1/dataset/_info?q=()", + json=expected_response, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.get_resource_endpoint_info("dataset") + assert response == expected_response + + +def test_get_resource_endpoint_info_filtered(requests_mock: Mocker) -> None: + """ + Test the ``get_resource_endpoint_info`` method with an applied filter. + """ + + expected_response = json.dumps( + { + "add_columns": [ + { + "description": "", + "label": "Database", + "name": "database", + "required": True, + "type": "Integer", + "unique": False, + }, + { + "description": "", + "label": "Schema", + "name": "schema", + "required": False, + "type": "String", + "unique": False, + "validate": [""], + }, + { + "description": "", + "label": "Table Name", + "name": "table_name", + "required": True, + "type": "String", + "unique": False, + "validate": [""], + }, + { + "description": "", + "label": "Sql", + "name": "sql", + "required": False, + "type": "String", + "unique": False, + }, + { + "description": "", + "label": "Owners", + "name": "owners", + "required": False, + "type": "List", + "unique": False, + }, + ], + }, + ) + + requests_mock.get( + "https://superset.example.org/api/v1/dataset/_info?q=(keys:!(add_columns))", + json=expected_response, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.get_resource_endpoint_info("dataset", keys=["add_columns"]) + assert response == expected_response + + +def test_validate_key_in_resource_schema(mocker: MockerFixture) -> None: + """ + Test the `validate_key_in_resource_schema` method. 
+ """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + get_resource_endpoint_info = mocker.patch.object( + client, + "get_resource_endpoint_info", + ) + + client.validate_key_in_resource_schema( + "dataset", + "sql", + ) + + get_resource_endpoint_info.assert_called_with("dataset") + + +def test_validate_key_in_resource_schema_filtered(mocker: MockerFixture) -> None: + """ + Test the `validate_key_in_resource_schema` method with filters. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + get_resource_endpoint_info = mocker.patch.object( + client, + "get_resource_endpoint_info", + ) + + client.validate_key_in_resource_schema( + "dataset", + "sql", + keys=["add_columns"], + ) + + get_resource_endpoint_info.assert_called_with("dataset", keys=["add_columns"]) + + def test_update_resource_with_query_args(requests_mock: Mocker) -> None: """ Test the generic ``update_resource`` method. @@ -2998,12 +3467,42 @@ def test_update_role(requests_mock: Mocker) -> None: ) -def test_create_virtual_dataset(requests_mock: Mocker) -> None: +def test_create_virtual_dataset(mocker: MockerFixture) -> None: """ Test the ``create_dataset`` method with virtual datasets. """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + create_resource = mocker.patch.object(client, "create_resource") + validate_key_in_resource_schema = mocker.patch.object( + client, + "validate_key_in_resource_schema", + ) + validate_key_in_resource_schema.return_value = {"add_columns": True} + + client.create_dataset( + database_name="my_db", + sqlalchemy_uri="gsheets://", + sql="select 1 as one", + ) + + create_resource.assert_called_with( + "dataset", + database_name="my_db", + sqlalchemy_uri="gsheets://", + sql="select 1 as one", + ) + + +def test_create_virtual_dataset_legacy( + requests_mock: Mocker, + mocker: MockerFixture, +) -> None: + """ + Test the ``create_dataset`` method with virtual datasets for legacy Superset instances. + """ requests_mock.post( - "https://superset.example.org/superset/sql_json/", + "https://superset.example.org/api/v1/sqllab/execute/", json={ "query_id": 137, "status": "success", @@ -3080,6 +3579,12 @@ def test_create_virtual_dataset(requests_mock: Mocker) -> None: auth = Auth() client = SupersetClient("https://superset.example.org/", auth) + validate_key_in_resource_schema = mocker.patch.object( + client, + "validate_key_in_resource_schema", + ) + validate_key_in_resource_schema.return_value = {"add_columns": False} + client.create_dataset( database=1, schema="public", diff --git a/tests/cli/superset/sync/dbt/command_test.py b/tests/cli/superset/sync/dbt/command_test.py index 0befd5b3..4eefde2d 100644 --- a/tests/cli/superset/sync/dbt/command_test.py +++ b/tests/cli/superset/sync/dbt/command_test.py @@ -4,6 +4,7 @@ # pylint: disable=invalid-name, too-many-lines import os +import warnings from pathlib import Path import pytest @@ -686,6 +687,77 @@ def test_dbt_core_no_database(mocker: MockerFixture, fs: FakeFilesystem) -> None assert result.output == "No database was found, pass --import-db to create\n" +def test_dbt_core_disallow_edits_superset( + mocker: MockerFixture, + fs: FakeFilesystem, +) -> None: + """ + Test the ``dbt-core`` command with ``--disallow-edits`` for Superset legacy installation. 
+ """ + root = Path("/path/to/root") + fs.create_dir(root) + dbt_project = root / "default/dbt_project.yml" + fs.create_file( + dbt_project, + contents=yaml.dump( + { + "name": "my_project", + "profile": "default", + "target-path": "target", + }, + ), + ) + manifest = root / "default/target/manifest.json" + fs.create_file(manifest, contents=manifest_contents) + profiles = root / ".dbt/profiles.yml" + fs.create_file(profiles) + + SupersetClient = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.SupersetClient", + ) + client = SupersetClient() + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + sync_database = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.sync_database", + ) + + runner = CliRunner() + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = runner.invoke( + superset_cli, + [ + "https://superset.example.org/", + "sync", + "dbt-core", + str(dbt_project), + "--profiles", + str(profiles), + "--disallow-edits", + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + + assert issubclass(w[0].category, UserWarning) + assert ( + "The managed externally feature was only introduced in Superset v1.5." + in str(w[0].message) + ) + + sync_database.assert_called_with( + client, + profiles, + "my_project", + "default", + None, + False, + True, + "", + ) + + def test_dbt_cloud(mocker: MockerFixture) -> None: """ Test the ``dbt-cloud`` command. diff --git a/tests/cli/superset/sync/dbt/databases_test.py b/tests/cli/superset/sync/dbt/databases_test.py index 15301845..0ac22875 100644 --- a/tests/cli/superset/sync/dbt/databases_test.py +++ b/tests/cli/superset/sync/dbt/databases_test.py @@ -289,12 +289,12 @@ def test_sync_database_multiple_databases( assert str(excinfo.value) == "More than one database with the same name found" -def test_sync_database_external_url_prefix( +def test_sync_database_external_url_prefix_disallow_edits( mocker: MockerFixture, fs: FakeFilesystem, ) -> None: """ - Test ``sync_database`` with an external URL prefix. + Test ``sync_database`` with an external URL prefix and disallow-edits. """ fs.create_file( "/path/to/.dbt/profiles.yml", diff --git a/tests/cli/superset/sync/dbt/datasets_test.py b/tests/cli/superset/sync/dbt/datasets_test.py index 075a51aa..ede20d5a 100644 --- a/tests/cli/superset/sync/dbt/datasets_test.py +++ b/tests/cli/superset/sync/dbt/datasets_test.py @@ -446,9 +446,9 @@ def test_sync_datasets_multiple_existing(mocker: MockerFixture) -> None: assert str(excinfo.value) == "More than one dataset found" -def test_sync_datasets_external_url(mocker: MockerFixture) -> None: +def test_sync_datasets_external_url_disallow_edits(mocker: MockerFixture) -> None: """ - Test ``sync_datasets`` when passing external URL prefix. + Test ``sync_datasets`` when passing external URL prefix and disallow-edits. """ client = mocker.MagicMock() client.get_datasets.side_effect = [[{"id": 1}], [{"id": 2}], [{"id": 3}]] @@ -461,7 +461,7 @@ def test_sync_datasets_external_url(mocker: MockerFixture) -> None: models=models, metrics=metrics, database={"id": 1}, - disallow_edits=False, + disallow_edits=True, external_url_prefix="https://dbt.example.org/", ) client.create_dataset.assert_not_called() @@ -478,7 +478,7 @@ def test_sync_datasets_external_url(mocker: MockerFixture) -> None: "certification": {"details": "This table is produced by dbt"}, }, ), - is_managed_externally=False, + is_managed_externally=True, metrics=[], external_url=( "https://dbt.example.org/"