diff --git a/src/preset_cli/api/clients/__init__.py b/src/preset_cli/api/clients/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/preset_cli/api/client.py b/src/preset_cli/api/clients/preset.py similarity index 97% rename from src/preset_cli/api/client.py rename to src/preset_cli/api/clients/preset.py index f0a6dc42..a6b30e47 100644 --- a/src/preset_cli/api/client.py +++ b/src/preset_cli/api/clients/preset.py @@ -4,10 +4,10 @@ from typing import Any, List, Union -from superset_sdk.auth.main import Auth from yarl import URL from preset_cli import __version__ +from preset_cli.auth.main import Auth class PresetClient: # pylint: disable=too-few-public-methods diff --git a/src/preset_cli/api/clients/superset.py b/src/preset_cli/api/clients/superset.py new file mode 100644 index 00000000..ddd3e41a --- /dev/null +++ b/src/preset_cli/api/clients/superset.py @@ -0,0 +1,485 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +A simple client for running SQL queries against Superset: + + >>> from yarl import URL + >>> from preset_cli.api.clients.superset import SupersetClient + >>> from preset_cli.auth.main import UsernamePasswordAuth + >>> url = URL("http://localhost:8088/") + >>> auth = UsernamePasswordAuth(url, "admin", "admin") # doctest: +SKIP + >>> client = SupersetClient(url, auth) # doctest: +SKIP + >>> sql = "SELECT platform, rank FROM video_game_sales LIMIT 2" + >>> print(client.run_query(database_id=1, sql=sql)) # doctest: +SKIP + platform rank + 0 Wii 1 + 1 NES 2 + +Data is returned in a Pandas Dataframe. + +""" + +import json +import uuid +from datetime import datetime +from enum import IntEnum +from io import BytesIO +from typing import Any, Dict, List, Literal, Optional, TypedDict, Union +from uuid import uuid4 + +import pandas as pd +import prison +from yarl import URL + +from preset_cli import __version__ +from preset_cli.api.operators import Equal, Operator +from preset_cli.auth.main import Auth +from preset_cli.exceptions import SupersetError + + +class GenericDataType(IntEnum): + """ + Generic database column type that fits both frontend and backend. + """ + + NUMERIC = 0 + STRING = 1 + TEMPORAL = 2 + BOOLEAN = 3 + + +class AdhocMetricColumn(TypedDict, total=False): + """ + Schema for an adhoc metric column. + """ + + column_name: Optional[str] + description: Optional[str] + expression: Optional[str] + filterable: bool + groupby: bool + id: int + is_dttm: bool + python_date_format: Optional[str] + type: str + type_generic: GenericDataType + verbose_name: Optional[str] + + +class MetricType(TypedDict): + """ + Schema for an adhoc metric in the Chart API. 
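+
+    A metric is either a simple aggregation over a column
+    (``expressionType="SIMPLE"``) or free-form SQL
+    (``expressionType="SQL"``). As a sketch, the object built by
+    ``convert_to_adhoc_metric`` below for ``COUNT(*)`` looks like::
+
+        {
+            "expressionType": "SQL",
+            "sqlExpression": "COUNT(*)",
+            "label": "COUNT(*)",
+            "aggregate": None,
+            "column": None,
+            ...
+        }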
+ """ + + aggregate: Optional[str] + column: Optional[AdhocMetricColumn] + expressionType: Literal["SIMPLE", "SQL"] + hasCustomLabel: Optional[bool] + label: Optional[str] + sqlExpression: Optional[str] + isNew: bool + optionName: str + + +def convert_to_adhoc_metric(expression: str) -> MetricType: + """ + Convert an adhoc metric to an object. + """ + return { + "aggregate": None, + "column": None, + "expressionType": "SQL", + "hasCustomLabel": False, + "isNew": False, + "label": expression, + "optionName": f"metric_{uuid4()}", + "sqlExpression": expression, + } + + +class ColumnType(TypedDict): + """ + Schema for an adhoc column in the Chart API. + """ + + label: str + sqlExpression: str + + +def convert_to_adhoc_column(expression: str) -> ColumnType: + """ + Convert an adhoc column to an object. + """ + return { + "label": expression, + "sqlExpression": expression, + } + + +def shortid() -> str: + """ + Generate a short ID suited for a SQL Lab client ID. + """ + return str(uuid.uuid4())[-12:] + + +class SupersetClient: # pylint: disable=too-few-public-methods + + """ + A client for running queries against Superset. + """ + + def __init__(self, baseurl: Union[str, URL], auth: Auth): + # convert to URL if necessary + self.baseurl = URL(baseurl) + self.auth = auth + + def run_query(self, database_id: int, sql: str, limit: int = 1000) -> pd.DataFrame: + """ + Run a SQL query, returning a Pandas dataframe. + """ + url = self.baseurl / "superset/sql_json/" + data = { + "client_id": shortid()[:10], + "database_id": database_id, + "json": True, + "runAsync": False, + "schema": None, + "sql": sql, + "sql_editor_id": "1", + "tab": "Untitled Query 2", + "tmp_table_name": "", + "select_as_cta": False, + "ctas_method": "TABLE", + "queryLimit": limit, + "expand_data": True, + } + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + "User-Agent": f"Apache Superset Client ({__version__})", + "Referer": str(self.baseurl), + } + headers.update(self.auth.get_headers()) + + session = self.auth.get_session() + response = session.post(url, json=data, headers=headers) + payload = response.json() + if payload.get("errors"): + raise SupersetError(errors=payload["errors"]) + + return pd.DataFrame(payload["data"]) + + def get_data( # pylint: disable=too-many-locals, too-many-arguments + self, + dataset_id: int, + metrics: List[str], + columns: List[str], + is_timeseries: bool = False, + time_column: Optional[str] = None, + start: Optional[datetime] = None, + end: Optional[datetime] = None, + granularity: Optional[str] = None, + where: str = "", + having: str = "", + row_limit: int = 10000, + force: bool = False, + ) -> pd.DataFrame: + """ + Run a dimensional query. 
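+
+        For example, grouping a hypothetical ``cnt`` metric by ``name``
+        (identifiers are illustrative)::
+
+            >>> client.get_data(27, ["cnt"], ["name"])  # doctest: +SKIP
+
+        Metrics and columns that are not defined on the dataset are
+        treated as adhoc SQL expressions.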
+ """ + dataset = self.get_dataset(dataset_id)["result"] + + if time_column is None: + time_columns = [ + column["column_name"] + for column in dataset["columns"] + if column["is_dttm"] + ] + if len(time_columns) > 1: + options = ", ".join(time_columns) + raise Exception( + f"Unable to determine time column, please pass `time_series` " + f"as one of: {options}", + ) + time_column = time_columns[0] + + time_range = ( + "No filter" + if start is None and end is None + else f"{start or ''} : {end or ''}" + ) + + # convert adhoc metrics to a proper object, if needed + metric_names = [metric["metric_name"] for metric in dataset["metrics"]] + processed_metrics = [ + metric if metric in metric_names else convert_to_adhoc_metric(metric) + for metric in metrics + ] + + # same for columns + column_names = [column["column_name"] for column in dataset["columns"]] + processed_columns = [ + column if column in column_names else convert_to_adhoc_column(column) + for column in columns + ] + + url = self.baseurl / "api/v1/chart/data" + data: Dict[str, Any] = { + "datasource": {"id": dataset_id, "type": "table"}, + "force": force, + "queries": [ + { + "annotation_layers": [], + "applied_time_extras": {}, + "columns": processed_columns, + "custom_form_data": {}, + "custom_params": {}, + "extras": {"having": having, "having_druid": [], "where": where}, + "filters": [], + "is_timeseries": is_timeseries, + "metrics": processed_metrics, + "order_desc": True, + "orderby": [], + "row_limit": row_limit, + "time_range": time_range, + "timeseries_limit": 0, + "url_params": {}, + }, + ], + "result_format": "json", + "result_type": "full", + } + if is_timeseries: + data["queries"][0]["granularity"] = time_column + data["queries"][0]["extras"]["time_grain_sqla"] = granularity + + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + "User-Agent": f"Apache Superset Client ({__version__})", + "Referer": str(self.baseurl), + } + headers.update(self.auth.get_headers()) + + session = self.auth.get_session() + response = session.post(url, json=data, headers=headers) + payload = response.json() + if payload.get("errors"): + raise SupersetError(errors=payload["errors"]) + + return pd.DataFrame(payload["result"][0]["data"]) + + def get_resource(self, resource: str, resource_id: int) -> Any: + """ + Return a single resource. + """ + url = self.baseurl / "api/v1" / resource / str(resource_id) + + session = self.auth.get_session() + headers = self.auth.get_headers() + headers["Referer"] = str(self.baseurl) + response = session.get(url, headers=headers) + response.raise_for_status() + + resource = response.json() + + return resource + + def get_resources(self, resource: str, **kwargs: Any) -> List[Any]: + """ + Return one or more of a resource, possibly filtered. + """ + operations = { + k: v if isinstance(v, Operator) else Equal(v) for k, v in kwargs.items() + } + query = prison.dumps( + { + "filters": [ + dict(col=col, opr=value.operator, value=value.value) + for col, value in operations.items() + ], + }, + ) + url = self.baseurl / "api/v1" / resource / "" % {"q": query} + + session = self.auth.get_session() + headers = self.auth.get_headers() + headers["Referer"] = str(self.baseurl) + response = session.get(url, headers=headers) + response.raise_for_status() + + payload = response.json() + resources = payload["result"] + + return resources + + def create_resource(self, resource: str, **kwargs: Any) -> Any: + """ + Create a resource. 
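+
+        The keyword arguments are sent as the JSON payload, eg (a sketch,
+        assuming a Postgres database)::
+
+            >>> client.create_resource(  # doctest: +SKIP
+            ...     "database",
+            ...     database_name="my_db",
+            ...     sqlalchemy_uri="postgresql://user:password@host/my_db",
+            ... )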
+ """ + url = self.baseurl / "api/v1" / resource / "" + + session = self.auth.get_session() + headers = self.auth.get_headers() + headers["Referer"] = str(self.baseurl) + response = session.post(url, json=kwargs, headers=headers) + response.raise_for_status() + + resource = response.json() + + return resource + + def update_resource( + self, + resource: str, + resource_id: int, + query_args: Optional[Dict[str, str]] = None, + **kwargs: Any, + ) -> Any: + """ + Update a resource. + """ + url = self.baseurl / "api/v1" / resource / str(resource_id) + if query_args: + url %= query_args + + session = self.auth.get_session() + headers = self.auth.get_headers() + headers["Referer"] = str(self.baseurl) + response = session.put(url, json=kwargs, headers=headers) + response.raise_for_status() + + resource = response.json() + + return resource + + def get_database(self, database_id: int) -> Any: + """ + Return a single database. + """ + return self.get_resource("database", database_id) + + def get_databases(self, **kwargs: str) -> List[Any]: + """ + Return databases, possibly filtered. + """ + return self.get_resources("database", **kwargs) + + def create_database(self, **kwargs: Any) -> Any: + """ + Create a database. + """ + return self.create_resource("database", **kwargs) + + def update_database(self, database_id: int, **kwargs: Any) -> Any: + """ + Update a database. + """ + query_args = {"override_columns": "true"} + return self.update_resource("database", database_id, query_args, **kwargs) + + def get_dataset(self, dataset_id: int) -> Any: + """ + Return a single dataset. + """ + return self.get_resource("dataset", dataset_id) + + def get_datasets(self, **kwargs: str) -> List[Any]: + """ + Return datasets, possibly filtered. + """ + return self.get_resources("dataset", **kwargs) + + def create_dataset(self, **kwargs: Any) -> Any: + """ + Create a dataset. + """ + return self.create_resource("dataset", **kwargs) + + def update_dataset(self, dataset_id: int, **kwargs: Any) -> Any: + """ + Update a dataset. + """ + return self.update_resource("dataset", dataset_id, **kwargs) + + def get_dashboard(self, dashboard_id: int) -> Any: + """ + Return a single dashboard. + """ + return self.get_resource("dashboard", dashboard_id) + + def get_dashboards(self, **kwargs: str) -> List[Any]: + """ + Return dashboards, possibly filtered. + """ + return self.get_resources("dashboard", **kwargs) + + def create_dashboard(self, **kwargs: Any) -> Any: + """ + Create a dashboard. + """ + return self.create_resource("dashboard", **kwargs) + + def update_dashboard(self, dashboard_id: int, **kwargs: Any) -> Any: + """ + Update a dashboard. + """ + return self.update_resource("dashboard", dashboard_id, **kwargs) + + def export_zip(self, resource: str, ids: List[int]) -> BytesIO: + """ + Export one or more of a resource. + """ + url = self.baseurl / "api/v1" / resource / "export/" + params = {"q": prison.dumps(ids)} + + session = self.auth.get_session() + headers = self.auth.get_headers() + headers["Referer"] = str(self.baseurl) + response = session.get(url, params=params, headers=headers) + + if not response.ok: + payload = response.json() + raise SupersetError(errors=payload["errors"]) + + return BytesIO(response.content) + + def import_zip(self, resource: str, data: BytesIO, overwrite: bool = False) -> bool: + """ + Import a ZIP bundle. 
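+
+        The bundle must have the layout produced by ``export_zip``, so the
+        two methods can copy resources between instances (``target`` being
+        a hypothetical second client)::
+
+            >>> buf = client.export_zip("dashboard", [1])  # doctest: +SKIP
+            >>> target.import_zip("dashboard", buf, overwrite=True)  # doctest: +SKIP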
+ """ + url = self.baseurl / "api/v1" / resource / "import/" + + session = self.auth.get_session() + headers = self.auth.get_headers() + headers["Referer"] = str(self.baseurl) + headers["Accept"] = "application/json" + response = session.post( + url, + files=dict(formData=data), + data=dict(overwrite=json.dumps(overwrite)), + headers=headers, + ) + + payload = response.json() + + if payload.get("errors"): + raise SupersetError(errors=payload["errors"]) + + return payload["message"] == "OK" diff --git a/src/preset_cli/api/operators.py b/src/preset_cli/api/operators.py new file mode 100644 index 00000000..0f97cb1e --- /dev/null +++ b/src/preset_cli/api/operators.py @@ -0,0 +1,34 @@ +""" +Operators for filtering the API. +""" + +# pylint: disable=too-few-public-methods + +from typing import Any + + +class Operator: + """ + A filter operator. + """ + + operator = "invalid" + + def __init__(self, value: Any): + self.value = value + + +class Equal(Operator): + """ + Equality operator. + """ + + operator = "eq" + + +class OneToMany(Operator): + """ + Operator for one-to-many relationships. + """ + + operator = "rel_o_m" diff --git a/src/preset_cli/auth/__init__.py b/src/preset_cli/auth/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/preset_cli/auth/jwt.py b/src/preset_cli/auth/jwt.py new file mode 100644 index 00000000..a8bdf475 --- /dev/null +++ b/src/preset_cli/auth/jwt.py @@ -0,0 +1,20 @@ +""" +JWT auth. +""" + +from typing import Dict + +from preset_cli.auth.main import Auth + + +class JWTAuth(Auth): # pylint: disable=too-few-public-methods + """ + Auth via JWT. + """ + + def __init__(self, jwt_token: str): + super().__init__() + self.jwt_token = jwt_token + + def get_headers(self) -> Dict[str, str]: + return {"Authorization": f"Bearer {self.jwt_token}"} diff --git a/src/preset_cli/auth/main.py b/src/preset_cli/auth/main.py new file mode 100644 index 00000000..7b86298f --- /dev/null +++ b/src/preset_cli/auth/main.py @@ -0,0 +1,63 @@ +""" +Mechanisms for authentication and authorization. +""" + +from typing import Dict, Optional + +import requests +from bs4 import BeautifulSoup +from yarl import URL + + +class Auth: # pylint: disable=too-few-public-methods + """ + An authentication/authorization mechanism. + """ + + def __init__(self): + self.session = requests.Session() + self.headers = {} + + def get_session(self) -> requests.Session: + """ + Return a session. + """ + return self.session + + def get_headers(self) -> Dict[str, str]: # pylint: disable=no-self-use + """ + Return headers for auth. + """ + return self.headers + + +class UsernamePasswordAuth(Auth): # pylint: disable=too-few-public-methods + """ + Auth via username/password. + """ + + def __init__(self, baseurl: URL, username: str, password: Optional[str] = None): + super().__init__() + self._do_login(baseurl, username, password) + + def _do_login( + self, + baseurl: URL, + username: str, + password: Optional[str] = None, + ) -> None: + """ + Login to get CSRF token and cookies. 
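+
+        The login page is fetched first so the CSRF token can be scraped
+        from the form, then the credentials are posted back with the token,
+        leaving the session cookie set for subsequent requests.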
+ """ + response = self.session.get(baseurl / "login/") + soup = BeautifulSoup(response.text, "html.parser") + csrf_token = soup.find("input", {"id": "csrf_token"})["value"] + + # update headers + self.headers["X-CSRFToken"] = csrf_token + + # set cookies + self.session.post( + baseurl / "login/", + data=dict(username=username, password=password, csrf_token=csrf_token), + ) diff --git a/src/preset_cli/cli/main.py b/src/preset_cli/cli/main.py index 96a0023a..05ff7ba8 100644 --- a/src/preset_cli/cli/main.py +++ b/src/preset_cli/cli/main.py @@ -12,10 +12,10 @@ import requests import yaml from appdirs import user_config_dir -from superset_sdk.auth.jwt import JWTAuth from yarl import URL -from preset_cli.api.client import PresetClient +from preset_cli.api.clients.preset import PresetClient +from preset_cli.auth.jwt import JWTAuth from preset_cli.cli.superset.main import superset CREDENTIALS_FILE = "credentials.yaml" diff --git a/src/preset_cli/cli/superset/export.py b/src/preset_cli/cli/superset/export.py new file mode 100644 index 00000000..f6cfc8af --- /dev/null +++ b/src/preset_cli/cli/superset/export.py @@ -0,0 +1,73 @@ +""" +A command to export Superset resources into a directory. +""" + +from pathlib import Path +from zipfile import ZipFile + +import click +from yarl import URL + +from preset_cli.api.clients.superset import SupersetClient +from preset_cli.lib import remove_root + + +@click.command() +@click.argument("directory", type=click.Path(exists=True, resolve_path=True)) +@click.option( + "--overwrite", + is_flag=True, + default=False, + help="Overwrite existing resources", +) +@click.pass_context +def export( # pylint: disable=too-many-locals + ctx: click.core.Context, + directory: str, + overwrite: bool = False, +) -> None: + """ + Export DBs/datasets/charts/dashboards to a directory. + """ + auth = ctx.obj["AUTH"] + url = URL(ctx.obj["INSTANCE"]) + client = SupersetClient(url, auth) + root = Path(directory) + + for resource in ["database", "dataset", "chart", "dashboard"]: + export_resource(resource, root, client, overwrite) + + +def export_resource( + resource: str, + root: Path, + client: SupersetClient, + overwrite: bool, +) -> None: + """ + Export a given resource and unzip it in a directory. + """ + resources = client.get_resources(resource) + ids = [resource["id"] for resource in resources] + buf = client.export_zip(resource, ids) + + with ZipFile(buf) as bundle: + contents = { + remove_root(file_name): bundle.read(file_name).decode() + for file_name in bundle.namelist() + } + + for file_name, file_contents in contents.items(): + # skip related files + if not file_name.startswith(resource): + continue + + target = root / file_name + if target.exists() and not overwrite: + raise Exception( + f"File already exists and --overwrite was not specified: {target}", + ) + if not target.parent.exists(): + target.parent.mkdir(parents=True, exist_ok=True) + with open(target, "w", encoding="utf-8") as output: + output.write(file_contents) diff --git a/src/preset_cli/cli/superset/main.py b/src/preset_cli/cli/superset/main.py index 098f5a1f..f2c51513 100644 --- a/src/preset_cli/cli/superset/main.py +++ b/src/preset_cli/cli/superset/main.py @@ -1,55 +1,47 @@ """ -Dispatcher for Superset commands. +Main entry point for Superset commands. 
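+
+Commands take the instance URL as their first argument, eg (the entry
+point name is illustrative):
+
+    superset http://localhost:8088/ sql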
""" -from typing import Any - import click -from superset_sdk.cli.main import superset_cli +from yarl import URL + +from preset_cli.auth.main import UsernamePasswordAuth +from preset_cli.cli.superset.export import export +from preset_cli.cli.superset.sql import sql +from preset_cli.cli.superset.sync.main import sync @click.group() +@click.argument("instance") +@click.option("-u", "--username", default="admin", help="Username") +@click.option( + "-p", + "--password", + prompt=True, + prompt_required=False, + default="admin", + hide_input=True, + help="Password (leave empty for prompt)", +) @click.pass_context -def superset(ctx: click.core.Context) -> None: +def superset( + ctx: click.core.Context, + instance: str, + username: str = "admin", + password: str = "admin", +): """ - Send commands to one or more Superset instances. + An Apache Superset CLI. """ ctx.ensure_object(dict) + ctx.obj["INSTANCE"] = instance -def mutate_commands(source: click.core.Group, target: click.core.Group) -> None: - """ - Programmatically modify commands so they work with workspaces. - """ - for name, command in source.commands.items(): - - if isinstance(command, click.core.Group): - - @click.group() - @click.pass_context - def new_group( - ctx: click.core.Context, *args: Any, command=command, **kwargs: Any - ) -> None: - ctx.invoke(command, *args, **kwargs) - - mutate_commands(command, new_group) - new_group.params = command.params[:] - target.add_command(new_group, name) - - else: - - @click.command() - @click.pass_context - def new_command( - ctx: click.core.Context, *args: Any, command=command, **kwargs: Any - ) -> None: - for instance in ctx.obj["WORKSPACES"]: - click.echo(f"\n{instance}") - ctx.obj["INSTANCE"] = instance - ctx.invoke(command, *args, **kwargs) - - new_command.params = command.params[:] - target.add_command(new_command, name) + # allow a custom authenticator to be passed via the context + if "AUTH" not in ctx.obj: + ctx.obj["AUTH"] = UsernamePasswordAuth(URL(instance), username, password) -mutate_commands(superset_cli, superset) +superset.add_command(sql) +superset.add_command(sync) +superset.add_command(export) diff --git a/src/preset_cli/cli/superset/sql.py b/src/preset_cli/cli/superset/sql.py new file mode 100644 index 00000000..2cf85f89 --- /dev/null +++ b/src/preset_cli/cli/superset/sql.py @@ -0,0 +1,163 @@ +""" +Run SQL queries on Superset. 
+""" +import os.path +import traceback +from operator import itemgetter +from pathlib import Path +from typing import List, Optional, Tuple + +import click +from prompt_toolkit import PromptSession +from prompt_toolkit.completion import WordCompleter +from prompt_toolkit.history import FileHistory +from prompt_toolkit.lexers import PygmentsLexer +from prompt_toolkit.styles.pygments import style_from_pygments_cls +from pygments.lexers.sql import SqlLexer +from pygments.styles import get_style_by_name +from sqlparse.keywords import KEYWORDS +from tabulate import tabulate +from yarl import URL + +from preset_cli.api.clients.superset import SupersetClient +from preset_cli.exceptions import SupersetError + +sql_completer = WordCompleter(list(KEYWORDS)) +style = style_from_pygments_cls(get_style_by_name("stata-dark")) + + +@click.command() +@click.option( + "--database-id", + default=None, + help="Database ID (leave empty for options)", + type=click.INT, +) +@click.option("-e", "--execute", default=None, help="Run query non-interactively") +@click.pass_context +def sql( # pylint: disable=too-many-arguments + ctx: click.core.Context, + database_id: Optional[int], + execute: Optional[str] = None, +) -> None: + """ + Run SQL against an Apache Superset database. + """ + auth = ctx.obj["AUTH"] + url = URL(ctx.obj["INSTANCE"]) + client = SupersetClient(url, auth) + + databases = client.get_databases() + if not databases: + click.echo("No databases available") + return None + + if database_id is None: + click.echo("Choose the ID of a database to connect to:") + for database in sorted(databases, key=itemgetter("id")): + click.echo(f'({database["id"]}) {database["database_name"]}') + while database_id is None: + try: + choice = int(input("> ")) + if any(database["id"] == choice for database in databases): + database_id = choice + break + except ValueError: + pass + click.echo("Invalid choice") + + database_name = [ + database["database_name"] + for database in databases + if database["id"] == database_id + ][0] + + if execute: + return run_query(client, database_id, execute) + + return run_session(client, database_id, database_name, url) + + +def run_query(client: SupersetClient, database_id: int, query: str) -> None: + """ + Run a query in a given database. + """ + try: + results = client.run_query(database_id, query) + click.echo(tabulate(results, headers=results.columns, showindex=False)) + except SupersetError as ex: + click.echo( + click.style( + "\n".join(error["message"] for error in ex.errors), + fg="bright_red", + ), + ) + except Exception: # pylint: disable=broad-except + traceback.print_exc() + + +def run_session( + client: SupersetClient, + database_id: int, + database_name: str, + url: URL, +) -> None: + """ + Run SQL queries in an interactive session. + """ + history = Path(os.path.expanduser("~/.config/superset-sdk/")) + if not history.exists(): + history.mkdir(parents=True) + + session = PromptSession( + lexer=PygmentsLexer(SqlLexer), + completer=sql_completer, + style=style, + history=FileHistory(history / f"sql-{url.host}-{database_id}.history"), + ) + + lines: List[str] = [] + quote_context = " " + padding = " " * (len(database_name) - 1) + while True: + prompt = f"{database_name}> " if not lines else f"{padding}{quote_context}. " + try: + line = session.prompt(prompt) + except KeyboardInterrupt: + lines = [] + quote_context = " " + continue # Control-C pressed. Try again. + except EOFError: + break # Control-D pressed. 
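+
+        # buffer the line; the query only runs once ``get_query_termination``
+        # sees a terminating semicolon outside of any open quote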
+ + lines.append(line) + query = "\n".join(lines) + + is_terminated, quote_context = get_query_termination(query) + if is_terminated: + run_query(client, database_id, query) + + click.echo("Goodbye!") + + +def get_query_termination(query: str) -> Tuple[bool, str]: + """ + Check if a query is ended or if a new line should be created. + + This function looks for a semicolon at the end, making sure no quotation mark must be + closed. + """ + quote_context = " " + quote_chars = ('"', "'", "`") + + for query_char in query: + if quote_context == query_char: + quote_context = " " + else: + for quote in quote_chars: + if quote_context == " " and quote == query_char: + quote_context = quote + + is_terminated = quote_context == " " and query.endswith(";") + + return is_terminated, quote_context diff --git a/src/preset_cli/cli/superset/sync/__init__.py b/src/preset_cli/cli/superset/sync/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/preset_cli/cli/superset/sync/dbt/__init__.py b/src/preset_cli/cli/superset/sync/dbt/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/preset_cli/cli/superset/sync/dbt/command.py b/src/preset_cli/cli/superset/sync/dbt/command.py new file mode 100644 index 00000000..40c00498 --- /dev/null +++ b/src/preset_cli/cli/superset/sync/dbt/command.py @@ -0,0 +1,90 @@ +""" +A command to sync DBT models/metrics to Superset and dashboards back as exposures. +""" + +import os.path +from pathlib import Path +from typing import Optional + +import click +from yarl import URL + +from preset_cli.api.clients.superset import SupersetClient +from preset_cli.cli.superset.sync.dbt.dashboards import sync_dashboards +from preset_cli.cli.superset.sync.dbt.databases import sync_database +from preset_cli.cli.superset.sync.dbt.datasets import sync_datasets +from preset_cli.exceptions import DatabaseNotFoundError + + +@click.command() +@click.argument("manifest", type=click.Path(exists=True, resolve_path=True)) +@click.option("--project", help="Name of the DBT project", default="default") +@click.option("--target", help="Target name", default="dev") +@click.option( + "--profiles", + help="Location of profiles.yml file", + type=click.Path(exists=True, resolve_path=True), +) +@click.option( + "--exposures", + help="Path to file where exposures will be written", + type=click.Path(exists=False), +) +@click.option( + "--import-db", + is_flag=True, + default=False, + help="Import database to Superset", +) +@click.option( + "--disallow-edits", + default=True, + help="Mark resources as manged externally to prevent edits", +) +@click.option("--external-url-prefix", default="", help="Base URL for resources") +@click.pass_context +def dbt( # pylint: disable=too-many-arguments + ctx: click.core.Context, + manifest: str, + project: str, + target: str, + profiles: Optional[str] = None, + exposures: Optional[str] = None, + import_db: bool = False, + disallow_edits: bool = True, + external_url_prefix: str = "", +) -> None: + """ + Sync DBT models/metrics to Superset and dashboards to DBT exposures. 
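+
+    Example invocation (entry point, paths, and names are illustrative)::
+
+        superset http://localhost:8088/ sync dbt target/manifest.json --project my_project --target dev --exposures models/exposures.yml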
+ """ + auth = ctx.obj["AUTH"] + url = URL(ctx.obj["INSTANCE"]) + client = SupersetClient(url, auth) + + if profiles is None: + profiles = os.path.expanduser("~/.dbt/profiles.yml") + + try: + database = sync_database( + client, + Path(profiles), + project, + target, + import_db, + disallow_edits, + external_url_prefix, + ) + except DatabaseNotFoundError: + click.echo("No database was found, pass --import-db to create") + return + + datasets = sync_datasets( + client, + Path(manifest), + database, + disallow_edits, + external_url_prefix, + ) + if exposures: + exposures = os.path.expanduser(exposures) + sync_dashboards(client, Path(exposures), datasets) diff --git a/src/preset_cli/cli/superset/sync/dbt/dashboards.py b/src/preset_cli/cli/superset/sync/dbt/dashboards.py new file mode 100644 index 00000000..399eded8 --- /dev/null +++ b/src/preset_cli/cli/superset/sync/dbt/dashboards.py @@ -0,0 +1,83 @@ +""" +Sync Superset dashboards as DBT exposures. +""" + +import json +from pathlib import Path +from typing import Any, List + +import yaml + +from preset_cli.api.clients.superset import SupersetClient + +# XXX: DashboardResponseType and DatasetResponseType + + +def get_depends_on(client: SupersetClient, dashboard: Any) -> List[str]: + """ + Get all the DBT dependencies for a given dashboard. + """ + + url = client.baseurl / "api/v1/dashboard" / str(dashboard["id"]) / "datasets" + + session = client.auth.get_session() + headers = client.auth.get_headers() + response = session.get(url, headers=headers) + response.raise_for_status() + + payload = response.json() + + depends_on = [] + for dataset in payload["result"]: + full_dataset = client.get_dataset(int(dataset["id"])) + extra = json.loads(full_dataset["result"]["extra"] or "{}") + if "depends_on" in extra: + depends_on.append(extra["depends_on"]) + + return depends_on + + +def sync_dashboards( # pylint: disable=too-many-locals + client: SupersetClient, + exposures_path: Path, + datasets: List[Any], +) -> None: + """ + Write dashboards back to DBT as exposures. + """ + dashboards_ids = set() + + for dataset in datasets: + url = client.baseurl / "api/v1/dataset" / str(dataset["id"]) / "related_objects" + + session = client.auth.get_session() + headers = client.auth.get_headers() + response = session.get(url, headers=headers) + response.raise_for_status() + + payload = response.json() + for dashboard in payload["dashboards"]["result"]: + dashboards_ids.add(dashboard["id"]) + + exposures = [] + for dashboard_id in dashboards_ids: + dashboards = client.get_dashboards(id=dashboard_id) + if dashboards: + dashboard = dashboards[0] + first_owner = dashboard["owners"][0] + exposure = { + "name": dashboard["dashboard_title"], + "type": "dashboard", + "maturity": "high" if dashboard["published"] else "low", + "url": str(client.baseurl / dashboard["url"].lstrip("/")), + "description": "", + "depends_on": get_depends_on(client, dashboard), + "owner": { + "name": first_owner["first_name"] + " " + first_owner["last_name"], + "email": first_owner.get("email", "unknown"), + }, + } + exposures.append(exposure) + + with open(exposures_path, "w", encoding="utf-8") as output: + yaml.safe_dump({"version": 2, "exposures": exposures}, output, sort_keys=False) diff --git a/src/preset_cli/cli/superset/sync/dbt/databases.py b/src/preset_cli/cli/superset/sync/dbt/databases.py new file mode 100644 index 00000000..9898bbb1 --- /dev/null +++ b/src/preset_cli/cli/superset/sync/dbt/databases.py @@ -0,0 +1,88 @@ +""" +Sync DBT database to Superset. 
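+
+The connection is read from ``profiles.yml``, which for a Postgres target
+is shaped roughly like this (values are illustrative)::
+
+    my_project:
+      outputs:
+        dev:
+          type: postgres
+          host: localhost
+          port: 5432
+          user: alice
+          pass: secret
+          dbname: analytics
+      target: dev
+
+The resulting Superset database is named ``{project}_{target}``.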
+""" + +import logging +from pathlib import Path +from typing import Any + +import yaml +from yarl import URL + +from preset_cli.api.clients.superset import SupersetClient +from preset_cli.cli.superset.sync.dbt.lib import build_sqlalchemy_uri +from preset_cli.exceptions import DatabaseNotFoundError + +_logger = logging.getLogger(__name__) + + +def sync_database( # pylint: disable=too-many-locals, too-many-arguments + client: SupersetClient, + profiles_path: Path, + project_name: str, + target_name: str, + import_db: bool, + disallow_edits: bool, # pylint: disable=unused-argument + external_url_prefix: str, +) -> Any: + """ + Read target database from a DBT profiles.yml and sync to Superset. + """ + base_url = URL(external_url_prefix) if external_url_prefix else None + + with open(profiles_path, encoding="utf-8") as input_: + profiles = yaml.load(input_, Loader=yaml.SafeLoader) + + if project_name not in profiles: + raise Exception(f"Project {project_name} not found in {profiles_path}") + + project = profiles[project_name] + outputs = project["outputs"] + + if target_name not in outputs: + raise Exception( + f"Target {target_name} not found in the outputs of {profiles_path}", + ) + + target = outputs[target_name] + sqlalchemy_uri = str(build_sqlalchemy_uri(target)) + + database_name = f"{project_name}_{target_name}" + databases = client.get_databases( + sqlalchemy_uri=sqlalchemy_uri, + database_name=database_name, + ) + if len(databases) > 1: + raise Exception( + "More than one database with the same SQLAlchemy URI and name found", + ) + + # read additional metadata that should be applied to the DB + meta = target.get("meta", {}).get("superset", {}) + + if base_url and "external_url" not in meta: + meta["external_url"] = str(base_url.with_fragment("!/overview")) + + if databases: + _logger.info("Found an existing database, updating it") + database = databases[0] + database = client.update_database( + database_id=database["id"], + database_name=database_name, + # TODO (betodealmeida): depends on https://github.com/apache/superset/pull/19318 + # is_managed_externally=disallow_edits, + **meta, + ) + elif not import_db: + raise DatabaseNotFoundError() + else: + _logger.info("No database found, creating it") + database = client.create_database( + database_name=database_name, + sqlalchemy_uri=sqlalchemy_uri, + # TODO (betodealmeida): depends on https://github.com/apache/superset/pull/19318 + # is_managed_externally=disallow_edits, + **meta, + ) + + return database diff --git a/src/preset_cli/cli/superset/sync/dbt/datasets.py b/src/preset_cli/cli/superset/sync/dbt/datasets.py new file mode 100644 index 00000000..bdae8d10 --- /dev/null +++ b/src/preset_cli/cli/superset/sync/dbt/datasets.py @@ -0,0 +1,115 @@ +""" +Sync DBT datasets/metrics to Superset. +""" + +# pylint: disable=consider-using-f-string + +import json +import logging +from collections import defaultdict +from pathlib import Path +from typing import Any, Dict, List + +import yaml +from yarl import URL + +from preset_cli.api.clients.superset import SupersetClient +from preset_cli.api.operators import OneToMany + +_logger = logging.getLogger(__name__) + + +def get_metric_expression(metric: Dict[str, Any]) -> str: + """ + Return a SQL expression for a given DBT metric. 
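+
+    The expression simply wraps the metric's ``sql`` in its ``type``:
+
+        >>> get_metric_expression({"type": "count", "sql": "*"})
+        'count(*)'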
+ """ + return "{type}({sql})".format(**metric) + + +def sync_datasets( # pylint: disable=too-many-locals + client: SupersetClient, + manifest_path: Path, + database: Any, + disallow_edits: bool, + external_url_prefix: str, +) -> List[Any]: + """ + Read the DBT manifest and import models as datasets with metrics. + """ + base_url = URL(external_url_prefix) if external_url_prefix else None + + with open(manifest_path, encoding="utf-8") as input_: + manifest = yaml.load(input_, Loader=yaml.SafeLoader) + + # extract metrics + metrics: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + for metric in manifest["metrics"].values(): + for unique_id in metric["depends_on"]["nodes"]: + metrics[unique_id].append(metric) + + # add datasets + datasets = [] + configs = list(manifest["sources"].values()) + list(manifest["nodes"].values()) + for config in configs: + filters = { + "database": OneToMany(database["id"]), + "schema": config["schema"], + "table_name": config["name"], + } + existing = client.get_datasets(**filters) + if len(existing) > 1: + raise Exception("More than one dataset found") + + if existing: + dataset = existing[0] + _logger.info("Updating dataset %s", config["unique_id"]) + else: + _logger.info("Creating dataset %s", config["unique_id"]) + dataset = client.create_dataset( + database=database["id"], + schema=config["schema"], + table_name=config["name"], + ) + + extra = {k: config[k] for k in ["resource_type", "unique_id"]} + if config["resource_type"] == "source": + extra["depends_on"] = "source('{schema}', '{name}')".format(**config) + else: # config["resource_type"] == "model" + extra["depends_on"] = "ref('{name}')".format(**config) + + dataset_metrics = [] + if config["resource_type"] == "model": + for metric in metrics[config["unique_id"]]: + dataset_metrics.append( + { + "expression": get_metric_expression(metric), + "metric_name": metric["name"], + "metric_type": metric["type"], + "verbose_name": get_metric_expression(metric), + "description": metric["description"], + **metric["meta"], + }, + ) + + # update dataset clearing metrics... + update = { + "description": config["description"], + "extra": json.dumps(extra), + "is_managed_externally": disallow_edits, + "metrics": [], + } + if base_url: + fragment = "!/{resource_type}/{unique_id}".format(**config) + update["external_url"] = str(base_url.with_fragment(fragment)) + client.update_dataset(dataset["id"], **update) + + # ...then update metrics + if dataset_metrics: + update = { + "metrics": dataset_metrics, + } + client.update_dataset(dataset["id"], **update) + + datasets.append(dataset) + + return datasets diff --git a/src/preset_cli/cli/superset/sync/dbt/lib.py b/src/preset_cli/cli/superset/sync/dbt/lib.py new file mode 100644 index 00000000..efe8c84d --- /dev/null +++ b/src/preset_cli/cli/superset/sync/dbt/lib.py @@ -0,0 +1,66 @@ +""" +Helper functions. +""" + +from typing import Any, Dict + +from sqlalchemy.engine.url import URL + + +def build_sqlalchemy_uri(target: Dict[str, Any]) -> URL: + """ + Build the SQLAlchemy URI for a given target. + """ + type_ = target.get("type") + + if type_ == "postgres": + return build_postgres_sqlalchemy_uri(target) + if type_ == "bigquery": + return build_bigquery_sqlalchemy_uri(target) + + raise Exception( + f"Unable to build a SQLAlchemy URI for a target of type {type_}. 
Please file an " + "issue at https://github.com/preset-io/superset-sdk/issues/new.", + ) + + +def build_postgres_sqlalchemy_uri(target: Dict[str, Any]) -> URL: + """ + Build the SQLAlchemy URI for a Postgres target. + """ + username = target["user"] + password = target["pass"] or None + host = target["host"] + port = target["port"] + dbname = target["dbname"] + + return URL( + drivername="postgresql+psycopg2", + username=username, + password=password, + host=host, + port=port, + database=dbname, + ) + + +def build_bigquery_sqlalchemy_uri(target: Dict[str, Any]) -> URL: + """ + Build the SQLAlchemy URI for a BigQuery target. + + Currently supports only configuration via ``keyfile``. + """ + parameter_map = { + "credentials_path": "keyfile", + "priority": "priority", + "location": "location", + "maximum_bytes_billed": "maximum_bytes_billed", + } + query = { + kwarg: str(target[key]) for kwarg, key in parameter_map.items() if key in target + } + return URL( + drivername="bigquery", + host=target["project"], + query=query, + ) diff --git a/src/preset_cli/cli/superset/sync/main.py b/src/preset_cli/cli/superset/sync/main.py new file mode 100644 index 00000000..90cdde6c --- /dev/null +++ b/src/preset_cli/cli/superset/sync/main.py @@ -0,0 +1,19 @@ +""" +Commands for syncing metastores to and from Superset. +""" + +import click + +from preset_cli.cli.superset.sync.dbt.command import dbt +from preset_cli.cli.superset.sync.native.command import native + + +@click.group() +def sync() -> None: + """ + Sync metadata between Superset and an external repository. + """ + + +sync.add_command(native) +sync.add_command(dbt) diff --git a/src/preset_cli/cli/superset/sync/native/__init__.py b/src/preset_cli/cli/superset/sync/native/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/preset_cli/cli/superset/sync/native/command.py b/src/preset_cli/cli/superset/sync/native/command.py new file mode 100644 index 00000000..7f15fc44 --- /dev/null +++ b/src/preset_cli/cli/superset/sync/native/command.py @@ -0,0 +1,193 @@ +""" +A command to sync Superset exports into a Superset instance. +""" + +import getpass +import importlib.util +from datetime import datetime, timezone +from io import BytesIO +from pathlib import Path +from types import ModuleType +from typing import Any, Dict, Tuple +from zipfile import ZipFile + +import click +import yaml +from jinja2 import Template +from sqlalchemy.engine.url import make_url +from yarl import URL + +from preset_cli.api.clients.superset import SupersetClient +from preset_cli.exceptions import SupersetError + +YAML_EXTENSIONS = {".yaml", ".yml"} + +# This should be identical to ``superset.models.core.PASSWORD_MASK``. It's duplicated here +# because we don't want to have the CLI to depend on the ``superset`` package. +PASSWORD_MASK = "X" * 10 + + +resource_types = { + "chart": "Slice", + "dashboard": "Dashboard", + "database": "Database", + "dataset": "SqlaTable", +} + + +def load_user_modules(root: Path) -> Dict[str, ModuleType]: + """ + Load user-defined modules so they can be used with Jinja2. 
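+
+    Each ``*.py`` file in the directory is loaded as a module keyed by its
+    stem, so a hypothetical ``functions/math.py`` defining ``add`` could be
+    called from a template as ``{{ functions.math.add(1, 2) }}``.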
+ """ + modules = {} + for path in root.glob("*.py"): + spec = importlib.util.spec_from_file_location(path.stem, path) + if spec and spec.loader: + modules[path.stem] = importlib.util.module_from_spec(spec) + spec.loader.exec_module(modules[path.stem]) # type: ignore + + return modules + + +@click.command() +@click.argument("directory", type=click.Path(exists=True, resolve_path=True)) +@click.option( + "--overwrite", + is_flag=True, + default=False, + help="Overwrite existing resources", +) +@click.option( + "--option", + "-o", + multiple=True, + help="Custom values for templates (eg, country=BR)", +) +@click.option( + "--disallow-edits", + default=True, + help="Mark resources as manged externally to prevent edits", +) +@click.option("--external-url-prefix", default="", help="Base URL for resources") +@click.pass_context +def native( # pylint: disable=too-many-locals, too-many-arguments + ctx: click.core.Context, + directory: str, + option: Tuple[str, ...], + overwrite: bool = False, + disallow_edits: bool = True, # pylint: disable=unused-argument + external_url_prefix: str = "", +) -> None: + """ + Sync exported DBs/datasets/charts/dashboards to Superset. + """ + auth = ctx.obj["AUTH"] + url = URL(ctx.obj["INSTANCE"]) + client = SupersetClient(url, auth) + root = Path(directory) + + base_url = URL(external_url_prefix) if external_url_prefix else None + + # env for Jinja2 templating + env = dict(pair.split("=", 1) for pair in option if "=" in pair) # type: ignore + env["instance"] = url + env["functions"] = load_user_modules(root / "functions") + + # read all the YAML files + contents: Dict[str, str] = {} + queue = [root] + while queue: + path_name = queue.pop() + if path_name.is_dir(): + queue.extend(path_name.glob("*")) + elif path_name.suffix.lower() in YAML_EXTENSIONS: + with open(path_name, encoding="utf-8") as input_: + template = Template(input_.read()) + content = template.render(**env) + relative_path = path_name.relative_to(root) + + # mark resource as being managed externally + config = yaml.load(content, Loader=yaml.SafeLoader) + # TODO (betodealmeida): depends on https://github.com/apache/superset/pull/19315 + # config["is_managed_externally"] = disallow_edits + if base_url: + config["external_url"] = str( + base_url / str(relative_path), + ) + if relative_path.parts[0] == "databases": + prompt_for_passwords(relative_path, config) + + contents[str("bundle" / relative_path)] = yaml.safe_dump(config) + + # TODO (betodealmeida): use endpoint from https://github.com/apache/superset/pull/19220 + for resource in ["database", "dataset", "chart", "dashboard"]: + import_resource(resource, contents, client, overwrite) + + +def prompt_for_passwords(path: Path, config: Dict[str, Any]) -> None: + """ + Prompt user for masked passwords. + + Modify the config in place. + """ + uri = config["sqlalchemy_uri"] + password = make_url(uri).password + if password == PASSWORD_MASK and config.get("password") is None: + config["password"] = getpass.getpass( + f"Please provide the password for {path}: ", + ) + + +def import_resource( + resource: str, + contents: Dict[str, str], + client: SupersetClient, + overwrite: bool, +) -> None: + """ + Import a given resource. 
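+
+    ``contents`` maps bundle paths to YAML payloads, eg (a minimal sketch)::
+
+        {"bundle/databases/my_db.yaml": "database_name: my_db"}
+
+    A ``bundle/metadata.yaml`` entry is added automatically before the ZIP
+    is built and uploaded.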
+ """ + contents["bundle/metadata.yaml"] = yaml.dump( + dict( + version="1.0.0", + type=resource_types[resource], + timestamp=datetime.now(tz=timezone.utc).isoformat(), + ), + ) + + buf = BytesIO() + with ZipFile(buf, "w") as bundle: + for file_path, file_content in contents.items(): + with bundle.open(file_path, "w") as output: + output.write(file_content.encode()) + buf.seek(0) + try: + client.import_zip(resource, buf, overwrite=overwrite) + except SupersetError as ex: + click.echo( + click.style( + "\n".join(error["message"] for error in ex.errors), + fg="bright_red", + ), + ) + + # check if overwrite is needed: + existing = [ + key + for error in ex.errors + for key, value in error["extra"].items() + if "overwrite=true" in value + ] + if not existing: + raise ex + + existing_list = "\n".join("- " + name for name in existing) + click.echo( + click.style( + ( + "The following file(s) already exist. Pass --overwrite to " + f"replace them.\n{existing_list}" + ), + fg="bright_red", + ), + ) diff --git a/src/preset_cli/exceptions.py b/src/preset_cli/exceptions.py new file mode 100644 index 00000000..0e0673a0 --- /dev/null +++ b/src/preset_cli/exceptions.py @@ -0,0 +1,56 @@ +""" +Custom exceptions. +""" + +from enum import Enum +from typing import Any, Dict, List + +from typing_extensions import TypedDict + + +class ErrorLevel(str, Enum): + """ + Levels of errors that can exist within Superset. + """ + + INFO = "info" + WARNING = "warning" + ERROR = "error" + + +class ErrorPayload(TypedDict): + """ + A SIP-40 error payload. + """ + + message: str + error_type: str # import SupersetErrorType from Superset? + level: ErrorLevel + extra: Dict[str, Any] + + +class SupersetError(Exception): + """ + A SIP-40 compliant exception. + """ + + def __init__(self, errors: List[ErrorPayload]): + super().__init__() + self.errors = errors + + +class DatabaseNotFoundError(SupersetError): + """ + Exception when no database is found. + """ + + def __init__(self): + super().__init__( + [ + { + "message": "Database not found", + "error_type": "DATABASE_NOT_FOUND_ERROR", + "level": ErrorLevel.ERROR, + }, + ], + ) diff --git a/src/preset_cli/lib.py b/src/preset_cli/lib.py new file mode 100644 index 00000000..a1c7dbef --- /dev/null +++ b/src/preset_cli/lib.py @@ -0,0 +1,13 @@ +""" +Basic helper functions. +""" + +from pathlib import Path + + +def remove_root(file_path: str) -> str: + """ + Remove the first directory of a path. + """ + full_path = Path(file_path) + return str(Path(*full_path.parts[1:])) diff --git a/tests/api/clients/__init__.py b/tests/api/clients/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/api/client_test.py b/tests/api/clients/preset_test.py similarity index 89% rename from tests/api/client_test.py rename to tests/api/clients/preset_test.py index f9f7ff40..4aec1bf3 100644 --- a/tests/api/client_test.py +++ b/tests/api/clients/preset_test.py @@ -1,11 +1,11 @@ """ -Tests for ``preset_cli.api.client``. +Tests for ``preset_cli.api.clients.preset``. """ from requests_mock.mocker import Mocker from superset_sdk.auth.main import Auth -from preset_cli.api.client import PresetClient +from preset_cli.api.clients.preset import PresetClient def test_preset_client_get_teams(requests_mock: Mocker) -> None: diff --git a/tests/api/clients/superset_test.py b/tests/api/clients/superset_test.py new file mode 100644 index 00000000..00439be1 --- /dev/null +++ b/tests/api/clients/superset_test.py @@ -0,0 +1,1068 @@ +""" +Tests for ``preset_cli.api.clients.superset``. 
+""" +# pylint: disable=too-many-lines + +import json +from io import BytesIO +from unittest import mock + +import pytest +from pytest_mock import MockerFixture +from requests_mock.mocker import Mocker +from yarl import URL + +from preset_cli import __version__ +from preset_cli.api.clients.superset import ( + SupersetClient, + convert_to_adhoc_column, + convert_to_adhoc_metric, +) +from preset_cli.api.operators import OneToMany +from preset_cli.auth.main import Auth +from preset_cli.exceptions import SupersetError + + +def test_run_query(requests_mock: Mocker) -> None: + """ + Test the ``run_query`` method. + """ + requests_mock.post( + "https://superset.example.org/superset/sql_json/", + json={ + "query_id": 2, + "status": "success", + "data": [{"value": 1}], + "columns": [{"name": "value", "type": "INT", "is_date": False}], + "selected_columns": [{"name": "value", "type": "INT", "is_date": False}], + "expanded_columns": [], + "query": { + "changedOn": "2022-03-25T15:37:00.393660", + "changed_on": "2022-03-25T15:37:00.393660", + "dbId": 1, + "db": "examples", + "endDttm": 1648222620417.808, + "errorMessage": None, + "executedSql": "SELECT 1 AS value\nLIMIT 1001", + "id": "IrwwY8Ky14", + "queryId": 2, + "limit": 1000, + "limitingFactor": "NOT_LIMITED", + "progress": 100, + "rows": 1, + "schema": "public", + "ctas": False, + "serverId": 2, + "sql": "SELECT 1 AS value", + "sqlEditorId": "1", + "startDttm": 1648222620279.198000, + "state": "success", + "tab": "Untitled Query 1", + "tempSchema": None, + "tempTable": None, + "userId": 1, + "user": "admin admin", + "resultsKey": None, + "trackingUrl": None, + "extra": {"cancel_query": 35121, "progress": None}, + }, + }, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + results = client.run_query(database_id=1, sql="SELECT 1 AS value", limit=10) + assert results.to_dict() == {"value": {0: 1}} + + +def test_run_query_error(requests_mock: Mocker) -> None: + """ + Test the ``run_query`` method when an error occurs. + """ + errors = [ + { + "message": "Only SELECT statements are allowed against this database.", + "error_type": "DML_NOT_ALLOWED_ERROR", + "level": "error", + "extra": { + "issue_codes": [ + { + "code": 1022, + "message": ( + "Issue 1022 - Database does not allow data manipulation." + ), + }, + ], + }, + }, + ] + requests_mock.post( + "https://superset.example.org/superset/sql_json/", + json={"errors": errors}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + with pytest.raises(SupersetError) as excinfo: + client.run_query(database_id=1, sql="SSELECT 1 AS value", limit=10) + assert excinfo.value.errors == errors + + +def test_convert_to_adhoc_metric(mocker: MockerFixture) -> None: + """ + Test ``convert_to_adhoc_metric``. + """ + mocker.patch("preset_cli.api.clients.superset.uuid4", return_value=1234) + assert convert_to_adhoc_metric("COUNT(*)") == { + "aggregate": None, + "column": None, + "expressionType": "SQL", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(*)", + "optionName": "metric_1234", + "sqlExpression": "COUNT(*)", + } + + +def test_convert_to_adhoc_column() -> None: + """ + Test ``convert_to_adhoc_column`.. + """ + assert convert_to_adhoc_column("UPPER(name)") == { + "label": "UPPER(name)", + "sqlExpression": "UPPER(name)", + } + + +def test_get_data(requests_mock: Mocker) -> None: + """ + Test the ``run_query`` method. 
+ """ + requests_mock.get( + "https://superset.example.org/api/v1/dataset/27", + json={ + "description_columns": {}, + "id": 27, + "label_columns": { + "cache_timeout": "Cache Timeout", + "columns.changed_on": "Columns Changed On", + "columns.column_name": "Columns Column Name", + "columns.created_on": "Columns Created On", + "columns.description": "Columns Description", + "columns.expression": "Columns Expression", + "columns.extra": "Columns Extra", + "columns.filterable": "Columns Filterable", + "columns.groupby": "Columns Groupby", + "columns.id": "Columns Id", + "columns.is_active": "Columns Is Active", + "columns.is_dttm": "Columns Is Dttm", + "columns.python_date_format": "Columns Python Date Format", + "columns.type": "Columns Type", + "columns.type_generic": "Columns Type Generic", + "columns.uuid": "Columns Uuid", + "columns.verbose_name": "Columns Verbose Name", + "database.backend": "Database Backend", + "database.database_name": "Database Database Name", + "database.id": "Database Id", + "datasource_type": "Datasource Type", + "default_endpoint": "Default Endpoint", + "description": "Description", + "extra": "Extra", + "fetch_values_predicate": "Fetch Values Predicate", + "filter_select_enabled": "Filter Select Enabled", + "id": "Id", + "is_sqllab_view": "Is Sqllab View", + "main_dttm_col": "Main Dttm Col", + "metrics": "Metrics", + "offset": "Offset", + "owners.first_name": "Owners First Name", + "owners.id": "Owners Id", + "owners.last_name": "Owners Last Name", + "owners.username": "Owners Username", + "schema": "Schema", + "sql": "Sql", + "table_name": "Table Name", + "template_params": "Template Params", + "url": "Url", + }, + "result": { + "cache_timeout": None, + "columns": [ + { + "changed_on": "2022-03-27T13:21:33.957609", + "column_name": "ts", + "created_on": "2022-03-27T13:21:33.957602", + "description": None, + "expression": None, + "extra": None, + "filterable": True, + "groupby": True, + "id": 841, + "is_active": True, + "is_dttm": True, + "python_date_format": None, + "type": "TIMESTAMP WITHOUT TIME ZONE", + "type_generic": 2, + "uuid": "e607d7fd-90bf-4420-a35e-9dc7af555e0d", + "verbose_name": None, + }, + { + "changed_on": "2022-03-27T13:21:33.958499", + "column_name": "name", + "created_on": "2022-03-27T13:21:33.958493", + "description": None, + "expression": None, + "extra": None, + "filterable": True, + "groupby": True, + "id": 842, + "is_active": True, + "is_dttm": False, + "python_date_format": None, + "type": "VARCHAR(255)", + "type_generic": 1, + "uuid": "76a523f0-1aad-4608-87a4-daf22172e1da", + "verbose_name": None, + }, + { + "changed_on": "2022-03-27T13:21:33.975750", + "column_name": "text", + "created_on": "2022-03-27T13:21:33.975743", + "description": None, + "expression": None, + "extra": None, + "filterable": True, + "groupby": True, + "id": 843, + "is_active": True, + "is_dttm": False, + "python_date_format": None, + "type": "TEXT", + "type_generic": 1, + "uuid": "610b91b0-e8de-4703-bbe9-5b27fb6c6a4e", + "verbose_name": None, + }, + ], + "database": { + "backend": "postgresql", + "database_name": "superset_examples_dev", + "id": 3, + }, + "datasource_type": "table", + "default_endpoint": None, + "description": "", + "extra": json.dumps( + { + "resource_type": "model", + "unique_id": "model.superset_examples.messages_channels", + "depends_on": "ref('messages_channels')", + }, + ), + "fetch_values_predicate": None, + "filter_select_enabled": False, + "id": 27, + "is_sqllab_view": False, + "main_dttm_col": "ts", + "metrics": [ + { + "changed_on": 
"2022-03-27T13:21:34.298657", + "created_on": "2022-03-27T13:21:34.248023", + "d3format": None, + "description": "", + "expression": "count(*)", + "extra": None, + "id": 35, + "metric_name": "cnt", + "metric_type": "count", + "uuid": "c4b74ceb-a19c-494a-9b90-9c68ed5bc8cb", + "verbose_name": "count(*)", + "warning_text": None, + }, + ], + "offset": 0, + "owners": [], + "schema": "public", + "sql": None, + "table_name": "messages_channels", + "template_params": None, + "url": "/tablemodelview/edit/27", + }, + "show_columns": [ + "id", + "database.database_name", + "database.id", + "table_name", + "sql", + "filter_select_enabled", + "fetch_values_predicate", + "schema", + "description", + "main_dttm_col", + "offset", + "default_endpoint", + "cache_timeout", + "is_sqllab_view", + "template_params", + "owners.id", + "owners.username", + "owners.first_name", + "owners.last_name", + "columns.changed_on", + "columns.column_name", + "columns.created_on", + "columns.description", + "columns.expression", + "columns.filterable", + "columns.groupby", + "columns.id", + "columns.is_active", + "columns.extra", + "columns.is_dttm", + "columns.python_date_format", + "columns.type", + "columns.uuid", + "columns.verbose_name", + "metrics", + "datasource_type", + "url", + "extra", + "columns.type_generic", + "database.backend", + ], + "show_title": "Show Sqla Table", + }, + ) + requests_mock.post( + "https://superset.example.org/api/v1/chart/data", + json={ + "result": [ + { + "cache_key": "ddf314a4bb44dd7f4a2f5b880e3c2503", + "cached_dttm": None, + "cache_timeout": 300, + "applied_template_filters": [], + "annotation_data": {}, + "error": None, + "is_cached": None, + "query": """SELECT name AS name, + count(*) AS cnt +FROM public.messages_channels +GROUP BY name +LIMIT 10000; + +""", + "status": "success", + "stacktrace": None, + "rowcount": 29, + "from_dttm": None, + "to_dttm": None, + "colnames": ["name", "cnt"], + "indexnames": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + ], + "coltypes": [1, 0], + "data": [ + {"name": "newsletter", "cnt": 8}, + {"name": "support", "cnt": 114}, + {"name": "dashboard-level-access", "cnt": 57}, + {"name": "beginners", "cnt": 368}, + {"name": "community-feedback", "cnt": 27}, + {"name": "graduation", "cnt": 38}, + {"name": "superset_stage_alerts", "cnt": 3}, + {"name": "contributing", "cnt": 29}, + {"name": "github-notifications", "cnt": 2975}, + {"name": "helm-k8-deployment", "cnt": 72}, + {"name": "globalnav_search", "cnt": 1}, + {"name": "design", "cnt": 4}, + {"name": "localization", "cnt": 7}, + {"name": "commits", "cnt": 1}, + {"name": "apache-releases", "cnt": 53}, + {"name": "jobs", "cnt": 22}, + {"name": "general", "cnt": 383}, + {"name": "announcements", "cnt": 19}, + {"name": "visualization_plugins", "cnt": 50}, + {"name": "product_feedback", "cnt": 41}, + {"name": "dashboard-filters", "cnt": 75}, + {"name": "superset-champions", "cnt": 40}, + {"name": "introductions", "cnt": 141}, + {"name": "embedd-dashboards", "cnt": 10}, + {"name": "cypress-tests", "cnt": 7}, + {"name": "superset_prod_reports", "cnt": 3}, + {"name": "feature-requests", "cnt": 22}, + {"name": "dashboards", "cnt": 87}, + {"name": "developers", "cnt": 27}, + ], + "result_format": "json", + "applied_filters": [], + "rejected_filters": [], + }, + ], + }, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + results = client.get_data(27, 
["cnt"], ["name"]) + assert results.to_dict() == { + "name": { + 0: "newsletter", + 1: "support", + 2: "dashboard-level-access", + 3: "beginners", + 4: "community-feedback", + 5: "graduation", + 6: "superset_stage_alerts", + 7: "contributing", + 8: "github-notifications", + 9: "helm-k8-deployment", + 10: "globalnav_search", + 11: "design", + 12: "localization", + 13: "commits", + 14: "apache-releases", + 15: "jobs", + 16: "general", + 17: "announcements", + 18: "visualization_plugins", + 19: "product_feedback", + 20: "dashboard-filters", + 21: "superset-champions", + 22: "introductions", + 23: "embedd-dashboards", + 24: "cypress-tests", + 25: "superset_prod_reports", + 26: "feature-requests", + 27: "dashboards", + 28: "developers", + }, + "cnt": { + 0: 8, + 1: 114, + 2: 57, + 3: 368, + 4: 27, + 5: 38, + 6: 3, + 7: 29, + 8: 2975, + 9: 72, + 10: 1, + 11: 4, + 12: 7, + 13: 1, + 14: 53, + 15: 22, + 16: 383, + 17: 19, + 18: 50, + 19: 41, + 20: 75, + 21: 40, + 22: 141, + 23: 10, + 24: 7, + 25: 3, + 26: 22, + 27: 87, + 28: 27, + }, + } + + +def test_get_data_parameters(mocker: MockerFixture) -> None: + """ + Test different parameters passed to ``get_data``. + """ + auth = mocker.MagicMock() + session = auth.get_session() + session.get().json.return_value = { + "result": { + "columns": [], + "metrics": [], + }, + } + session.post().json.return_value = { + "result": [ + { + "data": [{"a": 1}], + }, + ], + } + mocker.patch("preset_cli.api.clients.superset.uuid4", return_value=1234) + + client = SupersetClient("https://superset.example.org/", auth) + client.get_data( + 27, + ["cnt"], + ["name"], + time_column="ts", + is_timeseries=True, + granularity="P1M", + ) + + session.post.assert_has_calls( + [ + mock.call(), + mock.call( + URL("https://superset.example.org/api/v1/chart/data"), + json={ + "datasource": {"id": 27, "type": "table"}, + "force": False, + "queries": [ + { + "annotation_layers": [], + "applied_time_extras": {}, + "columns": [{"label": "name", "sqlExpression": "name"}], + "custom_form_data": {}, + "custom_params": {}, + "extras": { + "having": "", + "having_druid": [], + "time_grain_sqla": "P1M", + "where": "", + }, + "filters": [], + "granularity": "ts", + "is_timeseries": True, + "metrics": [ + { + "aggregate": None, + "column": None, + "expressionType": "SQL", + "hasCustomLabel": False, + "isNew": False, + "label": "cnt", + "optionName": "metric_1234", + "sqlExpression": "cnt", + }, + ], + "order_desc": True, + "orderby": [], + "row_limit": 10000, + "time_range": "No filter", + "timeseries_limit": 0, + "url_params": {}, + }, + ], + "result_format": "json", + "result_type": "full", + }, + headers={ + "Accept": "application/json", + "Content-Type": "application/json", + "User-Agent": f"Apache Superset Client ({__version__})", + "Referer": "https://superset.example.org/", + }, + ), + mock.call().json(), + ], + ) + + +def test_get_data_time_column_error(mocker: MockerFixture) -> None: + """ + Test when the time column is ambiguous in ``get_data``. 
+ """ + auth = mocker.MagicMock() + session = auth.get_session() + session.get().json.return_value = { + "result": { + "columns": [ + {"column_name": "event_time", "is_dttm": True}, + {"column_name": "server_time", "is_dttm": True}, + ], + "metrics": [], + }, + } + session.post().json.return_value = { + "result": [ + { + "data": [{"a": 1}], + }, + ], + } + mocker.patch("preset_cli.api.clients.superset.uuid4", return_value=1234) + + client = SupersetClient("https://superset.example.org/", auth) + with pytest.raises(Exception) as excinfo: + client.get_data(27, ["cnt"], ["name"]) + assert str(excinfo.value) == ( + "Unable to determine time column, please pass `time_series` " + "as one of: event_time, server_time" + ) + + +def test_get_data_error(mocker: MockerFixture) -> None: + """ + Test ``get_data`` with a generic error. + """ + auth = mocker.MagicMock() + session = auth.get_session() + session.get().json.return_value = { + "result": { + "columns": [], + "metrics": [], + }, + } + session.post().json.return_value = {"errors": "An error occurred"} + + client = SupersetClient("https://superset.example.org/", auth) + with pytest.raises(SupersetError) as excinfo: + client.get_data(27, ["cnt"], ["name"], time_column="ts") + assert excinfo.value.errors == "An error occurred" + + +def test_get_resource(requests_mock: Mocker) -> None: + """ + Test the generic ``get_resource`` method. + """ + # the payload schema is irrelevant, since it's passed through unmodified + requests_mock.get( + "https://superset.example.org/api/v1/database/1", + json={"Hello": "world"}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.get_resource("database", 1) + assert response == {"Hello": "world"} + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + + +def test_get_resources(requests_mock: Mocker) -> None: + """ + Test the generic ``get_resources`` method. + """ + # the payload schema is irrelevant, since it's passed through unmodified + requests_mock.get( + "https://superset.example.org/api/v1/database/?q=(filters:!())", + json={"result": {"Hello": "world"}}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.get_resources("database") + assert response == {"Hello": "world"} + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + + +def test_get_resources_filtered_equal(requests_mock: Mocker) -> None: + """ + Test the generic ``get_resources`` method with an equal filter. + """ + # the payload schema is irrelevant, since it's passed through unmodified + requests_mock.get( + "https://superset.example.org/api/v1/database/" + "?q=(filters:!((col:database_name,opr:eq,value:my_db)))", + json={"result": {"Hello": "world"}}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.get_resources("database", database_name="my_db") + assert response == {"Hello": "world"} + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + + +def test_get_resources_filtered_one_to_many(requests_mock: Mocker) -> None: + """ + Test the generic ``get_resources`` method with a one-to-many filter. 
+ """ + # the payload schema is irrelevant, since it's passed through unmodified + requests_mock.get( + "https://superset.example.org/api/v1/database/" + "?q=(filters:!((col:database,opr:rel_o_m,value:1)))", + json={"result": {"Hello": "world"}}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.get_resources("database", database=OneToMany(1)) + assert response == {"Hello": "world"} + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + + +def test_create_resource(requests_mock: Mocker) -> None: + """ + Test the generic ``create_resource`` method. + """ + requests_mock.post( + "https://superset.example.org/api/v1/database/", + json={"Hello": "world"}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.create_resource( + "database", + database_name="my_db", + sqlalchemy_uri="gsheets://", + ) + assert response == {"Hello": "world"} + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + assert requests_mock.last_request.json() == { + "database_name": "my_db", + "sqlalchemy_uri": "gsheets://", + } + + +def test_update_resource(requests_mock: Mocker) -> None: + """ + Test the generic ``update_resource`` method. + """ + requests_mock.put( + "https://superset.example.org/api/v1/database/1", + json={"Hello": "world"}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.update_resource( + resource="database", + resource_id=1, + database_name="my_other_db", + ) + assert response == {"Hello": "world"} + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + assert requests_mock.last_request.json() == { + "database_name": "my_other_db", + } + + +def test_update_resource_with_query_args(requests_mock: Mocker) -> None: + """ + Test the generic ``update_resource`` method. + """ + requests_mock.put( + "https://superset.example.org/api/v1/database/1?override_columns=true", + json={"Hello": "world"}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.update_resource( + resource="database", + resource_id=1, + query_args={"override_columns": "true"}, + database_name="my_other_db", + ) + assert response == {"Hello": "world"} + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + assert requests_mock.last_request.json() == { + "database_name": "my_other_db", + } + + +def test_get_database(mocker: MockerFixture) -> None: + """ + Test the ``get_database`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + get_resource = mocker.patch.object(client, "get_resource") + + client.get_database(1) + get_resource.assert_called_with("database", 1) + + +def test_get_databases(mocker: MockerFixture) -> None: + """ + Test the ``get_databases`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + get_resources = mocker.patch.object(client, "get_resources") + + client.get_databases() + get_resources.assert_called_with("database") + client.get_databases(database_name="my_db") + get_resources.assert_called_with("database", database_name="my_db") + + +def test_create_database(mocker: MockerFixture) -> None: + """ + Test the ``create_database`` method. 
+ """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + create_resource = mocker.patch.object(client, "create_resource") + + client.create_database(database_name="my_db", sqlalchemy_uri="gsheets://") + create_resource.assert_called_with( + "database", + database_name="my_db", + sqlalchemy_uri="gsheets://", + ) + + +def test_update_database(mocker: MockerFixture) -> None: + """ + Test the ``update_database`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + update_resource = mocker.patch.object(client, "update_resource") + + client.update_database(1, database_name="my_other_db") + update_resource.assert_called_with( + "database", + 1, + {"override_columns": "true"}, + database_name="my_other_db", + ) + + +def test_get_dataset(mocker: MockerFixture) -> None: + """ + Test the ``get_dataset`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + get_resource = mocker.patch.object(client, "get_resource") + + client.get_dataset(1) + get_resource.assert_called_with("dataset", 1) + + +def test_get_datasets(mocker: MockerFixture) -> None: + """ + Test the ``get_datasets`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + get_resources = mocker.patch.object(client, "get_resources") + + client.get_datasets() + get_resources.assert_called_with("dataset") + client.get_datasets(dataset_name="my_db") + get_resources.assert_called_with("dataset", dataset_name="my_db") + + +def test_create_dataset(mocker: MockerFixture) -> None: + """ + Test the ``create_dataset`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + create_resource = mocker.patch.object(client, "create_resource") + + client.create_dataset(dataset_name="my_db", sqlalchemy_uri="gsheets://") + create_resource.assert_called_with( + "dataset", + dataset_name="my_db", + sqlalchemy_uri="gsheets://", + ) + + +def test_update_dataset(mocker: MockerFixture) -> None: + """ + Test the ``update_dataset`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + update_resource = mocker.patch.object(client, "update_resource") + + client.update_dataset(1, dataset_name="my_other_db") + update_resource.assert_called_with("dataset", 1, dataset_name="my_other_db") + + +def test_get_dashboard(mocker: MockerFixture) -> None: + """ + Test the ``get_dashboard`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + get_resource = mocker.patch.object(client, "get_resource") + + client.get_dashboard(1) + get_resource.assert_called_with("dashboard", 1) + + +def test_get_dashboards(mocker: MockerFixture) -> None: + """ + Test the ``get_dashboards`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + get_resources = mocker.patch.object(client, "get_resources") + + client.get_dashboards() + get_resources.assert_called_with("dashboard") + client.get_dashboards(dashboard_name="my_db") + get_resources.assert_called_with("dashboard", dashboard_name="my_db") + + +def test_create_dashboard(mocker: MockerFixture) -> None: + """ + Test the ``create_dashboard`` method. 
+ """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + create_resource = mocker.patch.object(client, "create_resource") + + client.create_dashboard(dashboard_name="my_db", sqlalchemy_uri="gsheets://") + create_resource.assert_called_with( + "dashboard", + dashboard_name="my_db", + sqlalchemy_uri="gsheets://", + ) + + +def test_update_dashboard(mocker: MockerFixture) -> None: + """ + Test the ``update_dashboard`` method. + """ + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + update_resource = mocker.patch.object(client, "update_resource") + + client.update_dashboard(1, dashboard_name="my_other_db") + update_resource.assert_called_with("dashboard", 1, dashboard_name="my_other_db") + + +def test_export_zip(requests_mock: Mocker) -> None: + """ + Test the ``export_zip`` method. + """ + requests_mock.get( + "https://superset.example.org/api/v1/database/export/?q=%21%281%2C2%2C3%29", + content="I'm a ZIP".encode("utf-8"), + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + response = client.export_zip("database", [1, 2, 3]) + assert response.getvalue() == "I'm a ZIP".encode("utf-8") + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + + +def test_export_zip_error(requests_mock: Mocker) -> None: + """ + Test the ``export_zip`` method when an error occurs. + """ + requests_mock.get( + "https://superset.example.org/api/v1/database/export/?q=%21%281%2C2%2C3%29", + json={"errors": "An error occurred"}, + status_code=500, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + with pytest.raises(SupersetError) as excinfo: + client.export_zip("database", [1, 2, 3]) + assert excinfo.value.errors == "An error occurred" + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + + +def test_import_zip(requests_mock: Mocker) -> None: + """ + Test the ``import_zip`` method. + """ + requests_mock.post( + "https://superset.example.org/api/v1/database/import/", + json={"message": "OK"}, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + data = BytesIO("I'm a ZIP".encode("utf-8")) + response = client.import_zip("database", data, overwrite=True) + assert response is True + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) + assert requests_mock.last_request.headers["Accept"] == "application/json" + + boundary = ( + requests_mock.last_request.headers["Content-type"] + .split(";")[1] + .split("=")[1] + .strip() + ) + assert requests_mock.last_request.text == ( + f'--{boundary}\r\nContent-Disposition: form-data; name="overwrite"\r\n\r\n' + f'true\r\n--{boundary}\r\nContent-Disposition: form-data; name="formData"; ' + f'filename="formData"\r\n\r\nI\'m a ZIP\r\n--{boundary}--\r\n' + ) + + +def test_import_zip_error(requests_mock: Mocker) -> None: + """ + Test the ``import_zip`` method when an error occurs. 
+ """ + requests_mock.post( + "https://superset.example.org/api/v1/database/import/", + json={"errors": "An error occurred"}, + status_code=500, + ) + + auth = Auth() + client = SupersetClient("https://superset.example.org/", auth) + + data = BytesIO("I'm a ZIP".encode("utf-8")) + with pytest.raises(SupersetError) as excinfo: + client.import_zip("database", data, overwrite=True) + assert excinfo.value.errors == "An error occurred" + assert ( + requests_mock.last_request.headers["Referer"] == "https://superset.example.org/" + ) diff --git a/tests/auth/__init__.py b/tests/auth/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/auth/jwt_test.py b/tests/auth/jwt_test.py new file mode 100644 index 00000000..2ca9bd9a --- /dev/null +++ b/tests/auth/jwt_test.py @@ -0,0 +1,13 @@ +""" +Test JWT auth. +""" + +from preset_cli.auth.jwt import JWTAuth + + +def test_jwt_auth() -> None: + """ + Test the ``JWTAuth`` authentication mechanism. + """ + auth = JWTAuth("my-token") + assert auth.get_headers() == {"Authorization": "Bearer my-token"} diff --git a/tests/auth/main_test.py b/tests/auth/main_test.py new file mode 100644 index 00000000..0d6e5c9f --- /dev/null +++ b/tests/auth/main_test.py @@ -0,0 +1,45 @@ +""" +Test authentication mechanisms. +""" + +from pytest_mock import MockerFixture +from requests_mock.mocker import Mocker +from yarl import URL + +from preset_cli.auth.main import Auth, UsernamePasswordAuth + + +def test_auth(mocker: MockerFixture) -> None: + """ + Tests for the base class ``Auth``. + """ + requests = mocker.patch("preset_cli.auth.main.requests") + + auth = Auth() + assert auth.get_session() == requests.Session() + + +def test_username_password_auth(requests_mock: Mocker) -> None: + """ + Tests for the username/password authentication mechanism. + """ + csrf_token = "CSFR_TOKEN" + requests_mock.get( + "https://superset.example.org/login/", + text=f'', + ) + requests_mock.post("https://superset.example.org/login/") + + auth = UsernamePasswordAuth( + URL("https://superset.example.org/"), + "admin", + "password123", + ) + assert auth.get_headers() == { + "X-CSRFToken": csrf_token, + } + + assert ( + requests_mock.last_request.text + == "username=admin&password=password123&csrf_token=CSFR_TOKEN" + ) diff --git a/tests/cli/superset/export_test.py b/tests/cli/superset/export_test.py new file mode 100644 index 00000000..fb8c1740 --- /dev/null +++ b/tests/cli/superset/export_test.py @@ -0,0 +1,146 @@ +""" +Tests for the export command. +""" +# pylint: disable=redefined-outer-name, invalid-name + +from io import BytesIO +from pathlib import Path +from unittest import mock +from zipfile import ZipFile + +import pytest +from click.testing import CliRunner +from pyfakefs.fake_filesystem import FakeFilesystem +from pytest_mock import MockerFixture + +from preset_cli.auth.main import Auth +from preset_cli.cli.superset.export import export_resource +from preset_cli.cli.superset.main import superset + + +@pytest.fixture +def database_export() -> BytesIO: + """ + Fixture for the contents of a simple database export. 
+ """ + contents = { + "dashboard_export/metadata.yaml": "Metadata", + "dashboard_export/databases/gsheets.yaml": "GSheets", + } + + buf = BytesIO() + with ZipFile(buf, "w") as bundle: + for file_name, file_contents in contents.items(): + with bundle.open(file_name, "w") as output: + output.write(file_contents.encode()) + buf.seek(0) + return buf + + +def test_export_resource( + mocker: MockerFixture, + fs: FakeFilesystem, + database_export: BytesIO, +) -> None: + """ + Test ``export_resource``. + """ + root = Path("/path/to/root") + fs.create_dir(root) + + client = mocker.MagicMock() + client.export_zip.return_value = database_export + + export_resource(resource="database", root=root, client=client, overwrite=False) + + # check that the database was written to the directory + with open(root / "databases/gsheets.yaml", encoding="utf-8") as input_: + assert input_.read() == "GSheets" + + # metadata file should be ignored + assert not (root / "metadata.yaml").exists() + + +def test_export_resource_overwrite( + mocker: MockerFixture, + fs: FakeFilesystem, + database_export: BytesIO, +) -> None: + """ + Test that we need to confirm overwrites. + """ + root = Path("/path/to/root") + fs.create_dir(root) + + client = mocker.MagicMock() + client.export_zip.return_value = database_export + + export_resource(resource="database", root=root, client=client, overwrite=False) + with pytest.raises(Exception) as excinfo: + export_resource(resource="database", root=root, client=client, overwrite=False) + assert str(excinfo.value) == ( + "File already exists and --overwrite was not specified: " + "/path/to/root/databases/gsheets.yaml" + ) + + export_resource(resource="database", root=root, client=client, overwrite=True) + + +def test_export(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test the ``export`` command. + """ + # root must exist for command to succeed + root = Path("/path/to/root") + fs.create_dir(root) + + SupersetClient = mocker.patch("preset_cli.cli.superset.export.SupersetClient") + client = SupersetClient() + export_resource = mocker.patch("preset_cli.cli.superset.export.export_resource") + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + + runner = CliRunner() + result = runner.invoke( + superset, + ["https://superset.example.org/", "export", "/path/to/root"], + catch_exceptions=False, + ) + assert result.exit_code == 0 + export_resource.assert_has_calls( + [ + mock.call("database", Path("/path/to/root"), client, False), + mock.call("dataset", Path("/path/to/root"), client, False), + mock.call("chart", Path("/path/to/root"), client, False), + mock.call("dashboard", Path("/path/to/root"), client, False), + ], + ) + + +def test_export_with_custom_auth(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test the ``export`` command. 
+ """ + # root must exist for command to succeed + root = Path("/path/to/root") + fs.create_dir(root) + + SupersetClient = mocker.patch("preset_cli.cli.superset.export.SupersetClient") + client = SupersetClient() + export_resource = mocker.patch("preset_cli.cli.superset.export.export_resource") + + runner = CliRunner() + result = runner.invoke( + superset, + ["https://superset.example.org/", "export", "/path/to/root"], + catch_exceptions=False, + obj={"AUTH": Auth()}, + ) + assert result.exit_code == 0 + export_resource.assert_has_calls( + [ + mock.call("database", Path("/path/to/root"), client, False), + mock.call("dataset", Path("/path/to/root"), client, False), + mock.call("chart", Path("/path/to/root"), client, False), + mock.call("dashboard", Path("/path/to/root"), client, False), + ], + ) diff --git a/tests/cli/superset/main_test.py b/tests/cli/superset/main_test.py deleted file mode 100644 index ea075528..00000000 --- a/tests/cli/superset/main_test.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -Tests for the Superset dispatcher. -""" - -import click -from click.testing import CliRunner - -from preset_cli.cli.superset.main import mutate_commands, superset - - -def test_mutate_commands() -> None: - """ - Test ``mutate_commands``. - """ - - @click.group() - def source_group() -> None: - """ - A simple group of commands. - """ - - @click.command() - @click.argument("name") - def source_command(name: str) -> None: - """ - Say hello. - """ - click.echo(f"Hello, {name}!") - - source_group.add_command(source_command) - - @click.group() - def source_subgroup() -> None: - """ - A simple subgroup. - """ - - @click.command() - @click.argument("name") - def source_subcommand(name: str) -> None: - """ - Say goodbye - """ - click.echo(f"Goodbye, {name}!") - - source_subgroup.add_command(source_subcommand) - source_group.add_command(source_subgroup) - - @click.group() - @click.pass_context - def target_group(ctx: click.core.Context) -> None: - """ - The target group to which commands will be added to. - """ - ctx.ensure_object(dict) - ctx.obj["WORKSPACES"] = ["instance1", "instance2"] - - mutate_commands(source_group, target_group) - - runner = CliRunner() - - result = runner.invoke( - target_group, - ["source-command", "Alice"], - catch_exceptions=False, - ) - assert result.exit_code == 0 - assert ( - result.output - == """ -instance1 -Hello, Alice! - -instance2 -Hello, Alice! -""" - ) - - result = runner.invoke( - target_group, - ["source-subgroup", "source-subcommand", "Alice"], - catch_exceptions=False, - ) - assert result.exit_code == 0 - assert ( - result.output - == """ -instance1 -Goodbye, Alice! - -instance2 -Goodbye, Alice! -""" - ) - - -def test_superset() -> None: - """ - Test the ``superset`` command. - """ - runner = CliRunner() - - result = runner.invoke(superset, ["--help"], catch_exceptions=False) - assert result.exit_code == 0 - assert ( - result.output - == """Usage: superset [OPTIONS] COMMAND [ARGS]... - - Send commands to one or more Superset instances. - -Options: - --help Show this message and exit. - -Commands: - export - sql - sync -""" - ) - - result = runner.invoke(superset, ["export", "--help"], catch_exceptions=False) - assert result.exit_code == 0 - assert ( - result.output - == """Usage: superset export [OPTIONS] DIRECTORY - -Options: - --overwrite Overwrite existing resources - --help Show this message and exit. 
-""" - ) diff --git a/tests/cli/superset/sql_test.py b/tests/cli/superset/sql_test.py new file mode 100644 index 00000000..89ba3946 --- /dev/null +++ b/tests/cli/superset/sql_test.py @@ -0,0 +1,325 @@ +""" +Test the ``sql`` command. +""" +# pylint: disable=invalid-name, unused-argument, redefined-outer-name + +from io import StringIO +from pathlib import Path + +import pandas as pd +from click.testing import CliRunner +from pyfakefs.fake_filesystem import FakeFilesystem +from pytest_mock import MockerFixture +from yarl import URL + +from preset_cli.cli.superset.main import superset +from preset_cli.cli.superset.sql import run_query, run_session +from preset_cli.exceptions import ErrorLevel, SupersetError + + +def test_run_query(mocker: MockerFixture) -> None: + """ + Test ``run_query``. + """ + client = mocker.MagicMock() + client.run_query.return_value = pd.DataFrame([{"answer": 42}]) + click = mocker.patch("preset_cli.cli.superset.sql.click") + + run_query(client=client, database_id=1, query="SELECT 42 AS answer") + client.run_query.assert_called_with(1, "SELECT 42 AS answer") + click.echo.assert_called_with(" answer\n--------\n 42") + + +def test_run_query_superset_error(mocker: MockerFixture) -> None: + """ + Test ``run_query`` when a ``SupersetError`` happens. + """ + client = mocker.MagicMock() + client.run_query.side_effect = SupersetError( + [ + { + "message": "Only SELECT statements are allowed against this database.", + "error_type": "DML_NOT_ALLOWED_ERROR", + "level": ErrorLevel.ERROR, + "extra": { + "issue_codes": [ + { + "code": 1022, + "message": "Issue 1022 - Database does not allow data manipulation.", + }, + ], + }, + }, + ], + ) + click = mocker.patch("preset_cli.cli.superset.sql.click") + + run_query(client=client, database_id=1, query="SSELECT 1") + click.style.assert_called_with( + "Only SELECT statements are allowed against this database.", + fg="bright_red", + ) + + +def test_run_query_exception(mocker: MockerFixture) -> None: + """ + Test ``run_query`` when a different exception happens. + """ + client = mocker.MagicMock() + client.run_query.side_effect = Exception("Unexpected error") + traceback = mocker.patch("preset_cli.cli.superset.sql.traceback") + + run_query(client=client, database_id=1, query="SSELECT 1") + traceback.print_exc.assert_called_with() + + +def test_run_session(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``run_session``. + """ + history = Path("/path/to/.config/superset-sdk/") + os = mocker.patch("preset_cli.cli.superset.sql.os") + os.path.expanduser.return_value = str(history) + + client = mocker.MagicMock() + client.run_query.return_value = pd.DataFrame([{"answer": 42}]) + + stdout = mocker.patch("sys.stdout", new_callable=StringIO) + PromptSession = mocker.patch("preset_cli.cli.superset.sql.PromptSession") + session = PromptSession() + session.prompt.side_effect = ["SELECT 42 AS answer;", "", EOFError()] + + run_session( + client=client, + database_id=1, + database_name="GSheets", + url=URL("https://superset.example.org/"), + ) + result = stdout.getvalue() + assert ( + result + == """ answer +-------- + 42 +Goodbye! +""" + ) + + +def test_run_session_multiline(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``run_session`` with multilines. 
+ """ + history = Path("/path/to/.config/superset-sdk/") + os = mocker.patch("preset_cli.cli.superset.sql.os") + os.path.expanduser.return_value = str(history) + + client = mocker.MagicMock() + client.run_query.return_value = pd.DataFrame([{"the\nanswer": "foo\nbar"}]) + + stdout = mocker.patch("sys.stdout", new_callable=StringIO) + PromptSession = mocker.patch("preset_cli.cli.superset.sql.PromptSession") + session = PromptSession() + session.prompt.side_effect = [ + """SELECT 'foo\nbar' AS "the\nanswer";""", + "", + EOFError(), + ] + + run_session( + client=client, + database_id=1, + database_name="GSheets", + url=URL("https://superset.example.org/"), + ) + result = stdout.getvalue() + assert ( + result + == """the +answer +-------- +foo +bar +Goodbye! +""" + ) + + +def test_run_session_ctrl_c(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test that ``CTRL-C`` does not exit the REPL. + """ + history = Path("/path/to/.config/superset-sdk/") + os = mocker.patch("preset_cli.cli.superset.sql.os") + os.path.expanduser.return_value = str(history) + + client = mocker.MagicMock() + client.run_query.return_value = pd.DataFrame([{"answer": 42}]) + + stdout = mocker.patch("sys.stdout", new_callable=StringIO) + PromptSession = mocker.patch("preset_cli.cli.superset.sql.PromptSession") + session = PromptSession() + session.prompt.side_effect = [KeyboardInterrupt(), "SELECT 1;", EOFError()] + + run_session( + client=client, + database_id=1, + database_name="GSheets", + url=URL("https://superset.example.org/"), + ) + result = stdout.getvalue() + assert ( + result + == """ answer +-------- + 42 +Goodbye! +""" + ) + + +def test_run_session_history_exists(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``run_session``. + """ + history = Path("/path/to/.config/superset-sdk/") + os = mocker.patch("preset_cli.cli.superset.sql.os") + os.path.expanduser.return_value = str(history) + history.mkdir(parents=True) + + client = mocker.MagicMock() + client.run_query.return_value = pd.DataFrame([{"answer": 42}]) + + stdout = mocker.patch("sys.stdout", new_callable=StringIO) + PromptSession = mocker.patch("preset_cli.cli.superset.sql.PromptSession") + session = PromptSession() + session.prompt.side_effect = ["SELECT 42 AS answer;", "", EOFError()] + + run_session( + client=client, + database_id=1, + database_name="GSheets", + url=URL("https://superset.example.org/"), + ) + result = stdout.getvalue() + assert ( + result + == """ answer +-------- + 42 +Goodbye! +""" + ) + + +def test_sql_run_query(mocker: MockerFixture) -> None: + """ + Test the ``sql`` command in programmatic mode (run single query). + """ + SupersetClient = mocker.patch("preset_cli.cli.superset.sql.SupersetClient") + client = SupersetClient() + client.get_databases.return_value = [{"id": 1, "database_name": "GSheets"}] + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + run_query = mocker.patch("preset_cli.cli.superset.sql.run_query") + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sql", + "-e", + "SELECT 1", + "--database-id", + "1", + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + run_query.assert_called_with(client, 1, "SELECT 1") + + +def test_sql_run_session(mocker: MockerFixture) -> None: + """ + Test the ``sql`` command in session mode (REPL). 
+ """ + SupersetClient = mocker.patch("preset_cli.cli.superset.sql.SupersetClient") + client = SupersetClient() + client.get_databases.return_value = [{"id": 1, "database_name": "GSheets"}] + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + run_session = mocker.patch("preset_cli.cli.superset.sql.run_session") + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sql", + "--database-id", + "1", + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + run_session.assert_called_with( + client, + 1, + "GSheets", + URL("https://superset.example.org/"), + ) + + +def test_sql_run_query_no_databases(mocker: MockerFixture) -> None: + """ + Test the ``sql`` command when no databases are found. + """ + SupersetClient = mocker.patch("preset_cli.cli.superset.sql.SupersetClient") + client = SupersetClient() + client.get_databases.return_value = [] + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + mocker.patch("preset_cli.cli.superset.sql.run_query") + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sql", + "-e", + "SELECT 1", + "--database-id", + "1", + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + assert result.output == "No databases available\n" + + +def test_sql_choose_database(mocker: MockerFixture) -> None: + """ + Test the ``sql`` command choosing a DB interactively. + """ + SupersetClient = mocker.patch("preset_cli.cli.superset.sql.SupersetClient") + client = SupersetClient() + client.get_databases.return_value = [ + {"id": 1, "database_name": "GSheets"}, + {"id": 2, "database_name": "Trino"}, + ] + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + mocker.patch("preset_cli.cli.superset.sql.input", side_effect=["3", "invalid", "1"]) + run_query = mocker.patch("preset_cli.cli.superset.sql.run_query") + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sql", + "-e", + "SELECT 1", + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + run_query.assert_called_with(client, 1, "SELECT 1") diff --git a/tests/cli/superset/sync/__init__.py b/tests/cli/superset/sync/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cli/superset/sync/dbt/__init__.py b/tests/cli/superset/sync/dbt/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cli/superset/sync/dbt/command_test.py b/tests/cli/superset/sync/dbt/command_test.py new file mode 100644 index 00000000..766463a6 --- /dev/null +++ b/tests/cli/superset/sync/dbt/command_test.py @@ -0,0 +1,196 @@ +""" +Tests for the DBT import command. +""" +# pylint: disable=invalid-name + +from pathlib import Path + +from click.testing import CliRunner +from pyfakefs.fake_filesystem import FakeFilesystem +from pytest_mock import MockerFixture + +from preset_cli.cli.superset.main import superset +from preset_cli.exceptions import DatabaseNotFoundError + + +def test_dbt(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test the ``dbt`` command. 
+ """ + root = Path("/path/to/root") + fs.create_dir(root) + manifest = root / "default/target/manifest.json" + fs.create_file(manifest) + profiles = root / ".dbt/profiles.yml" + fs.create_file(profiles) + exposures = root / "models/exposures.yml" + fs.create_file(exposures) + + SupersetClient = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.SupersetClient", + ) + client = SupersetClient() + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + sync_database = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.sync_database", + ) + sync_datasets = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.sync_datasets", + ) + sync_dashboards = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.sync_dashboards", + ) + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sync", + "dbt", + str(manifest), + "--profiles", + str(profiles), + "--exposures", + str(exposures), + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + sync_database.assert_called_with( + client, + profiles, + "default", + "dev", + False, + True, + "", + ) + sync_datasets.assert_called_with(client, manifest, sync_database(), True, "") + sync_dashboards.assert_called_with(client, exposures, sync_datasets()) + + +def test_dbt_no_exposures(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test the ``dbt`` command when no exposures file is passed. + """ + root = Path("/path/to/root") + fs.create_dir(root) + manifest = root / "default/target/manifest.json" + fs.create_file(manifest) + profiles = root / ".dbt/profiles.yml" + fs.create_file(profiles) + + mocker.patch("preset_cli.cli.superset.sync.dbt.command.SupersetClient") + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + mocker.patch("preset_cli.cli.superset.sync.dbt.command.sync_database") + mocker.patch("preset_cli.cli.superset.sync.dbt.command.sync_datasets") + sync_dashboards = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.sync_dashboards", + ) + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sync", + "dbt", + str(manifest), + "--profiles", + str(profiles), + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + sync_dashboards.assert_not_called() + + +def test_dbt_default_profile(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test the ``dbt`` command when the profile is not passed + """ + root = Path("/path/to/root") + fs.create_dir(root) + manifest = root / "default/target/manifest.json" + fs.create_file(manifest) + profiles = root / ".dbt/profiles.yml" + fs.create_file(profiles) + exposures = root / "models/exposures.yml" + fs.create_file(exposures) + + SupersetClient = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.SupersetClient", + ) + client = SupersetClient() + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + sync_database = mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.sync_database", + ) + mocker.patch("preset_cli.cli.superset.sync.dbt.command.sync_datasets") + mocker.patch("preset_cli.cli.superset.sync.dbt.command.sync_dashboards") + os = mocker.patch("preset_cli.cli.superset.sync.dbt.command.os") + os.path.expanduser.return_value = str(profiles) + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sync", + "dbt", + str(manifest), + "--exposures", + str(exposures), + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + 
sync_database.assert_called_with( + client, + profiles, + "default", + "dev", + False, + True, + "", + ) + + +def test_dbt_no_database(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test the ``dbt`` command when no database is found and ``--import-db`` not passed. + """ + root = Path("/path/to/root") + fs.create_dir(root) + manifest = root / "default/target/manifest.json" + fs.create_file(manifest) + profiles = root / ".dbt/profiles.yml" + fs.create_file(profiles) + exposures = root / "models/exposures.yml" + fs.create_file(exposures) + + mocker.patch("preset_cli.cli.superset.sync.dbt.command.SupersetClient") + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + mocker.patch( + "preset_cli.cli.superset.sync.dbt.command.sync_database", + side_effect=DatabaseNotFoundError(), + ) + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sync", + "dbt", + str(manifest), + "--profiles", + str(profiles), + "--exposures", + str(exposures), + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + assert result.output == "No database was found, pass --import-db to create\n" diff --git a/tests/cli/superset/sync/dbt/dashboards_test.py b/tests/cli/superset/sync/dbt/dashboards_test.py new file mode 100644 index 00000000..f09619e2 --- /dev/null +++ b/tests/cli/superset/sync/dbt/dashboards_test.py @@ -0,0 +1,452 @@ +""" +Tests for ``preset_cli.cli.superset.sync.dbt.dashboards``. +""" +# pylint: disable=invalid-name + +import copy +import json +from pathlib import Path +from typing import Any, Dict + +import yaml +from pyfakefs.fake_filesystem import FakeFilesystem +from pytest_mock import MockerFixture +from yarl import URL + +from preset_cli.cli.superset.sync.dbt.dashboards import get_depends_on, sync_dashboards + +dashboard_response: Dict[str, Any] = { + "result": { + "certification_details": None, + "certified_by": None, + "changed_by": { + "first_name": "admin", + "id": 1, + "last_name": "admin", + "username": "admin", + }, + "changed_by_name": "admin admin", + "changed_by_url": "/superset/profile/admin", + "changed_on": "2022-03-27T13:23:25.741970", + "changed_on_delta_humanized": "29 seconds ago", + "charts": ["Example chart"], + "css": None, + "dashboard_title": "Example dashboard", + "id": 12, + "json_metadata": None, + "owners": [ + {"first_name": "admin", "id": 1, "last_name": "admin", "username": "admin"}, + ], + "position_json": None, + "published": False, + "roles": [], + "slug": None, + "thumbnail_url": "/api/v1/dashboard/12/thumbnail/1f7a46435e3ff1fefc4adc9e73aa0ae7/", + "url": "/superset/dashboard/12/", + }, +} + + +datasets_response: Dict[str, Any] = { + "result": [ + { + "cache_timeout": None, + "column_formats": {}, + "column_types": [1, 2], + "columns": [ + { + "certification_details": None, + "certified_by": None, + "column_name": "name", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 842, + "is_certified": False, + "is_dttm": False, + "python_date_format": None, + "type": "VARCHAR(255)", + "type_generic": 1, + "verbose_name": None, + "warning_markdown": None, + }, + ], + "database": { + "allow_multi_schema_metadata_fetch": False, + "allows_cost_estimate": None, + "allows_subquery": True, + "allows_virtual_table_explore": True, + "backend": "postgresql", + "disable_data_preview": False, + "explore_database_id": 3, + "id": 3, + "name": "superset_examples_dev", + }, + "datasource_name": "messages_channels", + "default_endpoint": None, + "edit_url": 
"/tablemodelview/edit/27", + "fetch_values_predicate": None, + "filter_select": False, + "filter_select_enabled": False, + "granularity_sqla": [["ts", "ts"]], + "health_check_message": None, + "id": 27, + "is_sqllab_view": False, + "main_dttm_col": "ts", + "metrics": [ + { + "certification_details": None, + "certified_by": None, + "d3format": None, + "description": "", + "expression": "count(*)", + "id": 35, + "is_certified": False, + "metric_name": "cnt", + "verbose_name": "count(*)", + "warning_markdown": None, + "warning_text": None, + }, + ], + "name": "public.messages_channels", + "offset": 0, + "order_by_choices": [ + ['["name", True]', "name [asc]"], + ['["name", False]', "name [desc]"], + ['["text", True]', "text [asc]"], + ['["text", False]', "text [desc]"], + ['["ts", True]', "ts [asc]"], + ['["ts", False]', "ts [desc]"], + ], + "owners": [], + "params": None, + "perm": "[superset_examples_dev].[messages_channels](id:27)", + "schema": "public", + "select_star": "SELECT *\nFROM public.messages_channels\nLIMIT 100", + "sql": None, + "table_name": "messages_channels", + "template_params": None, + "time_grain_sqla": [ + [None, "Original value"], + ["PT1S", "Second"], + ["PT1M", "Minute"], + ["PT1H", "Hour"], + ["P1D", "Day"], + ["P1W", "Week"], + ["P1M", "Month"], + ["P3M", "Quarter"], + ["P1Y", "Year"], + ], + "type": "table", + "uid": "27__table", + "verbose_map": {"__timestamp": "Time", "cnt": "count(*)", "name": "name"}, + }, + ], +} + + +dataset_response = { + "description_columns": {}, + "id": 27, + "label_columns": { + "cache_timeout": "Cache Timeout", + "columns.changed_on": "Columns Changed On", + "columns.column_name": "Columns Column Name", + "columns.created_on": "Columns Created On", + "columns.description": "Columns Description", + "columns.expression": "Columns Expression", + "columns.extra": "Columns Extra", + "columns.filterable": "Columns Filterable", + "columns.groupby": "Columns Groupby", + "columns.id": "Columns Id", + "columns.is_active": "Columns Is Active", + "columns.is_dttm": "Columns Is Dttm", + "columns.python_date_format": "Columns Python Date Format", + "columns.type": "Columns Type", + "columns.type_generic": "Columns Type Generic", + "columns.uuid": "Columns Uuid", + "columns.verbose_name": "Columns Verbose Name", + "database.backend": "Database Backend", + "database.database_name": "Database Database Name", + "database.id": "Database Id", + "datasource_type": "Datasource Type", + "default_endpoint": "Default Endpoint", + "description": "Description", + "extra": "Extra", + "fetch_values_predicate": "Fetch Values Predicate", + "filter_select_enabled": "Filter Select Enabled", + "id": "Id", + "is_sqllab_view": "Is Sqllab View", + "main_dttm_col": "Main Dttm Col", + "metrics": "Metrics", + "offset": "Offset", + "owners.first_name": "Owners First Name", + "owners.id": "Owners Id", + "owners.last_name": "Owners Last Name", + "owners.username": "Owners Username", + "schema": "Schema", + "sql": "Sql", + "table_name": "Table Name", + "template_params": "Template Params", + "url": "Url", + }, + "result": { + "cache_timeout": None, + "columns": [ + { + "changed_on": "2022-03-27T13:21:33.957609", + "column_name": "ts", + "created_on": "2022-03-27T13:21:33.957602", + "description": None, + "expression": None, + "extra": None, + "filterable": True, + "groupby": True, + "id": 841, + "is_active": True, + "is_dttm": True, + "python_date_format": None, + "type": "TIMESTAMP WITHOUT TIME ZONE", + "type_generic": 2, + "uuid": "e607d7fd-90bf-4420-a35e-9dc7af555e0d", + 
"verbose_name": None, + }, + { + "changed_on": "2022-03-27T13:21:33.958499", + "column_name": "name", + "created_on": "2022-03-27T13:21:33.958493", + "description": None, + "expression": None, + "extra": None, + "filterable": True, + "groupby": True, + "id": 842, + "is_active": True, + "is_dttm": False, + "python_date_format": None, + "type": "VARCHAR(255)", + "type_generic": 1, + "uuid": "76a523f0-1aad-4608-87a4-daf22172e1da", + "verbose_name": None, + }, + { + "changed_on": "2022-03-27T13:21:33.975750", + "column_name": "text", + "created_on": "2022-03-27T13:21:33.975743", + "description": None, + "expression": None, + "extra": None, + "filterable": True, + "groupby": True, + "id": 843, + "is_active": True, + "is_dttm": False, + "python_date_format": None, + "type": "TEXT", + "type_generic": 1, + "uuid": "610b91b0-e8de-4703-bbe9-5b27fb6c6a4e", + "verbose_name": None, + }, + ], + "database": { + "backend": "postgresql", + "database_name": "superset_examples_dev", + "id": 3, + }, + "datasource_type": "table", + "default_endpoint": None, + "description": "", + "extra": json.dumps( + { + "resource_type": "model", + "unique_id": "model.superset_examples.messages_channels", + "depends_on": "ref('messages_channels')", + }, + ), + "fetch_values_predicate": None, + "filter_select_enabled": False, + "id": 27, + "is_sqllab_view": False, + "main_dttm_col": "ts", + "metrics": [ + { + "changed_on": "2022-03-27T13:21:34.298657", + "created_on": "2022-03-27T13:21:34.248023", + "d3format": None, + "description": "", + "expression": "count(*)", + "extra": None, + "id": 35, + "metric_name": "cnt", + "metric_type": "count", + "uuid": "c4b74ceb-a19c-494a-9b90-9c68ed5bc8cb", + "verbose_name": "count(*)", + "warning_text": None, + }, + ], + "offset": 0, + "owners": [], + "schema": "public", + "sql": None, + "table_name": "messages_channels", + "template_params": None, + "url": "/tablemodelview/edit/27", + }, + "show_columns": [ + "id", + "database.database_name", + "database.id", + "table_name", + "sql", + "filter_select_enabled", + "fetch_values_predicate", + "schema", + "description", + "main_dttm_col", + "offset", + "default_endpoint", + "cache_timeout", + "is_sqllab_view", + "template_params", + "owners.id", + "owners.username", + "owners.first_name", + "owners.last_name", + "columns.changed_on", + "columns.column_name", + "columns.created_on", + "columns.description", + "columns.expression", + "columns.filterable", + "columns.groupby", + "columns.id", + "columns.is_active", + "columns.extra", + "columns.is_dttm", + "columns.python_date_format", + "columns.type", + "columns.uuid", + "columns.verbose_name", + "metrics", + "datasource_type", + "url", + "extra", + "columns.type_generic", + "database.backend", + ], + "show_title": "Show Sqla Table", +} + +related_objects_response = { + "charts": { + "count": 1, + "result": [{"id": 134, "slice_name": "Example chart", "viz_type": "pie"}], + }, + "dashboards": { + "count": 1, + "result": [ + { + "id": 12, + "json_metadata": None, + "slug": None, + "title": "Example dashboard", + }, + ], + }, +} + + +def test_get_depends_on(mocker: MockerFixture) -> None: + """ + Test ``get_depends_on``. 
+ """ + client = mocker.MagicMock() + client.get_dataset.return_value = dataset_response + session = client.auth.get_session() + session.get().json.return_value = datasets_response + + depends_on = get_depends_on(client, dashboard_response["result"]) + assert depends_on == ["ref('messages_channels')"] + + +def test_get_depends_on_no_extra(mocker: MockerFixture) -> None: + """ + Test ``get_depends_on``. + """ + client = mocker.MagicMock() + modified_dataset_response = copy.deepcopy(dataset_response) + modified_dataset_response["result"]["extra"] = None # type: ignore + client.get_dataset.return_value = modified_dataset_response + session = client.auth.get_session() + session.get().json.return_value = datasets_response + + depends_on = get_depends_on(client, dashboard_response["result"]) + assert depends_on == [] + + +def test_sync_dashboards(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_dashboards``. + """ + root = Path("/path/to/root") + fs.create_dir(root / "models") + exposures = root / "models/exposures.yml" + + client = mocker.MagicMock() + client.baseurl = URL("https://superset.example.org/") + client.get_dashboards.return_value = [dashboard_response["result"]] + session = client.auth.get_session() + session.get().json.return_value = related_objects_response + mocker.patch( + "preset_cli.cli.superset.sync.dbt.dashboards.get_depends_on", + return_value=["ref('messages_channels')"], + ) + + datasets = [dataset_response["result"]] + sync_dashboards(client, exposures, datasets) + + with open(exposures, encoding="utf-8") as input_: + contents = yaml.load(input_, Loader=yaml.SafeLoader) + assert contents == { + "version": 2, + "exposures": [ + { + "name": "Example dashboard", + "type": "dashboard", + "maturity": "low", + "url": "https://superset.example.org/superset/dashboard/12/", + "description": "", + "depends_on": ["ref('messages_channels')"], + "owner": {"name": "admin admin", "email": "unknown"}, + }, + ], + } + + +def test_sync_dashboards_no_dashboards( + mocker: MockerFixture, + fs: FakeFilesystem, +) -> None: + """ + Test ``sync_dashboards`` when no dashboads use the datasets. + """ + root = Path("/path/to/root") + fs.create_dir(root / "models") + exposures = root / "models/exposures.yml" + + client = mocker.MagicMock() + client.baseurl = URL("https://superset.example.org/") + client.get_dashboards.return_value = [] + session = client.auth.get_session() + session.get().json.return_value = related_objects_response + + datasets = [dataset_response["result"]] + sync_dashboards(client, exposures, datasets) + + with open(exposures, encoding="utf-8") as input_: + contents = yaml.load(input_, Loader=yaml.SafeLoader) + assert contents == { + "version": 2, + "exposures": [], + } diff --git a/tests/cli/superset/sync/dbt/databases_test.py b/tests/cli/superset/sync/dbt/databases_test.py new file mode 100644 index 00000000..62c87d89 --- /dev/null +++ b/tests/cli/superset/sync/dbt/databases_test.py @@ -0,0 +1,233 @@ +""" +Tests for ``preset_cli.cli.superset.sync.dbt.databases``. +""" +# pylint: disable=invalid-name + +from pathlib import Path + +import pytest +import yaml +from pyfakefs.fake_filesystem import FakeFilesystem +from pytest_mock import MockerFixture + +from preset_cli.cli.superset.sync.dbt.databases import sync_database +from preset_cli.exceptions import DatabaseNotFoundError + + +def test_sync_database_new(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_database`` when we want to import a new DB. 
+ """ + fs.create_file( + "/path/to/.dbt/profiles.yml", + contents=yaml.dump({"my_project": {"outputs": {"dev": {}}}}), + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.databases.build_sqlalchemy_uri", + return_value="dummy://", + ) + client = mocker.MagicMock() + client.get_databases.return_value = [] + + sync_database( + client=client, + profiles_path=Path("/path/to/.dbt/profiles.yml"), + project_name="my_project", + target_name="dev", + import_db=True, + disallow_edits=False, + external_url_prefix="", + ) + + client.create_database.assert_called_with( + database_name="my_project_dev", + sqlalchemy_uri="dummy://", + ) + + +def test_sync_database_no_project(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_database`` when the project is invalid. + """ + fs.create_file( + "/path/to/.dbt/profiles.yml", + contents=yaml.dump({"my_project": {"outputs": {"dev": {}}}}), + ) + client = mocker.MagicMock() + client.get_databases.return_value = [] + + with pytest.raises(Exception) as excinfo: + sync_database( + client=client, + profiles_path=Path("/path/to/.dbt/profiles.yml"), + project_name="my_other_project", + target_name="dev", + import_db=True, + disallow_edits=False, + external_url_prefix="", + ) + assert ( + str(excinfo.value) + == "Project my_other_project not found in /path/to/.dbt/profiles.yml" + ) + + +def test_sync_database_no_target(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_database`` when the target is invalid. + """ + fs.create_file( + "/path/to/.dbt/profiles.yml", + contents=yaml.dump({"my_project": {"outputs": {"dev": {}}}}), + ) + client = mocker.MagicMock() + client.get_databases.return_value = [] + + with pytest.raises(Exception) as excinfo: + sync_database( + client=client, + profiles_path=Path("/path/to/.dbt/profiles.yml"), + project_name="my_project", + target_name="prod", + import_db=True, + disallow_edits=False, + external_url_prefix="", + ) + assert ( + str(excinfo.value) + == "Target prod not found in the outputs of /path/to/.dbt/profiles.yml" + ) + + +def test_sync_database_multiple_databases( + mocker: MockerFixture, + fs: FakeFilesystem, +) -> None: + """ + Test ``sync_database`` when multiple databases are found. + + This should not happen, since database names are unique. + """ + fs.create_file( + "/path/to/.dbt/profiles.yml", + contents=yaml.dump({"my_project": {"outputs": {"dev": {}}}}), + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.databases.build_sqlalchemy_uri", + return_value="dummy://", + ) + client = mocker.MagicMock() + client.get_databases.return_value = [ + {"id": 1, "database_name": "my_project_dev", "sqlalchemy_uri": "dummy://"}, + {"id": 2, "database_name": "my_project_dev", "sqlalchemy_uri": "dummy://"}, + ] + + with pytest.raises(Exception) as excinfo: + sync_database( + client=client, + profiles_path=Path("/path/to/.dbt/profiles.yml"), + project_name="my_project", + target_name="dev", + import_db=True, + disallow_edits=False, + external_url_prefix="", + ) + assert ( + str(excinfo.value) + == "More than one database with the same SQLAlchemy URI and name found" + ) + + +def test_sync_database_external_url_prefix( + mocker: MockerFixture, + fs: FakeFilesystem, +) -> None: + """ + Test ``sync_database`` with an external URL prefix. 
+ """ + fs.create_file( + "/path/to/.dbt/profiles.yml", + contents=yaml.dump({"my_project": {"outputs": {"dev": {}}}}), + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.databases.build_sqlalchemy_uri", + return_value="dummy://", + ) + client = mocker.MagicMock() + client.get_databases.return_value = [] + + sync_database( + client=client, + profiles_path=Path("/path/to/.dbt/profiles.yml"), + project_name="my_project", + target_name="dev", + import_db=True, + disallow_edits=False, + external_url_prefix="https://dbt.example.org/", + ) + + client.create_database.assert_called_with( + database_name="my_project_dev", + sqlalchemy_uri="dummy://", + external_url="https://dbt.example.org/#!/overview", + ) + + +def test_sync_database_existing(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_database`` when we want to import an existing DB. + """ + fs.create_file( + "/path/to/.dbt/profiles.yml", + contents=yaml.dump({"my_project": {"outputs": {"dev": {}}}}), + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.databases.build_sqlalchemy_uri", + return_value="dummy://", + ) + client = mocker.MagicMock() + client.get_databases.return_value = [ + {"id": 1, "database_name": "my_project_dev", "sqlalchemy_uri": "dummy://"}, + ] + + sync_database( + client=client, + profiles_path=Path("/path/to/.dbt/profiles.yml"), + project_name="my_project", + target_name="dev", + import_db=True, + disallow_edits=False, + external_url_prefix="", + ) + + client.update_database.assert_called_with( + database_id=1, + database_name="my_project_dev", + ) + + +def test_sync_database_new_no_import(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_database`` when we want to import a new DB. + """ + fs.create_file( + "/path/to/.dbt/profiles.yml", + contents=yaml.dump({"my_project": {"outputs": {"dev": {}}}}), + ) + mocker.patch( + "preset_cli.cli.superset.sync.dbt.databases.build_sqlalchemy_uri", + return_value="dummy://", + ) + client = mocker.MagicMock() + client.get_databases.return_value = [] + + with pytest.raises(DatabaseNotFoundError): + sync_database( + client=client, + profiles_path=Path("/path/to/.dbt/profiles.yml"), + project_name="my_project", + target_name="dev", + import_db=False, + disallow_edits=False, + external_url_prefix="", + ) diff --git a/tests/cli/superset/sync/dbt/datasets_test.py b/tests/cli/superset/sync/dbt/datasets_test.py new file mode 100644 index 00000000..dc1625d1 --- /dev/null +++ b/tests/cli/superset/sync/dbt/datasets_test.py @@ -0,0 +1,464 @@ +""" +Tests for ``preset_cli.cli.superset.sync.dbt.datasets``. 
+""" +# pylint: disable=invalid-name + +import json +from pathlib import Path +from unittest import mock + +import pytest +from pyfakefs.fake_filesystem import FakeFilesystem +from pytest_mock import MockerFixture + +from preset_cli.cli.superset.sync.dbt.datasets import sync_datasets + +manifest_config = { + "metrics": { + "metric.superset_examples.cnt": { + "fqn": ["superset_examples", "slack", "cnt"], + "unique_id": "metric.superset_examples.cnt", + "package_name": "superset_examples", + "root_path": "/Users/beto/Projects/dbt-examples/superset_examples", + "path": "slack/schema.yml", + "original_file_path": "models/slack/schema.yml", + "model": "ref('messages_channels')", + "name": "cnt", + "description": "", + "label": "", + "type": "count", + "sql": "*", + "timestamp": None, + "filters": [], + "time_grains": [], + "dimensions": [], + "resource_type": "metric", + "meta": {}, + "tags": [], + "sources": [], + "depends_on": { + "macros": [], + "nodes": ["model.superset_examples.messages_channels"], + }, + "refs": [["messages_channels"]], + "created_at": 1642630986.1942852, + }, + }, + "sources": { + "source.superset_examples.public.messages": { + "fqn": ["superset_examples", "slack", "public", "messages"], + "database": "examples_dev", + "schema": "public", + "unique_id": "source.superset_examples.public.messages", + "package_name": "superset_examples", + "root_path": "/Users/beto/Projects/dbt-examples/superset_examples", + "path": "models/slack/schema.yml", + "original_file_path": "models/slack/schema.yml", + "name": "messages", + "source_name": "public", + "source_description": "", + "loader": "", + "identifier": "messages", + "resource_type": "source", + "quoting": { + "database": None, + "schema": None, + "identifier": None, + "column": None, + }, + "loaded_at_field": None, + "freshness": { + "warn_after": {"count": None, "period": None}, + "error_after": {"count": None, "period": None}, + "filter": None, + }, + "external": None, + "description": "Messages in the Slack channel", + "columns": {}, + "meta": {}, + "source_meta": {}, + "tags": [], + "config": {"enabled": True}, + "patch_path": None, + "unrendered_config": {}, + "relation_name": '"examples_dev"."public"."messages"', + "created_at": 1642628933.0432189, + }, + "source.superset_examples.public.channels": { + "fqn": ["superset_examples", "slack", "public", "channels"], + "database": "examples_dev", + "schema": "public", + "unique_id": "source.superset_examples.public.channels", + "package_name": "superset_examples", + "root_path": "/Users/beto/Projects/dbt-examples/superset_examples", + "path": "models/slack/schema.yml", + "original_file_path": "models/slack/schema.yml", + "name": "channels", + "source_name": "public", + "source_description": "", + "loader": "", + "identifier": "channels", + "resource_type": "source", + "quoting": { + "database": None, + "schema": None, + "identifier": None, + "column": None, + }, + "loaded_at_field": None, + "freshness": { + "warn_after": {"count": None, "period": None}, + "error_after": {"count": None, "period": None}, + "filter": None, + }, + "external": None, + "description": "Information about Slack channels", + "columns": {}, + "meta": {}, + "source_meta": {}, + "tags": [], + "config": {"enabled": True}, + "patch_path": None, + "unrendered_config": {}, + "relation_name": '"examples_dev"."public"."channels"', + "created_at": 1642628933.043388, + }, + }, + "nodes": { + "model.superset_examples.messages_channels": { + "raw_sql": """SELECT + messages.ts, + channels.name, + messages.text +FROM 
+ {{ source ('public', 'messages') }} messages + JOIN {{ source ('public', 'channels') }} channels ON messages.channel_id = channels.id""", + "compiled": True, + "resource_type": "model", + "depends_on": { + "macros": [], + "nodes": [ + "source.superset_examples.public.channels", + "source.superset_examples.public.messages", + ], + }, + "config": { + "enabled": True, + "alias": None, + "schema": None, + "database": None, + "tags": [], + "meta": {}, + "materialized": "view", + "persist_docs": {}, + "quoting": {}, + "column_types": {}, + "full_refresh": None, + "on_schema_change": "ignore", + "post-hook": [], + "pre-hook": [], + }, + "database": "examples_dev", + "schema": "public", + "fqn": ["superset_examples", "slack", "messages_channels"], + "unique_id": "model.superset_examples.messages_channels", + "package_name": "superset_examples", + "root_path": "/Users/beto/Projects/dbt-examples/superset_examples", + "path": "slack/messages_channels.sql", + "original_file_path": "models/slack/messages_channels.sql", + "name": "messages_channels", + "alias": "messages_channels", + "checksum": { + "name": "sha256", + "checksum": "b4ce232b28280daa522b37e12c36b67911e2a98456b8a3b99440075ec5564609", + }, + "tags": [], + "refs": [], + "sources": [["public", "channels"], ["public", "messages"]], + "description": "", + "columns": {}, + "meta": {}, + "docs": {"show": True}, + "patch_path": None, + "compiled_path": "target/compiled/superset_examples/models/slack/messages_channels.sql", + "build_path": None, + "deferred": False, + "unrendered_config": {"materialized": "view"}, + "created_at": 1642628933.004452, + "compiled_sql": """SELECT + messages.ts, + channels.name, + messages.text +FROM + "examples_dev"."public"."messages" messages + JOIN "examples_dev"."public"."channels" channels ON messages.channel_id = channels.id""", + "extra_ctes_injected": True, + "extra_ctes": [], + "relation_name": '"examples_dev"."public"."messages_channels"', + }, + }, +} + + +def test_sync_datasets_new(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_datasets`` when no datasets exist yet. 
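+
+ The ``cnt`` metric in the fixture above has type ``count`` and sql ``*``;
+ the assertions below expect it to surface in Superset with expression
+ ``count(*)``. A plausible conversion, with a hypothetical helper name
+ inferred only from these expectations:
+
+     def dbt_metric_to_superset(metric: dict) -> dict:
+         # "count" + "*" -> "count(*)"
+         expression = f"{metric['type']}({metric['sql']})"
+         return {
+             "expression": expression,
+             "metric_name": metric["name"],
+             "metric_type": metric["type"],
+             "verbose_name": expression,
+             "description": metric["description"],
+         }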
+ """ + manifest = Path("/path/to/root/default/target/manifest.json") + fs.create_file(manifest, contents=json.dumps(manifest_config)) + client = mocker.MagicMock() + client.get_datasets.return_value = [] + client.create_dataset.side_effect = [{"id": 1}, {"id": 2}, {"id": 3}] + + sync_datasets( + client=client, + manifest_path=manifest, + database={"id": 1}, + disallow_edits=False, + external_url_prefix="", + ) + client.create_dataset.assert_has_calls( + [ + mock.call(database=1, schema="public", table_name="messages"), + mock.call(database=1, schema="public", table_name="channels"), + mock.call(database=1, schema="public", table_name="messages_channels"), + ], + ) + client.update_dataset.assert_has_calls( + [ + mock.call( + 1, + description="Messages in the Slack channel", + extra=json.dumps( + { + "resource_type": "source", + "unique_id": "source.superset_examples.public.messages", + "depends_on": "source('public', 'messages')", + }, + ), + is_managed_externally=False, + metrics=[], + ), + mock.call( + 2, + description="Information about Slack channels", + extra=json.dumps( + { + "resource_type": "source", + "unique_id": "source.superset_examples.public.channels", + "depends_on": "source('public', 'channels')", + }, + ), + is_managed_externally=False, + metrics=[], + ), + mock.call( + 3, + description="", + extra=json.dumps( + { + "resource_type": "model", + "unique_id": "model.superset_examples.messages_channels", + "depends_on": "ref('messages_channels')", + }, + ), + is_managed_externally=False, + metrics=[], + ), + mock.call( + 3, + metrics=[ + { + "expression": "count(*)", + "metric_name": "cnt", + "metric_type": "count", + "verbose_name": "count(*)", + "description": "", + }, + ], + ), + ], + ) + + +def test_sync_datasets_existing(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_datasets`` when datasets already exist. + """ + manifest = Path("/path/to/root/default/target/manifest.json") + fs.create_file(manifest, contents=json.dumps(manifest_config)) + client = mocker.MagicMock() + client.get_datasets.side_effect = [[{"id": 1}], [{"id": 2}], [{"id": 3}]] + + sync_datasets( + client=client, + manifest_path=manifest, + database={"id": 1}, + disallow_edits=False, + external_url_prefix="", + ) + client.create_dataset.assert_not_called() + client.update_dataset.assert_has_calls( + [ + mock.call( + 1, + description="Messages in the Slack channel", + extra=json.dumps( + { + "resource_type": "source", + "unique_id": "source.superset_examples.public.messages", + "depends_on": "source('public', 'messages')", + }, + ), + is_managed_externally=False, + metrics=[], + ), + mock.call( + 2, + description="Information about Slack channels", + extra=json.dumps( + { + "resource_type": "source", + "unique_id": "source.superset_examples.public.channels", + "depends_on": "source('public', 'channels')", + }, + ), + is_managed_externally=False, + metrics=[], + ), + mock.call( + 3, + description="", + extra=json.dumps( + { + "resource_type": "model", + "unique_id": "model.superset_examples.messages_channels", + "depends_on": "ref('messages_channels')", + }, + ), + is_managed_externally=False, + metrics=[], + ), + mock.call( + 3, + metrics=[ + { + "expression": "count(*)", + "metric_name": "cnt", + "metric_type": "count", + "verbose_name": "count(*)", + "description": "", + }, + ], + ), + ], + ) + + +def test_sync_datasets_multiple_existing( + mocker: MockerFixture, + fs: FakeFilesystem, +) -> None: + """ + Test ``sync_datasets`` when multiple datasets are found to exist. 
+ """ + manifest = Path("/path/to/root/default/target/manifest.json") + fs.create_file(manifest, contents=json.dumps(manifest_config)) + client = mocker.MagicMock() + client.get_datasets.return_value = [{"id": 1}, {"id": 2}] + + with pytest.raises(Exception) as excinfo: + sync_datasets( + client=client, + manifest_path=manifest, + database={"id": 1}, + disallow_edits=False, + external_url_prefix="", + ) + assert str(excinfo.value) == "More than one dataset found" + + +def test_sync_datasets_external_url(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``sync_datasets`` when passing external URL prefix. + """ + manifest = Path("/path/to/root/default/target/manifest.json") + fs.create_file(manifest, contents=json.dumps(manifest_config)) + client = mocker.MagicMock() + client.get_datasets.side_effect = [[{"id": 1}], [{"id": 2}], [{"id": 3}]] + + sync_datasets( + client=client, + manifest_path=manifest, + database={"id": 1}, + disallow_edits=False, + external_url_prefix="https://dbt.example.org/", + ) + client.create_dataset.assert_not_called() + client.update_dataset.assert_has_calls( + [ + mock.call( + 1, + description="Messages in the Slack channel", + extra=json.dumps( + { + "resource_type": "source", + "unique_id": "source.superset_examples.public.messages", + "depends_on": "source('public', 'messages')", + }, + ), + is_managed_externally=False, + metrics=[], + external_url=( + "https://dbt.example.org/#!/source/" + "source.superset_examples.public.messages" + ), + ), + mock.call( + 2, + description="Information about Slack channels", + extra=json.dumps( + { + "resource_type": "source", + "unique_id": "source.superset_examples.public.channels", + "depends_on": "source('public', 'channels')", + }, + ), + is_managed_externally=False, + metrics=[], + external_url=( + "https://dbt.example.org/#!/source/" + "source.superset_examples.public.channels" + ), + ), + mock.call( + 3, + description="", + extra=json.dumps( + { + "resource_type": "model", + "unique_id": "model.superset_examples.messages_channels", + "depends_on": "ref('messages_channels')", + }, + ), + is_managed_externally=False, + metrics=[], + external_url=( + "https://dbt.example.org/#!/model/" + "model.superset_examples.messages_channels" + ), + ), + mock.call( + 3, + metrics=[ + { + "expression": "count(*)", + "metric_name": "cnt", + "metric_type": "count", + "verbose_name": "count(*)", + "description": "", + }, + ], + ), + ], + ) diff --git a/tests/cli/superset/sync/dbt/lib_test.py b/tests/cli/superset/sync/dbt/lib_test.py new file mode 100644 index 00000000..3aa12ba2 --- /dev/null +++ b/tests/cli/superset/sync/dbt/lib_test.py @@ -0,0 +1,53 @@ +""" +Test for ``preset_cli.cli.superset.sync.dbt.lib``. +""" + +import pytest + +from preset_cli.cli.superset.sync.dbt.lib import build_sqlalchemy_uri + + +def test_build_sqlalchemy_uri_postgres() -> None: + """ + Test ``build_sqlalchemy_uri`` for PostgreSQL. + """ + config = { + "type": "postgres", + "user": "username", + "pass": "password123", + "host": "localhost", + "port": 5432, + "dbname": "db", + } + assert ( + str(build_sqlalchemy_uri(config)) + == "postgresql+psycopg2://username:password123@localhost:5432/db" + ) + + +def test_build_sqlalchemy_uri_bigquery() -> None: + """ + Test ``build_sqlalchemy_uri`` for BigQuery. 
+ """ + config = { + "type": "bigquery", + "project": "my_project", + "keyfile": "/path/to/credentials.json", + } + assert ( + str(build_sqlalchemy_uri(config)) + == "bigquery://my_project?credentials_path=%2Fpath%2Fto%2Fcredentials.json" + ) + + +def test_build_sqlalchemy_uri_unsupported() -> None: + """ + Test ``build_sqlalchemy_uri`` for databases currently unsupported. + """ + config = {"type": "mysql"} + with pytest.raises(Exception) as excinfo: + build_sqlalchemy_uri(config) + assert str(excinfo.value) == ( + "Unable to build a SQLAlchemy URI for a target of type mysql. Please file " + "an issue at https://github.com/preset-io/superset-sdk/issues/new." + ) diff --git a/tests/cli/superset/sync/native/__init__.py b/tests/cli/superset/sync/native/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cli/superset/sync/native/command_test.py b/tests/cli/superset/sync/native/command_test.py new file mode 100644 index 00000000..811b758a --- /dev/null +++ b/tests/cli/superset/sync/native/command_test.py @@ -0,0 +1,273 @@ +""" +Tests for the native import command. +""" +# pylint: disable=redefined-outer-name, invalid-name + +from pathlib import Path +from typing import List +from unittest import mock +from zipfile import ZipFile + +import pytest +import yaml +from click.testing import CliRunner +from freezegun import freeze_time +from pyfakefs.fake_filesystem import FakeFilesystem +from pytest_mock import MockerFixture + +from preset_cli.cli.superset.main import superset +from preset_cli.cli.superset.sync.native.command import ( + import_resource, + load_user_modules, + prompt_for_passwords, +) +from preset_cli.exceptions import ErrorLevel, ErrorPayload, SupersetError + + +def test_prompt_for_passwords(mocker: MockerFixture) -> None: + """ + Test ``prompt_for_passwords``. + """ + getpass = mocker.patch("preset_cli.cli.superset.sync.native.command.getpass") + + config = {"sqlalchemy_uri": "postgresql://user:XXXXXXXXXX@host:5432/db"} + path = Path("/path/to/root/databases/psql.yaml") + prompt_for_passwords(path, config) + + getpass.getpass.assert_called_with(f"Please provide the password for {path}: ") + + config["password"] = "password123" + getpass.reset_mock() + prompt_for_passwords(path, config) + getpass.getpass.assert_not_called() + + +def test_import_resource(mocker: MockerFixture) -> None: + """ + Test ``import_resource``. + """ + client = mocker.MagicMock() + + contents = {"bundle/databases/gsheets.yaml": "GSheets"} + with freeze_time("2022-01-01T00:00:00Z"): + import_resource("database", contents=contents, client=client, overwrite=False) + + call = client.import_zip.mock_calls[0] + assert call.kwargs == {"overwrite": False} + + resource, buf = call.args + assert resource == "database" + with ZipFile(buf) as bundle: + assert bundle.namelist() == [ + "bundle/databases/gsheets.yaml", + "bundle/metadata.yaml", + ] + assert ( + bundle.read("bundle/metadata.yaml").decode() + == "timestamp: '2022-01-01T00:00:00+00:00'\ntype: Database\nversion: 1.0.0\n" + ) + assert bundle.read("bundle/databases/gsheets.yaml").decode() == "GSheets" + + +def test_import_resource_overwrite_needed(mocker: MockerFixture) -> None: + """ + Test ``import_resource`` when an overwrite error is raised. 
+ """ + click = mocker.patch("preset_cli.cli.superset.sync.native.command.click") + client = mocker.MagicMock() + errors: List[ErrorPayload] = [ + { + "message": "Error importing database", + "error_type": "GENERIC_COMMAND_ERROR", + "level": ErrorLevel.WARNING, + "extra": { + "databases/gsheets.yaml": ( + "Database already exists and `overwrite=true` was not passed" + ), + "issue_codes": [ + { + "code": 1010, + "message": ( + "Issue 1010 - Superset encountered an " + "error while running a command." + ), + }, + ], + }, + }, + ] + client.import_zip.side_effect = SupersetError(errors) + + contents = {"bundle/databases/gsheets.yaml": "GSheets"} + import_resource("database", contents=contents, client=client, overwrite=False) + + assert click.style.called_with( + "The following file(s) already exist. Pass --overwrite to replace them.\n" + "databases/gsheets.yaml", + fg="bright_red", + ) + + +def test_import_resource_error(mocker: MockerFixture) -> None: + """ + Test ``import_resource`` when an unexpected error is raised. + """ + client = mocker.MagicMock() + errors: List[ErrorPayload] = [ + { + "message": "Error importing database", + "error_type": "GENERIC_COMMAND_ERROR", + "level": ErrorLevel.WARNING, + "extra": { + "issue_codes": [ + { + "code": 1010, + "message": ( + "Issue 1010 - Superset encountered an " + "error while running a command." + ), + }, + ], + }, + }, + ] + client.import_zip.side_effect = SupersetError(errors) + + contents = {"bundle/databases/gsheets.yaml": "GSheets"} + with pytest.raises(SupersetError) as excinfo: + import_resource("database", contents=contents, client=client, overwrite=False) + assert excinfo.value.errors == errors + + +def test_native(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test the ``native`` command. + """ + root = Path("/path/to/root") + fs.create_dir(root) + database_config = {"database_name": "GSheets", "sqlalchemy_uri": "gsheets://"} + dataset_config = {"table_name": "test"} + fs.create_file( + root / "databases/gsheets.yaml", + contents=yaml.dump(database_config), + ) + fs.create_file( + root / "datasets/gsheets/test.yaml", + contents=yaml.dump(dataset_config), + ) + fs.create_file( + root / "README.txt", + contents="Hello, world", + ) + + SupersetClient = mocker.patch( + "preset_cli.cli.superset.sync.native.command.SupersetClient", + ) + client = SupersetClient() + import_resource = mocker.patch( + "preset_cli.cli.superset.sync.native.command.import_resource", + ) + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + + runner = CliRunner() + result = runner.invoke( + superset, + ["https://superset.example.org/", "sync", "native", str(root)], + catch_exceptions=False, + ) + assert result.exit_code == 0 + contents = { + "bundle/databases/gsheets.yaml": yaml.dump(database_config), + "bundle/datasets/gsheets/test.yaml": yaml.dump(dataset_config), + } + import_resource.assert_has_calls( + [ + mock.call("database", contents, client, False), + mock.call("dataset", contents, client, False), + mock.call("chart", contents, client, False), + mock.call("dashboard", contents, client, False), + ], + ) + + +def test_native_external_url(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test the ``native`` command with an external URL. 
+ """ + root = Path("/path/to/root") + fs.create_dir(root) + database_config = {"database_name": "GSheets", "sqlalchemy_uri": "gsheets://"} + dataset_config = {"table_name": "test"} + fs.create_file( + root / "databases/gsheets.yaml", + contents=yaml.dump(database_config), + ) + fs.create_file( + root / "datasets/gsheets/test.yaml", + contents=yaml.dump(dataset_config), + ) + + SupersetClient = mocker.patch( + "preset_cli.cli.superset.sync.native.command.SupersetClient", + ) + client = SupersetClient() + import_resource = mocker.patch( + "preset_cli.cli.superset.sync.native.command.import_resource", + ) + mocker.patch("preset_cli.cli.superset.main.UsernamePasswordAuth") + + runner = CliRunner() + result = runner.invoke( + superset, + [ + "https://superset.example.org/", + "sync", + "native", + str(root), + "--external-url-prefix", + "https://repo.example.com/", + ], + catch_exceptions=False, + ) + assert result.exit_code == 0 + database_config["external_url"] = "https://repo.example.com/databases/gsheets.yaml" + dataset_config[ + "external_url" + ] = "https://repo.example.com/datasets/gsheets/test.yaml" + contents = { + "bundle/databases/gsheets.yaml": yaml.dump(database_config), + "bundle/datasets/gsheets/test.yaml": yaml.dump(dataset_config), + } + import_resource.assert_has_calls( + [ + mock.call("database", contents, client, False), + mock.call("dataset", contents, client, False), + mock.call("chart", contents, client, False), + mock.call("dashboard", contents, client, False), + ], + ) + + +def test_load_user_modules(mocker: MockerFixture, fs: FakeFilesystem) -> None: + """ + Test ``load_user_modules``. + """ + importlib = mocker.patch("preset_cli.cli.superset.sync.native.command.importlib") + spec = mocker.MagicMock() + importlib.util.spec_from_file_location.side_effect = [spec, None] + + root = Path("/path/to/root") + fs.create_dir(root) + fs.create_file(root / "test.py") + fs.create_file(root / "invalid.py") + fs.create_file(root / "test.txt") + + load_user_modules(root) + + importlib.util.spec_from_file_location.assert_has_calls( + [ + mock.call("test", root / "test.py"), + mock.call("invalid", root / "invalid.py"), + ], + ) + assert importlib.util.module_from_spec.call_count == 1 diff --git a/tests/lib_test.py b/tests/lib_test.py new file mode 100644 index 00000000..0f86670c --- /dev/null +++ b/tests/lib_test.py @@ -0,0 +1,12 @@ +""" +Tests for ``preset_cli.lib``. +""" + +from preset_cli.lib import remove_root + + +def test_remove_root() -> None: + """ + Test ``remove_root``. + """ + assert remove_root("bundle/database/examples.yaml") == "database/examples.yaml"