From 2edfd07ffaa28241f98be8fa13c99ad0499a57ff Mon Sep 17 00:00:00 2001 From: AndrewChubatiuk Date: Sun, 21 Apr 2024 00:28:09 +0300 Subject: [PATCH 1/3] fixed athena query runner auth --- redash/query_runner/athena.py | 70 ++++++++++++----------------------- 1 file changed, 23 insertions(+), 47 deletions(-) diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 0a5f648417..dc4a1fd335 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -15,9 +15,6 @@ logger = logging.getLogger(__name__) ANNOTATE_QUERY = parse_boolean(os.environ.get("ATHENA_ANNOTATE_QUERY", "true")) -SHOW_EXTRA_SETTINGS = parse_boolean(os.environ.get("ATHENA_SHOW_EXTRA_SETTINGS", "true")) -ASSUME_ROLE = parse_boolean(os.environ.get("ATHENA_ASSUME_ROLE", "false")) -OPTIONAL_CREDENTIALS = parse_boolean(os.environ.get("ATHENA_OPTIONAL_CREDENTIALS", "true")) try: import boto3 @@ -65,6 +62,11 @@ def configuration_schema(cls): schema = { "type": "object", "properties": { + "iam_role": {"type": "string", "title": "IAM role to assume"}, + "external_id": { + "type": "string", + "title": "External ID to be used while STS assume role", + }, "region": {"type": "string", "title": "AWS Region"}, "aws_access_key": {"type": "string", "title": "AWS Access Key"}, "aws_secret_key": {"type": "string", "title": "AWS Secret Key"}, @@ -88,11 +90,20 @@ def configuration_schema(cls): "title": "Athena cost per Tb scanned (USD)", "default": 5, }, + "encryption_option": { + "type": "string", + "title": "Encryption Option", + }, + "kms_key": {"type": "string", "title": "KMS Key"}, }, "required": ["region", "s3_staging_dir"], - "extra_options": ["glue", "cost_per_tb"], + "extra_options": ["glue", "cost_per_tb", "encryption_option", "kms_key"], "order": [ + "aws_access_key", + "aws_secret_key", "region", + "iam_role", + "external_id", "s3_staging_dir", "schema", "work_group", @@ -101,42 +112,6 @@ def configuration_schema(cls): "secret": ["aws_secret_key"], } - if SHOW_EXTRA_SETTINGS: - schema["properties"].update( - { - "encryption_option": { - "type": "string", - "title": "Encryption Option", - }, - "kms_key": {"type": "string", "title": "KMS Key"}, - } - ) - schema["extra_options"].append("encryption_option") - schema["extra_options"].append("kms_key") - - if ASSUME_ROLE: - del schema["properties"]["aws_access_key"] - del schema["properties"]["aws_secret_key"] - schema["secret"] = [] - - schema["order"].insert(1, "iam_role") - schema["order"].insert(2, "external_id") - schema["properties"].update( - { - "iam_role": {"type": "string", "title": "IAM role to assume"}, - "external_id": { - "type": "string", - "title": "External ID to be used while STS assume role", - }, - } - ) - else: - schema["order"].insert(1, "aws_access_key") - schema["order"].insert(2, "aws_secret_key") - - if not OPTIONAL_CREDENTIALS and not ASSUME_ROLE: - schema["required"] += ["aws_access_key", "aws_secret_key"] - return schema @classmethod @@ -153,9 +128,14 @@ def type(cls): return "athena" def _get_iam_credentials(self, user=None): - if ASSUME_ROLE: + args = { + "aws_access_key_id": self.configuration.get("aws_access_key", None), + "aws_secret_access_key": self.configuration.get("aws_secret_key", None), + "region_name": self.configuration["region"], + } + if self.configuration.get("iam_role"): role_session_name = "redash" if user is None else user.email - sts = boto3.client("sts") + sts = boto3.client("sts", **args) creds = sts.assume_role( RoleArn=self.configuration.get("iam_role"), RoleSessionName=role_session_name, @@ -168,11 +148,7 @@ def _get_iam_credentials(self, user=None): "region_name": self.configuration["region"], } else: - return { - "aws_access_key_id": self.configuration.get("aws_access_key", None), - "aws_secret_access_key": self.configuration.get("aws_secret_key", None), - "region_name": self.configuration["region"], - } + return args def __get_schema_from_glue(self): client = boto3.client("glue", **self._get_iam_credentials()) From d715fd913df0d68900d06a407c36a5fd99cd3877 Mon Sep 17 00:00:00 2001 From: AndrewChubatiuk Date: Sun, 21 Apr 2024 09:15:37 +0300 Subject: [PATCH 2/3] use same approach for the rest aws runners --- redash/query_runner/amazon_elasticsearch.py | 39 +++++++++++++------- redash/query_runner/cloudwatch.py | 40 +++++++++++++++------ redash/query_runner/cloudwatch_insights.py | 40 +++++++++++++++------ 3 files changed, 84 insertions(+), 35 deletions(-) diff --git a/redash/query_runner/amazon_elasticsearch.py b/redash/query_runner/amazon_elasticsearch.py index 1b36cad75c..91efe2f0cd 100644 --- a/redash/query_runner/amazon_elasticsearch.py +++ b/redash/query_runner/amazon_elasticsearch.py @@ -2,6 +2,7 @@ from .elasticsearch2 import ElasticSearch2 try: + import boto3 from botocore import credentials, session from requests_aws_sign import AWSV4Sign @@ -32,9 +33,10 @@ def configuration_schema(cls): "region": {"type": "string"}, "access_key": {"type": "string", "title": "Access Key"}, "secret_key": {"type": "string", "title": "Secret Key"}, - "use_aws_iam_profile": { - "type": "boolean", - "title": "Use AWS IAM Profile", + "iam_role": {"type": "string", "title": "IAM role to assume"}, + "external_id": { + "type": "string", + "title": "External ID to be used while STS assume role", }, }, "secret": ["secret_key"], @@ -43,24 +45,35 @@ def configuration_schema(cls): "region", "access_key", "secret_key", - "use_aws_iam_profile", + "iam_role", + "external_id", ], "required": ["server", "region"], } def __init__(self, configuration): super(AmazonElasticsearchService, self).__init__(configuration) - region = configuration["region"] - cred = None - if configuration.get("use_aws_iam_profile", False): - cred = credentials.get_credentials(session.Session()) - else: - cred = credentials.Credentials( - access_key=configuration.get("access_key", ""), - secret_key=configuration.get("secret_key", ""), + args = { + "region_name": region, + "aws_access_key_id": configuration.get("access_key", None), + "aws_secret_access_key": configuration.get("secret_key", None), + } + if configuration.get("iam_role"): + role_session_name = "redash" + sts = boto3.client("sts", **args) + creds = sts.assume_role( + RoleArn=configuration.get("iam_role"), + RoleSessionName=role_session_name, + ExternalId=configuration.get("external_id"), ) - + args = { + "aws_access_key_id": creds["Credentials"]["AccessKeyId"], + "aws_secret_access_key": creds["Credentials"]["SecretAccessKey"], + "aws_session_token": creds["Credentials"]["SessionToken"], + "region_name": region, + } + cred = credentials.get_credentials(session.Session(**args)) self.auth = AWSV4Sign(cred, region, "es") def get_auth(self): diff --git a/redash/query_runner/cloudwatch.py b/redash/query_runner/cloudwatch.py index 699834c0a9..677d8d518d 100644 --- a/redash/query_runner/cloudwatch.py +++ b/redash/query_runner/cloudwatch.py @@ -61,12 +61,17 @@ def configuration_schema(cls): return { "type": "object", "properties": { + "iam_role": {"type": "string", "title": "IAM role to assume"}, + "external_id": { + "type": "string", + "title": "External ID to be used while STS assume role", + }, "region": {"type": "string", "title": "AWS Region"}, "aws_access_key": {"type": "string", "title": "AWS Access Key"}, "aws_secret_key": {"type": "string", "title": "AWS Secret Key"}, }, - "required": ["region", "aws_access_key", "aws_secret_key"], - "order": ["region", "aws_access_key", "aws_secret_key"], + "required": ["region"], + "order": ["region", "aws_access_key", "aws_secret_key", "iam_role", "external_id"], "secret": ["aws_secret_key"], } @@ -81,14 +86,27 @@ def __init__(self, configuration): def test_connection(self): self.get_schema() - def _get_client(self): - cloudwatch = boto3.client( - "cloudwatch", - region_name=self.configuration.get("region"), - aws_access_key_id=self.configuration.get("aws_access_key"), - aws_secret_access_key=self.configuration.get("aws_secret_key"), - ) - return cloudwatch + def _get_client(self, user=None): + args = { + "aws_access_key_id": self.configuration.get("aws_access_key", None), + "aws_secret_access_key": self.configuration.get("aws_secret_key", None), + "region_name": self.configuration["region"], + } + if self.configuration.get("iam_role"): + role_session_name = "redash" if user is None else user.email + sts = boto3.client("sts", **args) + creds = sts.assume_role( + RoleArn=self.configuration.get("iam_role"), + RoleSessionName=role_session_name, + ExternalId=self.configuration.get("external_id"), + ) + return { + "aws_access_key_id": creds["Credentials"]["AccessKeyId"], + "aws_secret_access_key": creds["Credentials"]["SecretAccessKey"], + "aws_session_token": creds["Credentials"]["SessionToken"], + "region_name": self.configuration["region"], + } + return boto3.client("cloudwatch", **args) def get_schema(self, get_stats=False): client = self._get_client() @@ -110,7 +128,7 @@ def get_schema(self, get_stats=False): return list(metrics.values()) def run_query(self, query, user): - cloudwatch = self._get_client() + cloudwatch = self._get_client(user) query = parse_query(query) diff --git a/redash/query_runner/cloudwatch_insights.py b/redash/query_runner/cloudwatch_insights.py index f0ebcea117..cd067a7c9d 100644 --- a/redash/query_runner/cloudwatch_insights.py +++ b/redash/query_runner/cloudwatch_insights.py @@ -81,9 +81,14 @@ def configuration_schema(cls): "region": {"type": "string", "title": "AWS Region"}, "aws_access_key": {"type": "string", "title": "AWS Access Key"}, "aws_secret_key": {"type": "string", "title": "AWS Secret Key"}, + "iam_role": {"type": "string", "title": "IAM role to assume"}, + "external_id": { + "type": "string", + "title": "External ID to be used while STS assume role", + }, }, - "required": ["region", "aws_access_key", "aws_secret_key"], - "order": ["region", "aws_access_key", "aws_secret_key"], + "required": ["region"], + "order": ["region", "aws_access_key", "aws_secret_key", "iam_role", "external_id"], "secret": ["aws_secret_key"], } @@ -98,14 +103,27 @@ def __init__(self, configuration): def test_connection(self): self.get_schema() - def _get_client(self): - cloudwatch = boto3.client( - "logs", - region_name=self.configuration.get("region"), - aws_access_key_id=self.configuration.get("aws_access_key"), - aws_secret_access_key=self.configuration.get("aws_secret_key"), - ) - return cloudwatch + def _get_client(self, user=None): + args = { + "aws_access_key_id": self.configuration.get("aws_access_key", None), + "aws_secret_access_key": self.configuration.get("aws_secret_key", None), + "region_name": self.configuration["region"], + } + if self.configuration.get("iam_role"): + role_session_name = "redash" if user is None else user.email + sts = boto3.client("sts", **args) + creds = sts.assume_role( + RoleArn=self.configuration.get("iam_role"), + RoleSessionName=role_session_name, + ExternalId=self.configuration.get("external_id"), + ) + args = { + "aws_access_key_id": creds["Credentials"]["AccessKeyId"], + "aws_secret_access_key": creds["Credentials"]["SecretAccessKey"], + "aws_session_token": creds["Credentials"]["SessionToken"], + "region_name": self.configuration["region"], + } + return boto3.client("logs", **args) def get_schema(self, get_stats=False): client = self._get_client() @@ -127,7 +145,7 @@ def get_schema(self, get_stats=False): return log_groups def run_query(self, query, user): - logs = self._get_client() + logs = self._get_client(user) query = parse_query(query) query_id = logs.start_query(**query)["queryId"] From 13ac8ec45ed73dfc9c6f87fe427df4f43c467108 Mon Sep 17 00:00:00 2001 From: AndrewChubatiuk Date: Tue, 23 Apr 2024 13:42:34 +0300 Subject: [PATCH 3/3] use same approach for redshift, added redshift-serverless support --- poetry.lock | 41 ++++++++------- pyproject.toml | 4 +- redash/query_runner/pg.py | 107 +++++++++++++++++--------------------- 3 files changed, 71 insertions(+), 81 deletions(-) diff --git a/poetry.lock b/poetry.lock index a3ad412c46..6d95924b6a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -421,41 +421,44 @@ files = [ [[package]] name = "boto3" -version = "1.28.8" +version = "1.34.89" description = "The AWS SDK for Python" optional = false -python-versions = ">= 3.7" +python-versions = ">=3.8" files = [ - {file = "boto3-1.28.8-py3-none-any.whl", hash = "sha256:7132ac3f3a9c28b84dcc344cfb439d37d2c5ab45f6b577358fc9aeba5d5aab63"}, - {file = "boto3-1.28.8.tar.gz", hash = "sha256:cf88309d9b8cd9a2fb0c8049cb4b217b4e9dcb55bf670d6054b0bbe2eef25e57"}, + {file = "boto3-1.34.89-py3-none-any.whl", hash = "sha256:f9166f485d64b012d46acd212fb29a45b195a85ff66a645b05b06d9f7572af36"}, + {file = "boto3-1.34.89.tar.gz", hash = "sha256:e0940e43810fe82f5b77442c751491fcc2768af7e7c3e8c15ea158e1ca9b586c"}, ] [package.dependencies] -botocore = ">=1.31.8,<1.32.0" +botocore = ">=1.34.89,<1.35.0" jmespath = ">=0.7.1,<2.0.0" -s3transfer = ">=0.6.0,<0.7.0" +s3transfer = ">=0.10.0,<0.11.0" [package.extras] crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.31.8" +version = "1.34.89" description = "Low-level, data-driven core of boto 3." optional = false -python-versions = ">= 3.7" +python-versions = ">=3.8" files = [ - {file = "botocore-1.31.8-py3-none-any.whl", hash = "sha256:61ba7efaa6305c1928b9b3fbb6f780cbfbd762e19008d20c11ba52b47f20e1b0"}, - {file = "botocore-1.31.8.tar.gz", hash = "sha256:092baa2168ae78080b0c28011527bfc11d8debd3767aa1e9a4ce8a91fd9943a2"}, + {file = "botocore-1.34.89-py3-none-any.whl", hash = "sha256:35205ed7db13058a3f7114c28e93058a8ff1490dfc6a5b5dff9c581c738fbf59"}, + {file = "botocore-1.34.89.tar.gz", hash = "sha256:6624b69bcdf2c5d0568b7bc9cbac13e605f370e7ea06710c61e2e2dc76831141"}, ] [package.dependencies] jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" -urllib3 = ">=1.25.4,<1.27" +urllib3 = [ + {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, + {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, +] [package.extras] -crt = ["awscrt (==0.16.26)"] +crt = ["awscrt (==0.20.9)"] [[package]] name = "cachetools" @@ -4202,20 +4205,20 @@ files = [ [[package]] name = "s3transfer" -version = "0.6.2" +version = "0.10.1" description = "An Amazon S3 Transfer Manager" optional = false -python-versions = ">= 3.7" +python-versions = ">= 3.8" files = [ - {file = "s3transfer-0.6.2-py3-none-any.whl", hash = "sha256:b014be3a8a2aab98cfe1abc7229cc5a9a0cf05eb9c1f2b86b230fd8df3f78084"}, - {file = "s3transfer-0.6.2.tar.gz", hash = "sha256:cab66d3380cca3e70939ef2255d01cd8aece6a4907a9528740f668c4b0611861"}, + {file = "s3transfer-0.10.1-py3-none-any.whl", hash = "sha256:ceb252b11bcf87080fb7850a224fb6e05c8a776bab8f2b64b7f25b969464839d"}, + {file = "s3transfer-0.10.1.tar.gz", hash = "sha256:5683916b4c724f799e600f41dd9e10a9ff19871bf87623cc8f491cb4f5fa0a19"}, ] [package.dependencies] -botocore = ">=1.12.36,<2.0a.0" +botocore = ">=1.33.2,<2.0a.0" [package.extras] -crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "sasl" @@ -5300,4 +5303,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.11" -content-hash = "e7985ee5c3ca3a4389b4e85fda033a9b3b867dbbe4b4a7fca8ea5c35fc401148" +content-hash = "a50c491f9171d7a63c1f76562a8cca77d37caf686823ef8f227f9a6a3e8d64af" diff --git a/pyproject.toml b/pyproject.toml index 8510146336..0270d7e4e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,8 +92,8 @@ optional = true [tool.poetry.group.all_ds.dependencies] atsd-client = "3.0.5" azure-kusto-data = "0.0.35" -boto3 = "1.28.8" -botocore = "1.31.8" +boto3 = "1.34.89" +botocore = "1.34.89" cassandra-driver = "3.21.0" certifi = ">=2019.9.11" cmem-cmempy = "21.2.3" diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py index 46bf3cc0b8..fe8d20f0c5 100644 --- a/redash/query_runner/pg.py +++ b/redash/query_runner/pg.py @@ -5,6 +5,13 @@ from tempfile import NamedTemporaryFile from uuid import uuid4 +try: + import boto3 + + IAM_ENABLED = True +except ImportError: + IAM_ENABLED = False + import psycopg2 from psycopg2.extras import Range @@ -23,13 +30,6 @@ logger = logging.getLogger(__name__) -try: - import boto3 - - IAM_ENABLED = True -except ImportError: - IAM_ENABLED = False - types_map = { 20: TYPE_INTEGER, 21: TYPE_INTEGER, @@ -432,17 +432,6 @@ def name(cls): def enabled(cls): return IAM_ENABLED - def _login_method_selection(self): - if self.configuration.get("rolename"): - if not self.configuration.get("aws_access_key_id") or not self.configuration.get("aws_secret_access_key"): - return "ASSUME_ROLE_NO_KEYS" - else: - return "ASSUME_ROLE_KEYS" - elif self.configuration.get("aws_access_key_id") and self.configuration.get("aws_secret_access_key"): - return "KEYS" - elif not self.configuration.get("password"): - return "ROLE" - @classmethod def configuration_schema(cls): return { @@ -456,9 +445,10 @@ def configuration_schema(cls): "title": "AWS Secret Access Key", }, "clusterid": {"type": "string", "title": "Redshift Cluster ID"}, + "workgroup": {"type": "string", "title": "Redshift Serverless Workgroup"}, "user": {"type": "string"}, "host": {"type": "string"}, - "port": {"type": "number"}, + "port": {"type": "number", "default": 5439}, "dbname": {"type": "string", "title": "Database Name"}, "sslmode": {"type": "string", "title": "SSL Mode", "default": "prefer"}, "adhoc_query_group": { @@ -478,6 +468,7 @@ def configuration_schema(cls): "aws_access_key_id", "aws_secret_access_key", "clusterid", + "workgroup", "host", "port", "user", @@ -486,7 +477,7 @@ def configuration_schema(cls): "adhoc_query_group", "scheduled_query_group", ], - "required": ["dbname", "user", "host", "port", "aws_region"], + "required": ["dbname", "host", "aws_region"], "secret": ["aws_secret_access_key"], } @@ -495,54 +486,50 @@ def _get_connection(self): sslrootcert_path = os.path.join(os.path.dirname(__file__), "./files/redshift-ca-bundle.crt") - login_method = self._login_method_selection() - - if login_method == "KEYS": - client = boto3.client( - "redshift", - region_name=self.configuration.get("aws_region"), - aws_access_key_id=self.configuration.get("aws_access_key_id"), - aws_secret_access_key=self.configuration.get("aws_secret_access_key"), - ) - elif login_method == "ROLE": - client = boto3.client("redshift", region_name=self.configuration.get("aws_region")) - else: - if login_method == "ASSUME_ROLE_KEYS": - assume_client = client = boto3.client( - "sts", - region_name=self.configuration.get("aws_region"), - aws_access_key_id=self.configuration.get("aws_access_key_id"), - aws_secret_access_key=self.configuration.get("aws_secret_access_key"), - ) - else: - assume_client = client = boto3.client("sts", region_name=self.configuration.get("aws_region")) + boto_args = { + "region_name": self.configuration["aws_region"], + "aws_access_key_id": self.configuration.get("aws_access_key_id"), + "aws_secret_access_key": self.configuration.get("aws_secret_access_key"), + } + role_arn = self.configuration.get("rolename") + host = self.configuration["host"] + if role_arn: + assume_client = boto3.client("sts", **boto_args) role_session = f"redash_{uuid4().hex}" - session_keys = assume_client.assume_role( - RoleArn=self.configuration.get("rolename"), RoleSessionName=role_session - )["Credentials"] - client = boto3.client( - "redshift", - region_name=self.configuration.get("aws_region"), - aws_access_key_id=session_keys["AccessKeyId"], - aws_secret_access_key=session_keys["SecretAccessKey"], - aws_session_token=session_keys["SessionToken"], + session_keys = assume_client.assume_role(RoleArn=role_arn, RoleSessionName=role_session)["Credentials"] + boto_args = { + "region_name": self.configuration["aws_region"], + "aws_access_key_id": session_keys["AccessKeyId"], + "aws_secret_access_key": session_keys["SecretAccessKey"], + "aws_session_token": session_keys["SessionToken"], + } + db_credentials = {} + if self.configuration.get("clusterid"): + client = boto3.client("redshift", **boto_args) + credentials = client.get_cluster_credentials( + DbUser=self.configuration.get("user"), + DbName=self.configuration.get("dbname"), + ClusterIdentifier=self.configuration.get("clusterid"), ) - credentials = client.get_cluster_credentials( - DbUser=self.configuration.get("user"), - DbName=self.configuration.get("dbname"), - ClusterIdentifier=self.configuration.get("clusterid"), - ) - db_user = credentials["DbUser"] - db_password = credentials["DbPassword"] + db_credentials = {"user": credentials["DbUser"], "password": credentials["DbPassword"]} + elif self.configuration.get("workgroup"): + client = boto3.client("redshift-serverless", **boto_args) + credentials = client.get_credentials( + dbName=self.configuration.get("dbname"), + workgroupName=self.configuration.get("workgroup"), + ) + db_credentials = { + "user": credentials["dbUser"], + "password": credentials["dbPassword"], + } connection = psycopg2.connect( - user=db_user, - password=db_password, - host=self.configuration.get("host"), + host=host, port=self.configuration.get("port"), dbname=self.configuration.get("dbname"), sslmode=self.configuration.get("sslmode", "prefer"), sslrootcert=sslrootcert_path, async_=True, + **db_credentials, ) return connection