From de64c4c80c0dff824bd4f84d1cf12b29d5cfba08 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Thu, 17 Nov 2016 17:56:52 +0300 Subject: [PATCH 1/6] simple GA query runner --- redash/query_runner/google_analytics.py | 111 ++++++++++++++++++++++++ redash/settings.py | 3 +- 2 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 redash/query_runner/google_analytics.py diff --git a/redash/query_runner/google_analytics.py b/redash/query_runner/google_analytics.py new file mode 100644 index 0000000000..a97188b816 --- /dev/null +++ b/redash/query_runner/google_analytics.py @@ -0,0 +1,111 @@ +from base64 import b64decode +import json +import logging +from redash.query_runner import * +from redash.utils import JSONEncoder +from urlparse import urlparse, parse_qs +import pprint +logger = logging.getLogger(__name__) + +pp = pprint.PrettyPrinter() + +try: + import gspread + from oauth2client.client import SignedJwtAssertionCredentials + from apiclient.discovery import build + import httplib2 + enabled = True +except ImportError as e: + logger.info(str(e)) + enabled = False + + +def _load_key(filename): + with open(filename, "rb") as f: + return json.loads(f.read()) + + +types_conv = dict(STRING='string', INTEGER='integer', FLOAT='float', ) + + +class GoogleAnalytics(BaseQueryRunner): + @classmethod + def annotate_query(cls): + return False + + @classmethod + def type(cls): + return "google_analytics" + + @classmethod + def enabled(cls): + return enabled + + @classmethod + def configuration_schema(cls): + return { + 'type': 'object', + 'properties': { + 'jsonKeyFile': { + "type": "string", + 'title': 'JSON Key File' + } + }, + 'required': ['jsonKeyFile'], + 'secret': ['jsonKeyFile'] + } + + def __init__(self, configuration): + super(GoogleAnalytics, self).__init__(configuration) + + def _get_analytics_service(self): + scope = ['https://www.googleapis.com/auth/analytics.readonly'] + key = json.loads(b64decode(self.configuration['jsonKeyFile'])) + credentials = SignedJwtAssertionCredentials(key['client_email'], key["private_key"], scope=scope) + return build('analytics', 'v3', http=credentials.authorize(httplib2.Http())) + + def _analytics_query(self, line): + params = parse_qs(urlparse(line).query, keep_blank_values=True) + for key in params.keys(): + params[key] = ','.join(params[key]) + if '-' in key: + params[key.replace('-', '_')] = params.pop(key) + if len(params) > 0: + response = self._get_analytics_service().data().ga().get(**params).execute() + columns = [{'name': h['name'], 'friendly_name': h['name'].split(':')[1], + 'type': types_conv.get(h['dataType'], 'string')} for h in response['columnHeaders']] + rows = [] + for r in response['rows']: + d = {} + for c, value in enumerate(r): + d[response['columnHeaders'][c]['name']] = value + rows.append(d) + data = {'columns': columns, 'rows': rows} + return data + + def run_query(self, query, user): + logger.info("Analytics is about to execute query: %s", query) + params = parse_qs(urlparse(query).query, keep_blank_values=True) + for key in params.keys(): + params[key] = ','.join(params[key]) + if '-' in key: + params[key.replace('-', '_')] = params.pop(key) + if len(params) > 0: + response = self._get_analytics_service().data().ga().get(**params).execute() + columns = [{'name': h['name'], 'friendly_name': h['name'].split(':')[1], + 'type': types_conv.get(h['dataType'], 'string')} for h in response['columnHeaders']] + rows = [] + for r in response['rows']: + d = {} + for c, value in enumerate(r): + d[response['columnHeaders'][c]['name']] = value + rows.append(d) + data = {'columns': columns, 'rows': rows} + error = None + json_data = json.dumps(data, cls=JSONEncoder) + else: + error = 'Wrong query format' + json_data = None + return json_data, error + +register(GoogleAnalytics) diff --git a/redash/settings.py b/redash/settings.py index f597c9495a..acf0abd8c6 100644 --- a/redash/settings.py +++ b/redash/settings.py @@ -173,7 +173,8 @@ def all_settings(): 'redash.query_runner.sqlite', 'redash.query_runner.dynamodb_sql', 'redash.query_runner.mssql', - 'redash.query_runner.jql' + 'redash.query_runner.jql', + 'redash.query_runner.google_analytics' ] enabled_query_runners = array_from_string(os.environ.get("REDASH_ENABLED_QUERY_RUNNERS", ",".join(default_query_runners))) From 0a0ca219d0cb5e4e5db8b36ff030d24cd3340a2e Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Sun, 20 Nov 2016 13:54:58 +0300 Subject: [PATCH 2/6] google_analytics: removed unused code --- redash/query_runner/google_analytics.py | 29 +++++-------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/redash/query_runner/google_analytics.py b/redash/query_runner/google_analytics.py index a97188b816..09a559d520 100644 --- a/redash/query_runner/google_analytics.py +++ b/redash/query_runner/google_analytics.py @@ -4,11 +4,9 @@ from redash.query_runner import * from redash.utils import JSONEncoder from urlparse import urlparse, parse_qs -import pprint +from datetime import datetime logger = logging.getLogger(__name__) -pp = pprint.PrettyPrinter() - try: import gspread from oauth2client.client import SignedJwtAssertionCredentials @@ -25,7 +23,11 @@ def _load_key(filename): return json.loads(f.read()) -types_conv = dict(STRING='string', INTEGER='integer', FLOAT='float', ) +types_conv = dict( + STRING=TYPE_STRING, + INTEGER=TYPE_INTEGER, + FLOAT=TYPE_FLOAT, +) class GoogleAnalytics(BaseQueryRunner): @@ -64,25 +66,6 @@ def _get_analytics_service(self): credentials = SignedJwtAssertionCredentials(key['client_email'], key["private_key"], scope=scope) return build('analytics', 'v3', http=credentials.authorize(httplib2.Http())) - def _analytics_query(self, line): - params = parse_qs(urlparse(line).query, keep_blank_values=True) - for key in params.keys(): - params[key] = ','.join(params[key]) - if '-' in key: - params[key.replace('-', '_')] = params.pop(key) - if len(params) > 0: - response = self._get_analytics_service().data().ga().get(**params).execute() - columns = [{'name': h['name'], 'friendly_name': h['name'].split(':')[1], - 'type': types_conv.get(h['dataType'], 'string')} for h in response['columnHeaders']] - rows = [] - for r in response['rows']: - d = {} - for c, value in enumerate(r): - d[response['columnHeaders'][c]['name']] = value - rows.append(d) - data = {'columns': columns, 'rows': rows} - return data - def run_query(self, query, user): logger.info("Analytics is about to execute query: %s", query) params = parse_qs(urlparse(query).query, keep_blank_values=True) From c94daceb5f8e5f282d73f706f02057f4d6564bfa Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Sun, 20 Nov 2016 14:18:02 +0300 Subject: [PATCH 3/6] google_analytics: added date/datetime parsing --- redash/query_runner/google_analytics.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/redash/query_runner/google_analytics.py b/redash/query_runner/google_analytics.py index 09a559d520..27519da49f 100644 --- a/redash/query_runner/google_analytics.py +++ b/redash/query_runner/google_analytics.py @@ -27,6 +27,8 @@ def _load_key(filename): STRING=TYPE_STRING, INTEGER=TYPE_INTEGER, FLOAT=TYPE_FLOAT, + DATE=TYPE_DATE, + DATETIME=TYPE_DATETIME ) @@ -75,13 +77,29 @@ def run_query(self, query, user): params[key.replace('-', '_')] = params.pop(key) if len(params) > 0: response = self._get_analytics_service().data().ga().get(**params).execute() - columns = [{'name': h['name'], 'friendly_name': h['name'].split(':')[1], - 'type': types_conv.get(h['dataType'], 'string')} for h in response['columnHeaders']] + columns = [] + for h in response['columnHeaders']: + if h['name'] == 'ga:date': + h['dataType'] = 'DATE' + elif h['name'] == 'ga:dateHour': + h['dataType'] = 'DATETIME' + columns.append({ + 'name': h['name'], + 'friendly_name': h['name'].split(':', 1)[1], + 'type': types_conv.get(h['dataType'], 'string') + }) rows = [] for r in response['rows']: d = {} for c, value in enumerate(r): - d[response['columnHeaders'][c]['name']] = value + column_name = response['columnHeaders'][c]['name'] + column_type = filter(lambda col: col['name'] == column_name, columns)[0]['type'] + if column_type == TYPE_DATE: + value = datetime.strptime(value, '%Y%m%d') + elif column_type == TYPE_DATETIME: + if len(value) == 10: + value = datetime.strptime(value, '%Y%m%d%H') + d[column_name] = value rows.append(d) data = {'columns': columns, 'rows': rows} error = None From 8eefad290bbeda44371db4c75f6104706b44432b Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Wed, 23 Nov 2016 12:48:10 +0300 Subject: [PATCH 4/6] google_analytics: review fixes --- redash/query_runner/google_analytics.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/redash/query_runner/google_analytics.py b/redash/query_runner/google_analytics.py index 27519da49f..338c402fc1 100644 --- a/redash/query_runner/google_analytics.py +++ b/redash/query_runner/google_analytics.py @@ -69,12 +69,15 @@ def _get_analytics_service(self): return build('analytics', 'v3', http=credentials.authorize(httplib2.Http())) def run_query(self, query, user): - logger.info("Analytics is about to execute query: %s", query) - params = parse_qs(urlparse(query).query, keep_blank_values=True) - for key in params.keys(): - params[key] = ','.join(params[key]) - if '-' in key: - params[key.replace('-', '_')] = params.pop(key) + logger.debug("Analytics is about to execute query: %s", query) + try: + params = json.loads(query) + except: + params = parse_qs(urlparse(query).query, keep_blank_values=True) + for key in params.keys(): + params[key] = ','.join(params[key]) + if '-' in key: + params[key.replace('-', '_')] = params.pop(key) if len(params) > 0: response = self._get_analytics_service().data().ga().get(**params).execute() columns = [] @@ -99,6 +102,10 @@ def run_query(self, query, user): elif column_type == TYPE_DATETIME: if len(value) == 10: value = datetime.strptime(value, '%Y%m%d%H') + elif len(value) == 12: + value = datetime.strptime(value, '%Y%m%d%H%M') + else: + raise Exception('Wrong datetime format') d[column_name] = value rows.append(d) data = {'columns': columns, 'rows': rows} From 92dee61bcd8fb38f5a16d98f03de47f1f4bdfe8c Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Wed, 23 Nov 2016 17:17:05 +0300 Subject: [PATCH 5/6] google_analytics: added accounts and properties as schema tables --- redash/query_runner/google_analytics.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/redash/query_runner/google_analytics.py b/redash/query_runner/google_analytics.py index 338c402fc1..9de66082fc 100644 --- a/redash/query_runner/google_analytics.py +++ b/redash/query_runner/google_analytics.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from base64 import b64decode import json import logging @@ -32,7 +34,7 @@ def _load_key(filename): ) -class GoogleAnalytics(BaseQueryRunner): +class GoogleAnalytics(BaseSQLQueryRunner): @classmethod def annotate_query(cls): return False @@ -62,6 +64,21 @@ def configuration_schema(cls): def __init__(self, configuration): super(GoogleAnalytics, self).__init__(configuration) + def _get_tables(self, schema): + accounts = self._get_analytics_service().management().accounts().list().execute().get('items') + if accounts is None: + raise Exception("Failed getting accounts.") + else: + for account in accounts: + schema[account['name']] = {'name': account['name'], 'columns': []} + properties = self._get_analytics_service().management().webproperties().list( + accountId=account['id']).execute().get('items', []) + for property_ in properties: + schema[account['name']]['columns'].append( + u'{0} (ga:{1})'.format(property_['name'], property_['defaultProfileId']) + ) + return schema.values() + def _get_analytics_service(self): scope = ['https://www.googleapis.com/auth/analytics.readonly'] key = json.loads(b64decode(self.configuration['jsonKeyFile'])) From 6bdc863b64a5164e44d714c794a473cd5ac5309d Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Thu, 24 Nov 2016 23:17:04 +0300 Subject: [PATCH 6/6] google_analytics: review fixes #2 --- redash/query_runner/google_analytics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/redash/query_runner/google_analytics.py b/redash/query_runner/google_analytics.py index 9de66082fc..26ecc86119 100644 --- a/redash/query_runner/google_analytics.py +++ b/redash/query_runner/google_analytics.py @@ -10,7 +10,6 @@ logger = logging.getLogger(__name__) try: - import gspread from oauth2client.client import SignedJwtAssertionCredentials from apiclient.discovery import build import httplib2 @@ -122,7 +121,7 @@ def run_query(self, query, user): elif len(value) == 12: value = datetime.strptime(value, '%Y%m%d%H%M') else: - raise Exception('Wrong datetime format') + raise Exception("Unknown date/time format in results: '{}'".format(value)) d[column_name] = value rows.append(d) data = {'columns': columns, 'rows': rows}