Merge pull request #237 from preset-io/jinja_escaper_fix

fix(import/export): Improve Jinja handling logic
preset-io · Sep 8, 2023 · a219739 · a219739
2 parents c2b0d39 + 1da7e67
commit a219739
Show file tree

Hide file tree

Showing 4 changed files with 238 additions and 15 deletions.
diff --git a/src/preset_cli/cli/superset/export.py b/src/preset_cli/cli/superset/export.py
@@ -2,10 +2,11 @@
 A command to export Superset resources into a directory.
 """
 
+import json
 import re
 from collections import defaultdict
 from pathlib import Path
-from typing import List, Set, Tuple
+from typing import Any, Callable, List, Set, Tuple
 from zipfile import ZipFile
 
 import click
@@ -142,12 +143,43 @@ def export_resource(  # pylint: disable=too-many-arguments, too-many-locals
 
         # escape any pre-existing Jinja2 templates
         if not disable_jinja_escaping:
-            file_contents = jinja_escaper(file_contents)
+            asset_content = yaml.load(file_contents, Loader=yaml.SafeLoader)
+            for key, value in asset_content.items():
+                asset_content[key] = traverse_data(value, handle_string)
+
+            file_contents = yaml.dump(asset_content, sort_keys=False)
 
         with open(target, "w", encoding="utf-8") as output:
             output.write(file_contents)
 
 
+def traverse_data(value: Any, handler: Callable) -> Any:
+    """
+    Process value according to its data type
+    """
+    if isinstance(value, str):
+        return handler(value)
+    if isinstance(value, dict) and value:
+        return {k: traverse_data(v, handler) for k, v in value.items()}
+    if isinstance(value, list) and value:
+        return [traverse_data(item, handler) for item in value]
+    return value
+
+
+def handle_string(value):
+    """
+    Try to load a string as JSON to traverse its content for proper Jinja templating escaping.
+    Required for fields like ``query_context``
+    """
+    try:
+        asset_dict = json.loads(value)
+        return (
+            json.dumps(traverse_data(asset_dict, jinja_escaper)) if asset_dict else "{}"
+        )
+    except json.JSONDecodeError:
+        return jinja_escaper(value)
+
+
 def jinja_escaper(value: str) -> str:
     """
     Escape Jinja macros and logical statements that shouldn't be handled by CLI

diff --git a/src/preset_cli/cli/superset/sync/native/command.py b/src/preset_cli/cli/superset/sync/native/command.py
@@ -19,6 +19,7 @@
 import requests
 import yaml
 from jinja2 import Template
+from jinja2.exceptions import TemplateSyntaxError
 from sqlalchemy.engine import create_engine
 from sqlalchemy.engine.url import make_url
 from yarl import URL
@@ -97,10 +98,19 @@ def render_yaml(path: Path, env: Dict[str, Any]) -> Dict[str, Any]:
     env["filepath"] = path
 
     with open(path, encoding="utf-8") as input_:
-        template = Template(input_.read())
+        asset_content = input_.read()
 
-    content = template.render(**env)
+    try:
+        template = Template(asset_content)
 
+    # For charts with a `query_context` -> ``str(JSON)``, templating the YAML structure directly
+    # was failing. The route str(YAML) -> dict -> str(JSON) is more consistent.
+    except TemplateSyntaxError:
+        content = yaml.load(asset_content, Loader=yaml.SafeLoader)
+        content = json.dumps(content)
+        template = Template(content)
+
+    content = template.render(**env)
     return yaml.load(content, Loader=yaml.SafeLoader)
 
 

diff --git a/tests/cli/superset/export_test.py b/tests/cli/superset/export_test.py
@@ -3,6 +3,7 @@
 """
 # pylint: disable=redefined-outer-name, invalid-name, unused-argument
 
+import json
 from io import BytesIO
 from pathlib import Path
 from unittest import mock
@@ -21,19 +22,20 @@
 
 
 @pytest.fixture
-def dataset_export() -> BytesIO:
+def chart_export() -> BytesIO:
+    # pylint: disable=line-too-long
     """
-    Fixture for the contents of a simple database export.
+    Fixture for the contents of a simple chart export.
     """
     contents = {
-        "dashboard_export/metadata.yaml": "Metadata",
-        "dashboard_export/databases/gsheets.yaml": yaml.dump(
+        "chart_export/metadata.yaml": "Metadata",
+        "chart_export/databases/gsheets.yaml": yaml.dump(
             {
                 "database_name": "GSheets",
                 "sqlalchemy_uri": "gsheets://",
             },
         ),
-        "dashboard_export/datasets/gsheets/test.yaml": yaml.dump(
+        "chart_export/datasets/gsheets/test.yaml": yaml.dump(
             {
                 "table_name": "test",
                 "sql": """
@@ -48,6 +50,43 @@ def dataset_export() -> BytesIO:
 GROUP BY action""",
             },
         ),
+        "chart_export/charts/test_01.yaml": yaml.dump(
+            {
+                "slice_name": "test",
+                "viz_type": "big_number_total",
+                "params": {
+                    "datasource": "1__table",
+                    "viz_type": "big_number_total",
+                    "slice_id": 1,
+                    "metric": {
+                        "expressionType": "SQL",
+                        "sqlExpression": "{% if from_dttm %} count(*) {% else %} count(*) {% endif %}",
+                        "column": None,
+                        "aggregate": None,
+                        "datasourceWarning": False,
+                        "hasCustomLabel": True,
+                        "label": "custom_calculation",
+                        "optionName": "metric_6aq7h4t8b3t_jbp2rak398o",
+                    },
+                    "adhoc_filters": [],
+                    "header_font_size": 0.4,
+                    "subheader_font_size": 0.15,
+                    "y_axis_format": "SMART_NUMBER",
+                    "time_format": "smart_date",
+                    "extra_form_data": {},
+                    "dashboards": [],
+                },
+                "query_context": """
+{"datasource":{"id":1,"type":"table"},"force":false,"queries":[{"filters":[],"extras":{"having":"","where":""},"applied_time_extras":{},
+"columns":[],"metrics":[{"expressionType":"SQL","sqlExpression":"{% if from_dttm %} count(*) {% else %} count(*) {% endif %}","column":null,"aggregate":null,
+"datasourceWarning":false,"hasCustomLabel":true,"label":"custom_calculation","optionName":"metric_6aq7h4t8b3t_jbp2rak398o"}],"annotation_layers":[],
+"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"1__table","viz_type":"big_number_total",
+"slice_id":1,"metric":{"expressionType":"SQL","sqlExpression":"{% if from_dttm %} count(*) {% else %} count(*) {% endif %}","column":null,"aggregate":null,
+"datasourceWarning":false,"hasCustomLabel":true,"label":"custom_calculation","optionName":"metric_6aq7h4t8b3t_jbp2rak398o"},"adhoc_filters":[],"header_font_size":0.4,
+"subheader_font_size":0.15,"y_axis_format":"SMART_NUMBER","time_format":"smart_date","extra_form_data":{},"dashboards":[],"force":false,"result_format":"json","result_type":"full"},
+"result_format":"json","result_type":"full"}""",
+            },
+        ),
     }
 
     buf = BytesIO()
@@ -62,16 +101,17 @@ def dataset_export() -> BytesIO:
 def test_export_resource(
     mocker: MockerFixture,
     fs: FakeFilesystem,
-    dataset_export: BytesIO,
+    chart_export: BytesIO,
 ) -> None:
+    # pylint: disable=line-too-long
     """
     Test ``export_resource``.
     """
     root = Path("/path/to/root")
     fs.create_dir(root)
 
     client = mocker.MagicMock()
-    client.export_zip.return_value = dataset_export
+    client.export_zip.return_value = chart_export
 
     export_resource(
         resource_name="database",
@@ -108,14 +148,70 @@ def test_export_resource(
 GROUP BY action""",
         }
 
+    # check that chart JSON structure was not escaped, only Jinja templating
+    export_resource(
+        resource_name="chart",
+        requested_ids=set(),
+        root=root,
+        client=client,
+        overwrite=False,
+        disable_jinja_escaping=False,
+    )
+    with open(root / "charts/test_01.yaml", encoding="utf-8") as input_:
+        # load `query_context` as JSON to avoid
+        # mismatches due to blank spaces, single vs double quotes, etc
+        input__ = yaml.load(input_.read(), Loader=yaml.SafeLoader)
+        print(input__["query_context"])
+        input__["query_context"] = json.loads(
+            input__["query_context"],
+        )
+        print(input__["query_context"])
+        assert input__ == {
+            "slice_name": "test",
+            "viz_type": "big_number_total",
+            "params": {
+                "datasource": "1__table",
+                "viz_type": "big_number_total",
+                "slice_id": 1,
+                "metric": {
+                    "expressionType": "SQL",
+                    "sqlExpression": "{{ '{% if' }} from_dttm {{ '%}' }} count(*) {{ '{% else %}' }} count(*) {{ '{% endif %}' }}",
+                    "column": None,
+                    "aggregate": None,
+                    "datasourceWarning": False,
+                    "hasCustomLabel": True,
+                    "label": "custom_calculation",
+                    "optionName": "metric_6aq7h4t8b3t_jbp2rak398o",
+                },
+                "adhoc_filters": [],
+                "header_font_size": 0.4,
+                "subheader_font_size": 0.15,
+                "y_axis_format": "SMART_NUMBER",
+                "time_format": "smart_date",
+                "extra_form_data": {},
+                "dashboards": [],
+            },
+            "query_context": json.loads(
+                """
+{"datasource":{"id":1,"type":"table"},"force":false,"queries":[{"filters":[],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":[],
+"metrics":[{"expressionType":"SQL","sqlExpression":"{{ '{% if' }} from_dttm {{ '%}' }} count(*) {{ '{% else %}' }} count(*) {{ '{% endif %}' }}",
+"column":null,"aggregate":null,"datasourceWarning":false,"hasCustomLabel":true,"label":"custom_calculation","optionName":"metric_6aq7h4t8b3t_jbp2rak398o"}],
+"annotation_layers":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"1__table",
+"viz_type":"big_number_total","slice_id":1,"metric":{"expressionType":"SQL","sqlExpression":"{{ '{% if' }} from_dttm {{ '%}' }} count(*) {{ '{% else %}' }} count(*) {{ '{% endif %}' }}",
+"column":null,"aggregate":null,"datasourceWarning":false,"hasCustomLabel":true,"label":"custom_calculation","optionName":"metric_6aq7h4t8b3t_jbp2rak398o"},
+"adhoc_filters":[],"header_font_size":0.4,"subheader_font_size":0.15,"y_axis_format":"SMART_NUMBER","time_format":"smart_date",
+"extra_form_data":{},"dashboards":[],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}""",
+            ),
+        }
+
     # metadata file should be ignored
     assert not (root / "metadata.yaml").exists()
 
 
 def test_export_resource_overwrite(
     mocker: MockerFixture,
     fs: FakeFilesystem,
-    dataset_export: BytesIO,
+    chart_export: BytesIO,
 ) -> None:
     """
     Test that we need to confirm overwrites.
@@ -124,7 +220,7 @@ def test_export_resource_overwrite(
     fs.create_dir(root)
 
     client = mocker.MagicMock()
-    client.export_zip.return_value = dataset_export
+    client.export_zip.return_value = chart_export
 
     export_resource(
         resource_name="database",
@@ -530,7 +626,7 @@ def test_export_ownership(mocker: MockerFixture, fs: FakeFilesystem) -> None:
 def test_export_resource_jinja_escaping_disabled(
     mocker: MockerFixture,
     fs: FakeFilesystem,
-    dataset_export: BytesIO,
+    chart_export: BytesIO,
 ) -> None:
     """
     Test ``export_resource`` with ``--disable-jinja-escaping``.
@@ -539,7 +635,7 @@ def test_export_resource_jinja_escaping_disabled(
     fs.create_dir(root)
 
     client = mocker.MagicMock()
-    client.export_zip.return_value = dataset_export
+    client.export_zip.return_value = chart_export
 
     # check that Jinja2 was not escaped
     export_resource(