From a814a2b6b4740087a5c054de2fe7251ab35f48e8 Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Wed, 16 Oct 2024 14:57:44 +0200 Subject: [PATCH 1/4] html blocks allows external reference files --- cads_catalogue/contents.py | 50 +++++++++++++++++-- cads_catalogue/layout_manager.py | 86 +++++++++++++++++++++++++++++++- tests/test_15_contents.py | 3 -- tests/test_90_entry_points.py | 6 +-- 4 files changed, 133 insertions(+), 12 deletions(-) diff --git a/cads_catalogue/contents.py b/cads_catalogue/contents.py index 2b07552..8f1bdb4 100644 --- a/cads_catalogue/contents.py +++ b/cads_catalogue/contents.py @@ -21,7 +21,7 @@ import sqlalchemy as sa import structlog -from cads_catalogue import config, database, object_storage +from cads_catalogue import config, database, layout_manager, object_storage THIS_PATH = os.path.abspath(os.path.dirname(__file__)) logger = structlog.get_logger(__name__) @@ -52,6 +52,9 @@ def content_sync( site, ctype, slug = content["site"], content["type"], content["slug"] subpath = os.path.join("contents", site, ctype, slug) for field in OBJECT_STORAGE_UPLOAD_FIELDS: + if field == "layout": + # already done by layout manager + continue file_path = content.get(field) if not file_path: continue @@ -143,9 +146,7 @@ def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, An os.path.join(content_folder, rel_path) ) if os.path.isfile(ancillar_file_path): - metadata[ancillar_file_field] = os.path.abspath( - os.path.join(content_folder, rel_path) - ) + metadata[ancillar_file_field] = ancillar_file_path else: raise ValueError( f"{metadata_file_path} contains reference to {ancillar_file_field} file not found!" @@ -154,6 +155,44 @@ def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, An return ret_value +def transform_layout( + content: dict[str, Any], + storage_settings: config.ObjectStorageSettings, +): + """ + Modify layout.json information inside content metadata, with related uploads to the object storage. + + Parameters + ---------- + content: metadata of a loaded content from files + storage_settings: object with settings to access the object storage + + Returns + ------- + modified version of input resource metadata + """ + if not content.get("layout"): + return content + layout_file_path = content["layout"] + if not os.path.isfile(layout_file_path): + return content + layout_folder_path = os.path.dirname(layout_file_path) + with open(layout_file_path) as fp: + layout_data = json.load(fp) + logger.debug(f"input layout_data: {layout_data}") + + layout_data = layout_manager.transform_html_blocks(layout_data, layout_folder_path) + + logger.debug(f"output layout_data: {layout_data}") + site, ctype, slug = content["site"], content["type"], content["slug"] + subpath = os.path.join("contents", site, ctype, slug) + content["layout"] = layout_manager.store_layout_by_data( + layout_data, content, storage_settings, subpath=subpath + ) + logger.debug(f"layout url: {content['layout']}") + return content + + def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, Any]]: """ Load all contents from a folder and return a dictionary of metadata extracted. @@ -214,9 +253,10 @@ def update_catalogue_contents( "loaded %s contents from folder %s" % (len(contents), contents_package_path) ) involved_content_props = [] - for content in contents: + for content in contents[:]: site, ctype, slug = content["site"], content["type"], content["slug"] involved_content_props.append((site, ctype, slug)) + content = transform_layout(content, storage_settings) try: with session.begin_nested(): content_sync(session, content, storage_settings) diff --git a/cads_catalogue/layout_manager.py b/cads_catalogue/layout_manager.py index 449a130..8482c97 100644 --- a/cads_catalogue/layout_manager.py +++ b/cads_catalogue/layout_manager.py @@ -442,6 +442,86 @@ def transform_cim_blocks( return new_data +def manage_html_block_in_section(section, layout_folder_path): + """ + Look for html blocks and modify accordingly if it has references to external file. + + Parameters + ---------- + section: section of layout.json data + layout_folder_path: path to the folder containing layout file + """ + new_section = copy.deepcopy(section) + blocks = new_section.get("blocks", []) + for i, block in enumerate(copy.deepcopy(blocks)): + if block.get("type") == "html": + block_id = block["id"] + if "content_source" in block: + content_source = block["content_source"] + source_path = os.path.abspath( + os.path.join(layout_folder_path, content_source) + ) + is_content_in_block = "content" in block + if os.path.isfile(source_path): + # replacing/overwrite + if is_content_in_block: + # overwrite + msg = ( + f"found html block {block_id} with both 'content' and 'content_source': " + f"applying overwrite" + ) + logger.warning(msg) + with open(source_path) as fp: + blocks[i]["content"] = fp.read() + del blocks[i]["content_source"] + elif is_content_in_block: + # default + msg = ( + f"found html block {block_id} with both 'content' and 'content_source': " + f"applying default (not found source {content_source})" + ) + logger.warning(msg) + del blocks[i]["content_source"] + else: + # error + raise ValueError( + f"not found referred {content_source} in html block {block_id}" + ) + elif block.get("type") in ("section", "accordion"): + blocks[i] = manage_html_block_in_section(block, layout_folder_path) + return new_section + + +def transform_html_blocks( + layout_data: dict[str, Any], layout_folder_path: str | pathlib.Path +): + """Transform layout.json data replacing html blocks with referred external files. + + Parameters + ---------- + layout_data: data of the layout.json to transform + layout_folder_path: path to the folder containing layout file + + Returns + ------- + dict: dictionary of layout_data modified + """ + new_data = copy.deepcopy(layout_data) + # search all html blocks inside body/main/sections: + body = new_data.get("body", {}) + body_main = body.get("main", {}) + sections = body_main.get("sections", []) + for i, section in enumerate(copy.deepcopy(sections)): + sections[i] = manage_html_block_in_section(section, layout_folder_path) + # search all the html blocks inside body/aside: + aside_section = body.get("aside", {}) + if aside_section: + new_data["body"]["aside"] = manage_html_block_in_section( + aside_section, layout_folder_path + ) + return new_data + + def has_section_id(layout_data: dict[str, Any], section_id: str): """ Return True if layout has section id `section_id`. @@ -468,6 +548,7 @@ def store_layout_by_data( layout_data: dict[str, Any], resource: dict[str, Any], storage_settings: config.ObjectStorageSettings, + subpath: str | None = None, ) -> str: """ Store a layout.json in the object storage providing its json data. @@ -477,6 +558,7 @@ def store_layout_by_data( layout_data: data of the layout.json to store resource: resource dictionary (as returned by `load_resource_from_folder`) storage_settings: object with settings to access the object storage + subpath: bucket subpath, otherwise resources/ is assumed Returns ------- @@ -484,7 +566,8 @@ def store_layout_by_data( """ # upload of modified layout.json tempdir_path = tempfile.mkdtemp() - subpath = os.path.join("resources", resource["resource_uid"]) + if not subpath: + subpath = os.path.join("resources", resource["resource_uid"]) layout_temp_path = os.path.join(tempdir_path, "layout.json") with open(layout_temp_path, "w") as fp: json.dump(layout_data, fp, indent=2) @@ -533,6 +616,7 @@ def transform_layout( cim_layout_path = os.path.join( cim_folder_path, resource["resource_uid"], "quality_assurance.layout.json" ) + layout_data = transform_html_blocks(layout_data, resource_folder_path) layout_data = transform_cim_blocks( layout_data, cim_layout_path, resource["qa_flag"] ) diff --git a/tests/test_15_contents.py b/tests/test_15_contents.py index 5ac6246..5fe1350 100644 --- a/tests/test_15_contents.py +++ b/tests/test_15_contents.py @@ -216,7 +216,6 @@ def test_content_sync( ] content1["publication_date"] = "2021-03-18T11:02:31Z" content1["title"] = "new title" - content1["layout"] = os.path.join(content_folder, "cica-overview.png") with session_obj() as session: # db is not empty: update a content db_content2 = contents.content_sync(session, content1, storage_settings) @@ -253,8 +252,6 @@ def test_content_sync( for key, value in content1.items(): if key in ("publication_date", "content_update"): value = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ") # type: ignore - elif key in ("layout",): - value = "an url" elif key == "keywords": continue assert getattr(db_content2, key) == value diff --git a/tests/test_90_entry_points.py b/tests/test_90_entry_points.py index 4ecebda..9aaa8e5 100644 --- a/tests/test_90_entry_points.py +++ b/tests/test_90_entry_points.py @@ -640,12 +640,12 @@ def test_update_catalogue( os.path.join( TESTDATA_PATH, "cads-contents-json", - "how-to-api", - "layout.json", + "copernicus-interactive-climates-atlas", + "cica-overview.png", ), object_storage_url, bucket_name=bucket_name, - subpath="contents/ads/page/how-to-api", + subpath="contents/cds/application/copernicus-interactive-climates-atlas", **object_storage_kws, ), ] From 2fd5db0d7769a783d13fa58bc21f32eadecc53df Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Thu, 17 Oct 2024 10:03:29 +0200 Subject: [PATCH 2/4] more tests for html blocks --- .../cads-contents-json/how-to-api/layout.json | 3 +- tests/data/cads-contents-json/html_block.html | 1 + tests/test_15_contents.py | 68 ++++++++++++++++++- 3 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 tests/data/cads-contents-json/html_block.html diff --git a/tests/data/cads-contents-json/how-to-api/layout.json b/tests/data/cads-contents-json/how-to-api/layout.json index 7c000f2..2ded561 100644 --- a/tests/data/cads-contents-json/how-to-api/layout.json +++ b/tests/data/cads-contents-json/how-to-api/layout.json @@ -10,7 +10,8 @@ { "id": "page-content", "type": "html", - "content": "
TODO
" + "content": "
TODO
", + "content_source": "../html_block.html" } ] } diff --git a/tests/data/cads-contents-json/html_block.html b/tests/data/cads-contents-json/html_block.html new file mode 100644 index 0000000..31bf37d --- /dev/null +++ b/tests/data/cads-contents-json/html_block.html @@ -0,0 +1 @@ +

this is a content of a html block

\ No newline at end of file diff --git a/tests/test_15_contents.py b/tests/test_15_contents.py index 5fe1350..e33894b 100644 --- a/tests/test_15_contents.py +++ b/tests/test_15_contents.py @@ -1,11 +1,13 @@ import datetime import os.path +import unittest.mock from operator import itemgetter +from typing import Any import pytest_mock import sqlalchemy as sa -from cads_catalogue import config, contents, object_storage +from cads_catalogue import config, contents, layout_manager, object_storage THIS_PATH = os.path.abspath(os.path.dirname(__file__)) TESTDATA_PATH = os.path.join(THIS_PATH, "data") @@ -255,3 +257,67 @@ def test_content_sync( elif key == "keywords": continue assert getattr(db_content2, key) == value + + +def test_transform_layout(mocker: pytest_mock.MockerFixture): + mocker.patch.object(object_storage, "store_file", return_value="an url") + _store_layout_by_data = mocker.spy(layout_manager, "store_layout_by_data") + my_settings_dict = { + "object_storage_url": "https://object/storage/url/", + "storage_admin": "admin1", + "storage_password": "secret1", + "catalogue_bucket": "mycatalogue_bucket", + "document_storage_url": "https://document/storage/url/", + } + storage_settings = config.ObjectStorageSettings(**my_settings_dict) + content_folder = os.path.join(TEST_CONTENT_ROOT_PATH, "how-to-api") + initial_md_content: dict[str, Any] = { + "site": "cds", + "type": "page", + "slug": "how-to-api", + "title": "CDSAPI setup", + "description": "Access the full data store catalogue, with search and availability features", + "publication_date": "2024-09-13T10:01:50Z", + "content_update": "2024-09-16T02:10:22Z", + "link": None, + "keywords": [], + "data": None, + "layout": os.path.join(content_folder, "layout.json"), + "image": None, + } + expected_layout_data = { + "title": "CDSAPI setup", + "description": "Access the full data store catalogue, with search and availability features", + "body": { + "main": { + "sections": [ + { + "id": "main", + "blocks": [ + { + "id": "page-content", + "type": "html", + "content": "

this is a content of a html block

", + } + ], + } + ] + } + }, + } + + effective_md_content = contents.transform_layout( + initial_md_content, storage_settings + ) + expected_md_content = initial_md_content.copy() + expected_md_content["layout"] = "an url" + + assert effective_md_content == expected_md_content + assert _store_layout_by_data.mock_calls == [ + unittest.mock.call( + expected_layout_data, + expected_md_content, + storage_settings, + subpath="contents/cds/page/how-to-api", + ) + ] From 1567773dc6b5509551f3c3b1aabed05497c33221 Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Thu, 17 Oct 2024 14:30:49 +0200 Subject: [PATCH 3/4] style --- cads_catalogue/layout_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cads_catalogue/layout_manager.py b/cads_catalogue/layout_manager.py index 8482c97..0b0eda1 100644 --- a/cads_catalogue/layout_manager.py +++ b/cads_catalogue/layout_manager.py @@ -513,7 +513,7 @@ def transform_html_blocks( sections = body_main.get("sections", []) for i, section in enumerate(copy.deepcopy(sections)): sections[i] = manage_html_block_in_section(section, layout_folder_path) - # search all the html blocks inside body/aside: + # search all html blocks inside body/aside: aside_section = body.get("aside", {}) if aside_section: new_data["body"]["aside"] = manage_html_block_in_section( From 1c51cef017d23bec26cc445ba50b23a378f5a651 Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Thu, 17 Oct 2024 14:31:04 +0200 Subject: [PATCH 4/4] more tests on layout manager --- tests/test_30_layout_manager.py | 104 ++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/tests/test_30_layout_manager.py b/tests/test_30_layout_manager.py index 7dec86e..492b10b 100644 --- a/tests/test_30_layout_manager.py +++ b/tests/test_30_layout_manager.py @@ -916,3 +916,107 @@ def test_transform_licence_acceptance_blocks(session_obj: sa.orm.sessionmaker): expected_layout_data = json.load(fp) assert out_layout_data == expected_layout_data session.close() + + +def build_layout_data(blocks): + """For testing transform_html_blocks.""" + layout_data = { + "title": "CDSAPI setup", + "description": "Access the full data store catalogue, with search and availability features", + "body": { + "main": { + "sections": [ + { + "id": "main", + "blocks": blocks, + } + ] + } + }, + } + return layout_data + + +def test_transform_html_blocks(): + # note: layout_folder_path is the reference for relative path of the html content source + layout_folder_path = os.path.join(TESTDATA_PATH, "cads-contents-json", "how-to-api") + with open( + os.path.join(TESTDATA_PATH, "cads-contents-json", "html_block.html") + ) as fp: + html_block_content = fp.read() + thumb_block = { + "id": "test_block", + "type": "thumb-markdown", + "content": "EAC4", + } + html_block_no_change = { + "id": "test_block", + "type": "html", + "content": "

this must be static

", + } + html_block_replace = { + "id": "test_block", + "type": "html", + "content_source": "../html_block.html", + } + html_block_overwrite = { + "id": "test_block", + "type": "html", + "content_source": "../html_block.html", + "content": "
to be overriden
", + } + html_block_default = { + "id": "test_block", + "type": "html", + "content_source": "../not_exist.html", + "content": "
to be overriden
", + } + html_block_broken = { + "id": "test_block", + "type": "html", + "content_source": "../not_exist.html", + } + exp_html_block_replaced = { + "id": "test_block", + "type": "html", + "content": html_block_content, + } + exp_html_block_default = { + "id": "test_block", + "type": "html", + "content": "
to be overriden
", + } + # no change + in_layout_data = build_layout_data([thumb_block, html_block_no_change, thumb_block]) + out_layout_data = layout_manager.transform_html_blocks( + in_layout_data, layout_folder_path + ) + assert out_layout_data == in_layout_data + # replace + in_layout_data = build_layout_data([thumb_block, html_block_replace, thumb_block]) + out_layout_data = layout_manager.transform_html_blocks( + in_layout_data, layout_folder_path + ) + assert out_layout_data == build_layout_data( + [thumb_block, exp_html_block_replaced, thumb_block] + ) + # overwrite + in_layout_data = build_layout_data([thumb_block, html_block_overwrite, thumb_block]) + out_layout_data = layout_manager.transform_html_blocks( + in_layout_data, layout_folder_path + ) + assert out_layout_data == build_layout_data( + [thumb_block, exp_html_block_replaced, thumb_block] + ) + # default + in_layout_data = build_layout_data([thumb_block, html_block_default, thumb_block]) + out_layout_data = layout_manager.transform_html_blocks( + in_layout_data, layout_folder_path + ) + assert out_layout_data == build_layout_data( + [thumb_block, exp_html_block_default, thumb_block] + ) + # broken + in_layout_data = build_layout_data([thumb_block, html_block_broken, thumb_block]) + with pytest.raises(ValueError): + layout_manager.transform_html_blocks(in_layout_data, layout_folder_path)