Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

management of html blocks in layout.json #135

Merged
merged 5 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 45 additions & 5 deletions cads_catalogue/contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import sqlalchemy as sa
import structlog

from cads_catalogue import config, database, object_storage
from cads_catalogue import config, database, layout_manager, object_storage

THIS_PATH = os.path.abspath(os.path.dirname(__file__))
logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -52,6 +52,9 @@ def content_sync(
site, ctype, slug = content["site"], content["type"], content["slug"]
subpath = os.path.join("contents", site, ctype, slug)
for field in OBJECT_STORAGE_UPLOAD_FIELDS:
if field == "layout":
# already done by layout manager
continue
file_path = content.get(field)
if not file_path:
continue
Expand Down Expand Up @@ -143,9 +146,7 @@ def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, An
os.path.join(content_folder, rel_path)
)
if os.path.isfile(ancillar_file_path):
metadata[ancillar_file_field] = os.path.abspath(
os.path.join(content_folder, rel_path)
)
metadata[ancillar_file_field] = ancillar_file_path
else:
raise ValueError(
f"{metadata_file_path} contains reference to {ancillar_file_field} file not found!"
Expand All @@ -154,6 +155,44 @@ def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, An
return ret_value


def transform_layout(
    content: dict[str, Any],
    storage_settings: config.ObjectStorageSettings,
) -> dict[str, Any]:
    """
    Modify layout.json information inside content metadata, with related uploads to the object storage.

    The layout file referenced by ``content["layout"]`` is loaded, its html
    blocks are processed by ``layout_manager.transform_html_blocks`` and the
    result is stored through ``layout_manager.store_layout_by_data``;
    ``content["layout"]`` is then replaced by the value returned by that call.
    The input dictionary is updated in place and returned.

    Parameters
    ----------
    content: metadata of a loaded content from files
    storage_settings: object with settings to access the object storage

    Returns
    -------
    modified version of input resource metadata (the same object passed as
    `content`); it is returned untouched when it has no layout or the layout
    path is not an existing file
    """
    layout_file_path = content.get("layout")
    if not layout_file_path or not os.path.isfile(layout_file_path):
        # nothing to transform: no layout declared, or not a local file
        return content

    # JSON documents are UTF-8 encoded: do not depend on the locale default
    with open(layout_file_path, encoding="utf-8") as fp:
        layout_data = json.load(fp)
    logger.debug(f"input layout_data: {layout_data}")

    # references of html blocks are relative to the folder of the layout file
    layout_folder_path = os.path.dirname(layout_file_path)
    layout_data = layout_manager.transform_html_blocks(layout_data, layout_folder_path)
    logger.debug(f"output layout_data: {layout_data}")

    # contents are stored under contents/<site>/<type>/<slug>
    subpath = os.path.join("contents", content["site"], content["type"], content["slug"])
    content["layout"] = layout_manager.store_layout_by_data(
        layout_data, content, storage_settings, subpath=subpath
    )
    logger.debug(f"layout url: {content['layout']}")
    return content


def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, Any]]:
"""
Load all contents from a folder and return a dictionary of metadata extracted.
Expand Down Expand Up @@ -214,9 +253,10 @@ def update_catalogue_contents(
"loaded %s contents from folder %s" % (len(contents), contents_package_path)
)
involved_content_props = []
for content in contents:
for content in contents[:]:
site, ctype, slug = content["site"], content["type"], content["slug"]
involved_content_props.append((site, ctype, slug))
content = transform_layout(content, storage_settings)
try:
with session.begin_nested():
content_sync(session, content, storage_settings)
Expand Down
86 changes: 85 additions & 1 deletion cads_catalogue/layout_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,86 @@ def transform_cim_blocks(
return new_data


def manage_html_block_in_section(section, layout_folder_path):
"""
Look for html blocks and modify accordingly if it has references to external file.

Parameters
----------
section: section of layout.json data
layout_folder_path: path to the folder containing layout file
"""
new_section = copy.deepcopy(section)
blocks = new_section.get("blocks", [])
for i, block in enumerate(copy.deepcopy(blocks)):
if block.get("type") == "html":
block_id = block["id"]
if "content_source" in block:
content_source = block["content_source"]
source_path = os.path.abspath(
os.path.join(layout_folder_path, content_source)
)
is_content_in_block = "content" in block
if os.path.isfile(source_path):
# replacing/overwrite
if is_content_in_block:
# overwrite
msg = (
f"found html block {block_id} with both 'content' and 'content_source': "
f"applying overwrite"
)
logger.warning(msg)
with open(source_path) as fp:
blocks[i]["content"] = fp.read()
del blocks[i]["content_source"]
elif is_content_in_block:
# default
msg = (
f"found html block {block_id} with both 'content' and 'content_source': "
f"applying default (not found source {content_source})"
)
logger.warning(msg)
del blocks[i]["content_source"]
else:
# error
raise ValueError(
f"not found referred {content_source} in html block {block_id}"
)
elif block.get("type") in ("section", "accordion"):
blocks[i] = manage_html_block_in_section(block, layout_folder_path)
return new_section


def transform_html_blocks(
layout_data: dict[str, Any], layout_folder_path: str | pathlib.Path
):
"""Transform layout.json data replacing html blocks with referred external files.

Parameters
----------
layout_data: data of the layout.json to transform
layout_folder_path: path to the folder containing layout file

Returns
-------
dict: dictionary of layout_data modified
"""
new_data = copy.deepcopy(layout_data)
# search all html blocks inside body/main/sections:
body = new_data.get("body", {})
body_main = body.get("main", {})
sections = body_main.get("sections", [])
for i, section in enumerate(copy.deepcopy(sections)):
sections[i] = manage_html_block_in_section(section, layout_folder_path)
# search all html blocks inside body/aside:
aside_section = body.get("aside", {})
if aside_section:
new_data["body"]["aside"] = manage_html_block_in_section(
aside_section, layout_folder_path
)
return new_data


def has_section_id(layout_data: dict[str, Any], section_id: str):
"""
Return True if layout has section id `section_id`.
Expand All @@ -468,6 +548,7 @@ def store_layout_by_data(
layout_data: dict[str, Any],
resource: dict[str, Any],
storage_settings: config.ObjectStorageSettings,
subpath: str | None = None,
) -> str:
"""
Store a layout.json in the object storage providing its json data.
Expand All @@ -477,14 +558,16 @@ def store_layout_by_data(
layout_data: data of the layout.json to store
resource: resource dictionary (as returned by `load_resource_from_folder`)
storage_settings: object with settings to access the object storage
subpath: bucket subpath, otherwise resources/<resource_uid> is assumed

Returns
-------
str: URL of the layout.json uploaded to the object storage
"""
# upload of modified layout.json
tempdir_path = tempfile.mkdtemp()
subpath = os.path.join("resources", resource["resource_uid"])
if not subpath:
subpath = os.path.join("resources", resource["resource_uid"])
layout_temp_path = os.path.join(tempdir_path, "layout.json")
with open(layout_temp_path, "w") as fp:
json.dump(layout_data, fp, indent=2)
Expand Down Expand Up @@ -533,6 +616,7 @@ def transform_layout(
cim_layout_path = os.path.join(
cim_folder_path, resource["resource_uid"], "quality_assurance.layout.json"
)
layout_data = transform_html_blocks(layout_data, resource_folder_path)
layout_data = transform_cim_blocks(
layout_data, cim_layout_path, resource["qa_flag"]
)
Expand Down
3 changes: 2 additions & 1 deletion tests/data/cads-contents-json/how-to-api/layout.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
{
"id": "page-content",
"type": "html",
"content": "<div>TODO</div>"
"content": "<div>TODO</div>",
"content_source": "../html_block.html"
}
]
}
Expand Down
1 change: 1 addition & 0 deletions tests/data/cads-contents-json/html_block.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<p>this is a content of a html block</p>
71 changes: 67 additions & 4 deletions tests/test_15_contents.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import datetime
import os.path
import unittest.mock
from operator import itemgetter
from typing import Any

import pytest_mock
import sqlalchemy as sa

from cads_catalogue import config, contents, object_storage
from cads_catalogue import config, contents, layout_manager, object_storage

THIS_PATH = os.path.abspath(os.path.dirname(__file__))
TESTDATA_PATH = os.path.join(THIS_PATH, "data")
Expand Down Expand Up @@ -216,7 +218,6 @@ def test_content_sync(
]
content1["publication_date"] = "2021-03-18T11:02:31Z"
content1["title"] = "new title"
content1["layout"] = os.path.join(content_folder, "cica-overview.png")
with session_obj() as session:
# db is not empty: update a content
db_content2 = contents.content_sync(session, content1, storage_settings)
Expand Down Expand Up @@ -253,8 +254,70 @@ def test_content_sync(
for key, value in content1.items():
if key in ("publication_date", "content_update"):
value = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ") # type: ignore
elif key in ("layout",):
value = "an url"
elif key == "keywords":
continue
assert getattr(db_content2, key) == value


def test_transform_layout(mocker: pytest_mock.MockerFixture):
    """Check contents.transform_layout on the 'how-to-api' test content.

    The html block of the test layout has both 'content' and 'content_source':
    the expected layout data carries the text of the referred html file, and
    the 'layout' of the content metadata becomes the value returned by the
    patched object_storage.store_file.
    """
    # no real upload: the object storage always answers with the same url
    mocker.patch.object(object_storage, "store_file", return_value="an url")
    store_layout_spy = mocker.spy(layout_manager, "store_layout_by_data")
    storage_settings = config.ObjectStorageSettings(
        object_storage_url="https://object/storage/url/",
        storage_admin="admin1",
        storage_password="secret1",
        catalogue_bucket="mycatalogue_bucket",
        document_storage_url="https://document/storage/url/",
    )

    title = "CDSAPI setup"
    description = (
        "Access the full data store catalogue, with search and availability features"
    )
    layout_path = os.path.join(TEST_CONTENT_ROOT_PATH, "how-to-api", "layout.json")
    initial_md_content: dict[str, Any] = {
        "site": "cds",
        "type": "page",
        "slug": "how-to-api",
        "title": title,
        "description": description,
        "publication_date": "2024-09-13T10:01:50Z",
        "content_update": "2024-09-16T02:10:22Z",
        "link": None,
        "keywords": [],
        "data": None,
        "layout": layout_path,
        "image": None,
    }
    expected_html_block = {
        "id": "page-content",
        "type": "html",
        "content": "<p>this is a content of a html block</p>",
    }
    expected_layout_data = {
        "title": title,
        "description": description,
        "body": {
            "main": {"sections": [{"id": "main", "blocks": [expected_html_block]}]}
        },
    }

    effective_md_content = contents.transform_layout(
        initial_md_content, storage_settings
    )

    # only the layout is expected to change, replaced by the stored url
    expected_md_content = initial_md_content.copy()
    expected_md_content["layout"] = "an url"
    assert effective_md_content == expected_md_content

    expected_call = unittest.mock.call(
        expected_layout_data,
        expected_md_content,
        storage_settings,
        subpath="contents/cds/page/how-to-api",
    )
    assert store_layout_spy.mock_calls == [expected_call]
Loading