Skip to content

Commit

Permalink
different uids for each site-application
Browse files Browse the repository at this point in the history
  • Loading branch information
alex75 committed Sep 18, 2024
1 parent 3b6aa61 commit 11b7460
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 80 deletions.
69 changes: 37 additions & 32 deletions cads_catalogue/contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,50 +98,55 @@ def content_sync(
return db_content


def load_content_folder(content_folder: str | pathlib.Path) -> dict[str, Any]:
def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, Any]]:
"""
Parse a content folder and returns its metadata dictionary.
Parse a content folder and return a list of metadata dictionaries, one for each site the content belongs to.
Parameters
----------
content_folder: folder path containing content files
Returns
-------
dictionary of information parsed.
list of dictionaries of parsed information, one per site.
"""
metadata_file_path = os.path.join(content_folder, "metadata.json")
with open(metadata_file_path) as fp:
data = json.load(fp)
metadata = {
"site": ",".join(data["site"]),
"type": data["resource_type"],
"content_uid": data["id"],
"title": data["title"],
"description": data["abstract"],
"publication_date": data["publication_date"],
"content_update": data["update_date"],
"link": data.get("link"),
"keywords": data.get("keywords", []),
"data": data.get("data"),
# managed below:
# "image": None,
# "layout": None,
}
for ancillar_file_field in OBJECT_STORAGE_UPLOAD_FIELDS: # image, layout
metadata[ancillar_file_field] = None
rel_path = data.get(ancillar_file_field)
if rel_path:
ancillar_file_path = os.path.abspath(os.path.join(content_folder, rel_path))
if os.path.isfile(ancillar_file_path):
metadata[ancillar_file_field] = os.path.abspath(
ret_value = []
for site in data["site"]:
metadata = {
"site": site,
"type": data["resource_type"],
"content_uid": f"{site}-{data['resource_type']}-{data['id']}",
"title": data["title"],
"description": data["abstract"],
"publication_date": data["publication_date"],
"content_update": data["update_date"],
"link": data.get("link"),
"keywords": data.get("keywords", []),
"data": data.get("data"),
# managed below:
# "image": None,
# "layout": None,
}
for ancillar_file_field in OBJECT_STORAGE_UPLOAD_FIELDS: # image, layout
metadata[ancillar_file_field] = None
rel_path = data.get(ancillar_file_field)
if rel_path:
ancillar_file_path = os.path.abspath(
os.path.join(content_folder, rel_path)
)
else:
raise ValueError(
f"{metadata_file_path} contains reference to {ancillar_file_field} file not found!"
)
return metadata
if os.path.isfile(ancillar_file_path):
metadata[ancillar_file_field] = os.path.abspath(
os.path.join(content_folder, rel_path)
)
else:
raise ValueError(
f"{metadata_file_path} contains reference to {ancillar_file_field} file not found!"
)
ret_value.append(metadata)
return ret_value


def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, Any]]:
Expand Down Expand Up @@ -169,13 +174,13 @@ def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, An
logger.warning("unknown file %r found" % content_folder)
continue
try:
content_md = load_content_folder(content_folder)
contents_md = load_content_folder(content_folder)
except: # noqa
logger.exception(
"failed parsing content in %s, error follows" % content_folder
)
continue
loaded_contents.append(content_md)
loaded_contents += contents_md
return loaded_contents


Expand Down
87 changes: 53 additions & 34 deletions tests/test_15_contents.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import os.path
from operator import itemgetter

import pytest_mock
import sqlalchemy as sa
Expand All @@ -15,43 +16,59 @@ def test_load_content_folder() -> None:
content_folder = os.path.join(
TEST_CONTENT_ROOT_PATH, "copernicus-interactive-climates-atlas"
)
expected_content = {
"content_uid": "copernicus-interactive-climates-atlas",
"publication_date": "2024-09-13T00:00:00Z",
"description": "The Copernicus Interactive Climate Atlas provides graphical "
"information about recent past trends and future changes "
"(for different scenarios and global warming levels)",
"image": os.path.join(content_folder, "cica-overview.png"),
"keywords": [
"Product type: Application",
"Spatial coverage: Global",
"Temporal coverage: Past",
"Variable domain: Land (hydrology)",
"Variable domain: Land (physics)",
"Variable domain: Land (biosphere)",
"Provider: Copernicus C3S",
],
"layout": None,
"link": "https://atlas.climate.copernicus.eu/atlas",
"content_update": "2024-09-16T00:00:00Z",
"site": "cds",
"title": "Copernicus Interactive Climate Atlas",
"type": "application",
"data": {
"file-format": "GRIB (optional conversion to netCDF)",
"data-type": "Gridded",
"horizontal-coverage": "Global",
},
}
expected_contents = [
{
"content_uid": "cds-application-copernicus-interactive-climates-atlas",
"publication_date": "2024-09-13T00:00:00Z",
"description": "The Copernicus Interactive Climate Atlas provides graphical "
"information about recent past trends and future changes "
"(for different scenarios and global warming levels)",
"image": os.path.join(content_folder, "cica-overview.png"),
"keywords": [
"Product type: Application",
"Spatial coverage: Global",
"Temporal coverage: Past",
"Variable domain: Land (hydrology)",
"Variable domain: Land (physics)",
"Variable domain: Land (biosphere)",
"Provider: Copernicus C3S",
],
"layout": None,
"link": "https://atlas.climate.copernicus.eu/atlas",
"content_update": "2024-09-16T00:00:00Z",
"site": "cds",
"title": "Copernicus Interactive Climate Atlas",
"type": "application",
"data": {
"file-format": "GRIB (optional conversion to netCDF)",
"data-type": "Gridded",
"horizontal-coverage": "Global",
},
}
]

effective_content = contents.load_content_folder(content_folder)
assert effective_content == expected_content
effective_contents = contents.load_content_folder(content_folder)
assert effective_contents == expected_contents


def test_load_contents() -> None:
expected_contents = [
{
"content_uid": "copernicus-interactive-climates-atlas",
"content_uid": "ads-page-how-to-api",
"publication_date": "2024-09-13T10:01:50Z",
"description": "Access the full data store catalogue, with search and availability features",
"image": None,
"keywords": [],
"layout": os.path.join(TEST_CONTENT_ROOT_PATH, "how-to-api", "layout.json"),
"content_update": "2024-09-16T02:10:22Z",
"link": None,
"site": "ads",
"title": "CDSAPI setup",
"type": "page",
"data": None,
},
{
"content_uid": "cds-application-copernicus-interactive-climates-atlas",
"publication_date": "2024-09-13T00:00:00Z",
"description": "The Copernicus Interactive Climate Atlas provides graphical "
"information about recent past trends and future changes "
Expand Down Expand Up @@ -83,21 +100,23 @@ def test_load_contents() -> None:
},
},
{
"content_uid": "how-to-api",
"content_uid": "cds-page-how-to-api",
"publication_date": "2024-09-13T10:01:50Z",
"description": "Access the full data store catalogue, with search and availability features",
"image": None,
"keywords": [],
"layout": os.path.join(TEST_CONTENT_ROOT_PATH, "how-to-api", "layout.json"),
"content_update": "2024-09-16T02:10:22Z",
"link": None,
"site": "cds,ads",
"site": "cds",
"title": "CDSAPI setup",
"type": "page",
"data": None,
},
]
effective_contents = contents.load_contents(TEST_CONTENT_ROOT_PATH)
effective_contents = sorted(
contents.load_contents(TEST_CONTENT_ROOT_PATH), key=itemgetter("content_uid")
)
assert effective_contents == expected_contents


Expand Down
29 changes: 15 additions & 14 deletions tests/test_90_entry_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,9 +617,9 @@ def test_update_catalogue(
_update_catalogue_messages.assert_called_once()
_update_catalogue_messages.reset_mock()
# check object storage calls
assert _store_file.call_count == 7
assert _store_file.call_count == 8
# # overview.png * 3 = 3 (one from contents)
# # layout.json * 2 = 2 (one from contents)
# # layout.json * 3 = 3 (2 from contents)
# # form.json = 1
# # constraints.json = 1
# # check object storage calls
Expand All @@ -645,7 +645,7 @@ def test_update_catalogue(
),
object_storage_url,
bucket_name=bucket_name,
subpath="contents/how-to-api",
subpath="contents/ads-page-how-to-api",
**object_storage_kws,
),
]
Expand Down Expand Up @@ -686,13 +686,14 @@ def test_update_catalogue(
"select content_uid, title, site, type from contents order by content_uid"
)
assert session.execute(sa.text(sql2)).all() == [
("ads-page-how-to-api", "CDSAPI setup", "ads", "page"),
(
"copernicus-interactive-climates-atlas",
"cds-application-copernicus-interactive-climates-atlas",
"Copernicus Interactive Climate Atlas",
"cds",
"application",
),
("how-to-api", "CDSAPI setup", "cds,ads", "page"),
("cds-page-how-to-api", "CDSAPI setup", "cds", "page"),
]

# 3.bis repeat last run -------------------------------------------------------------
Expand Down Expand Up @@ -763,7 +764,7 @@ def test_update_catalogue(
)
assert (
session.execute(sa.text("select count(*) from contents")).scalars().one()
== 2
== 3
)
sql = (
"select catalogue_repo_commit, metadata_repo_commit, licence_repo_commit, "
Expand Down Expand Up @@ -796,7 +797,7 @@ def test_update_catalogue(
session.execute(
sa.text(
"update contents set title='a new title' "
"where content_uid='how-to-api'"
"where content_uid='ads-page-how-to-api'"
)
)
session.commit()
Expand Down Expand Up @@ -848,8 +849,8 @@ def test_update_catalogue(
# check load of contents is run (it's forced)
_update_catalogue_contents.assert_called_once()
_update_catalogue_contents.reset_mock()
# check object storage called for 1 dataset, 4 licences and 2 contents (5 + 4*2 + 2)
assert _store_file.call_count == 15
# check object storage called for 1 dataset, 4 licences and 3 contents (5 + 4*2 + 3)
assert _store_file.call_count == 16
_store_file.reset_mock()

# check db changes are reset
Expand All @@ -864,7 +865,7 @@ def test_update_catalogue(
)
).all() == [(True,)]
assert session.execute(
sa.text("select title from contents where content_uid='how-to-api'")
sa.text("select title from contents where content_uid='ads-page-how-to-api'")
).all() == [("CDSAPI setup",)]

# 5. use 'include' with a pattern that doesn't match anything ----------------------
Expand Down Expand Up @@ -1214,13 +1215,13 @@ def test_update_catalogue(
_update_catalogue_contents.assert_called_once()
_update_catalogue_contents.reset_mock()
# check object storage called
assert _store_file.call_count == 50
assert _store_file.call_count == 51
# # num.licences * 2 = 8
# # num.datasets overview.png * 2 = 16
# # num.datasets layout.json = 8
# # num.datasets form.json = 8
# # num.datasets constraints.json = 8
# # num.contents = 2
# # num.contents = 3
_store_file.reset_mock()

# check db content
Expand Down Expand Up @@ -1341,13 +1342,13 @@ def test_update_catalogue(
_update_catalogue_contents.assert_called_once()
_update_catalogue_contents.reset_mock()
# check object storage called
assert _store_file.call_count == 50
assert _store_file.call_count == 51
# # num.licences * 2 = 8
# # num.datasets overview.png * 2 = 16
# # num.datasets layout.json = 8
# # num.datasets form.json = 8
# # num.datasets constraints.json = 8
# # num.contents = 2
# # num.contents = 3
_store_file.reset_mock()

# check db content
Expand Down

0 comments on commit 11b7460

Please sign in to comment.