From 11b7460b98a4d38d614ab37988fab4526cb23925 Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Wed, 18 Sep 2024 18:18:31 +0200 Subject: [PATCH] different uids for each site-application --- cads_catalogue/contents.py | 69 ++++++++++++++------------- tests/test_15_contents.py | 87 +++++++++++++++++++++-------------- tests/test_90_entry_points.py | 29 ++++++------ 3 files changed, 105 insertions(+), 80 deletions(-) diff --git a/cads_catalogue/contents.py b/cads_catalogue/contents.py index 3aa15a6..55fdfb8 100644 --- a/cads_catalogue/contents.py +++ b/cads_catalogue/contents.py @@ -98,9 +98,9 @@ def content_sync( return db_content -def load_content_folder(content_folder: str | pathlib.Path) -> dict[str, Any]: +def load_content_folder(content_folder: str | pathlib.Path) -> List[dict[str, Any]]: """ - Parse a content folder and returns its metadata dictionary. + Parse folder and returns a list of metadata dictionaries, each one for a content. Parameters ---------- @@ -108,40 +108,45 @@ def load_content_folder(content_folder: str | pathlib.Path) -> dict[str, Any]: Returns ------- - dictionary of information parsed. + list of dictionaries of information parsed. """ metadata_file_path = os.path.join(content_folder, "metadata.json") with open(metadata_file_path) as fp: data = json.load(fp) - metadata = { - "site": ",".join(data["site"]), - "type": data["resource_type"], - "content_uid": data["id"], - "title": data["title"], - "description": data["abstract"], - "publication_date": data["publication_date"], - "content_update": data["update_date"], - "link": data.get("link"), - "keywords": data.get("keywords", []), - "data": data.get("data"), - # managed below: - # "image": None, - # "layout": None, - } - for ancillar_file_field in OBJECT_STORAGE_UPLOAD_FIELDS: # image, layout - metadata[ancillar_file_field] = None - rel_path = data.get(ancillar_file_field) - if rel_path: - ancillar_file_path = os.path.abspath(os.path.join(content_folder, rel_path)) - if os.path.isfile(ancillar_file_path): - metadata[ancillar_file_field] = os.path.abspath( + ret_value = [] + for site in data["site"]: + metadata = { + "site": site, + "type": data["resource_type"], + "content_uid": f"{site}-{data['resource_type']}-{data['id']}", + "title": data["title"], + "description": data["abstract"], + "publication_date": data["publication_date"], + "content_update": data["update_date"], + "link": data.get("link"), + "keywords": data.get("keywords", []), + "data": data.get("data"), + # managed below: + # "image": None, + # "layout": None, + } + for ancillar_file_field in OBJECT_STORAGE_UPLOAD_FIELDS: # image, layout + metadata[ancillar_file_field] = None + rel_path = data.get(ancillar_file_field) + if rel_path: + ancillar_file_path = os.path.abspath( os.path.join(content_folder, rel_path) ) - else: - raise ValueError( - f"{metadata_file_path} contains reference to {ancillar_file_field} file not found!" - ) - return metadata + if os.path.isfile(ancillar_file_path): + metadata[ancillar_file_field] = os.path.abspath( + os.path.join(content_folder, rel_path) + ) + else: + raise ValueError( + f"{metadata_file_path} contains reference to {ancillar_file_field} file not found!" + ) + ret_value.append(metadata) + return ret_value def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, Any]]: @@ -169,13 +174,13 @@ def load_contents(contents_root_folder: str | pathlib.Path) -> List[dict[str, An logger.warning("unknown file %r found" % content_folder) continue try: - content_md = load_content_folder(content_folder) + contents_md = load_content_folder(content_folder) except: # noqa logger.exception( "failed parsing content in %s, error follows" % content_folder ) continue - loaded_contents.append(content_md) + loaded_contents += contents_md return loaded_contents diff --git a/tests/test_15_contents.py b/tests/test_15_contents.py index db87001..ddda908 100644 --- a/tests/test_15_contents.py +++ b/tests/test_15_contents.py @@ -1,5 +1,6 @@ import datetime import os.path +from operator import itemgetter import pytest_mock import sqlalchemy as sa @@ -15,43 +16,59 @@ def test_load_content_folder() -> None: content_folder = os.path.join( TEST_CONTENT_ROOT_PATH, "copernicus-interactive-climates-atlas" ) - expected_content = { - "content_uid": "copernicus-interactive-climates-atlas", - "publication_date": "2024-09-13T00:00:00Z", - "description": "The Copernicus Interactive Climate Atlas provides graphical " - "information about recent past trends and future changes " - "(for different scenarios and global warming levels)", - "image": os.path.join(content_folder, "cica-overview.png"), - "keywords": [ - "Product type: Application", - "Spatial coverage: Global", - "Temporal coverage: Past", - "Variable domain: Land (hydrology)", - "Variable domain: Land (physics)", - "Variable domain: Land (biosphere)", - "Provider: Copernicus C3S", - ], - "layout": None, - "link": "https://atlas.climate.copernicus.eu/atlas", - "content_update": "2024-09-16T00:00:00Z", - "site": "cds", - "title": "Copernicus Interactive Climate Atlas", - "type": "application", - "data": { - "file-format": "GRIB (optional conversion to netCDF)", - "data-type": "Gridded", - "horizontal-coverage": "Global", - }, - } + expected_contents = [ + { + "content_uid": "cds-application-copernicus-interactive-climates-atlas", + "publication_date": "2024-09-13T00:00:00Z", + "description": "The Copernicus Interactive Climate Atlas provides graphical " + "information about recent past trends and future changes " + "(for different scenarios and global warming levels)", + "image": os.path.join(content_folder, "cica-overview.png"), + "keywords": [ + "Product type: Application", + "Spatial coverage: Global", + "Temporal coverage: Past", + "Variable domain: Land (hydrology)", + "Variable domain: Land (physics)", + "Variable domain: Land (biosphere)", + "Provider: Copernicus C3S", + ], + "layout": None, + "link": "https://atlas.climate.copernicus.eu/atlas", + "content_update": "2024-09-16T00:00:00Z", + "site": "cds", + "title": "Copernicus Interactive Climate Atlas", + "type": "application", + "data": { + "file-format": "GRIB (optional conversion to netCDF)", + "data-type": "Gridded", + "horizontal-coverage": "Global", + }, + } + ] - effective_content = contents.load_content_folder(content_folder) - assert effective_content == expected_content + effective_contents = contents.load_content_folder(content_folder) + assert effective_contents == expected_contents def test_load_contents() -> None: expected_contents = [ { - "content_uid": "copernicus-interactive-climates-atlas", + "content_uid": "ads-page-how-to-api", + "publication_date": "2024-09-13T10:01:50Z", + "description": "Access the full data store catalogue, with search and availability features", + "image": None, + "keywords": [], + "layout": os.path.join(TEST_CONTENT_ROOT_PATH, "how-to-api", "layout.json"), + "content_update": "2024-09-16T02:10:22Z", + "link": None, + "site": "ads", + "title": "CDSAPI setup", + "type": "page", + "data": None, + }, + { + "content_uid": "cds-application-copernicus-interactive-climates-atlas", "publication_date": "2024-09-13T00:00:00Z", "description": "The Copernicus Interactive Climate Atlas provides graphical " "information about recent past trends and future changes " @@ -83,7 +100,7 @@ def test_load_contents() -> None: }, }, { - "content_uid": "how-to-api", + "content_uid": "cds-page-how-to-api", "publication_date": "2024-09-13T10:01:50Z", "description": "Access the full data store catalogue, with search and availability features", "image": None, @@ -91,13 +108,15 @@ def test_load_contents() -> None: "layout": os.path.join(TEST_CONTENT_ROOT_PATH, "how-to-api", "layout.json"), "content_update": "2024-09-16T02:10:22Z", "link": None, - "site": "cds,ads", + "site": "cds", "title": "CDSAPI setup", "type": "page", "data": None, }, ] - effective_contents = contents.load_contents(TEST_CONTENT_ROOT_PATH) + effective_contents = sorted( + contents.load_contents(TEST_CONTENT_ROOT_PATH), key=itemgetter("content_uid") + ) assert effective_contents == expected_contents diff --git a/tests/test_90_entry_points.py b/tests/test_90_entry_points.py index 663dd4f..3c3cefe 100644 --- a/tests/test_90_entry_points.py +++ b/tests/test_90_entry_points.py @@ -617,9 +617,9 @@ def test_update_catalogue( _update_catalogue_messages.assert_called_once() _update_catalogue_messages.reset_mock() # check object storage calls - assert _store_file.call_count == 7 + assert _store_file.call_count == 8 # # overview.png * 3 = 3 (one from contents) - # # layout.json * 2 = 2 (one from contents) + # # layout.json * 3 = 3 (2 from contents) # # form.json = 1 # # constraints.json = 1 # # check object storage calls @@ -645,7 +645,7 @@ def test_update_catalogue( ), object_storage_url, bucket_name=bucket_name, - subpath="contents/how-to-api", + subpath="contents/ads-page-how-to-api", **object_storage_kws, ), ] @@ -686,13 +686,14 @@ def test_update_catalogue( "select content_uid, title, site, type from contents order by content_uid" ) assert session.execute(sa.text(sql2)).all() == [ + ("ads-page-how-to-api", "CDSAPI setup", "ads", "page"), ( - "copernicus-interactive-climates-atlas", + "cds-application-copernicus-interactive-climates-atlas", "Copernicus Interactive Climate Atlas", "cds", "application", ), - ("how-to-api", "CDSAPI setup", "cds,ads", "page"), + ("cds-page-how-to-api", "CDSAPI setup", "cds", "page"), ] # 3.bis repeat last run ------------------------------------------------------------- @@ -763,7 +764,7 @@ def test_update_catalogue( ) assert ( session.execute(sa.text("select count(*) from contents")).scalars().one() - == 2 + == 3 ) sql = ( "select catalogue_repo_commit, metadata_repo_commit, licence_repo_commit, " @@ -796,7 +797,7 @@ def test_update_catalogue( session.execute( sa.text( "update contents set title='a new title' " - "where content_uid='how-to-api'" + "where content_uid='ads-page-how-to-api'" ) ) session.commit() @@ -848,8 +849,8 @@ def test_update_catalogue( # check load of contents is run (it's forced) _update_catalogue_contents.assert_called_once() _update_catalogue_contents.reset_mock() - # check object storage called for 1 dataset, 4 licences and 2 contents (5 + 4*2 + 2) - assert _store_file.call_count == 15 + # check object storage called for 1 dataset, 4 licences and 3 contents (5 + 4*2 + 3) + assert _store_file.call_count == 16 _store_file.reset_mock() # check db changes are reset @@ -864,7 +865,7 @@ def test_update_catalogue( ) ).all() == [(True,)] assert session.execute( - sa.text("select title from contents where content_uid='how-to-api'") + sa.text("select title from contents where content_uid='ads-page-how-to-api'") ).all() == [("CDSAPI setup",)] # 5. use 'include' with a pattern that doesn't match anything ---------------------- @@ -1214,13 +1215,13 @@ def test_update_catalogue( _update_catalogue_contents.assert_called_once() _update_catalogue_contents.reset_mock() # check object storage called - assert _store_file.call_count == 50 + assert _store_file.call_count == 51 # # num.licences * 2 = 8 # # num.datasets overview.png * 2 = 16 # # num.datasets layout.json = 8 # # num.datasets form.json = 8 # # num.datasets constraints.json = 8 - # # num.contents = 2 + # # num.contents = 3 _store_file.reset_mock() # check db content @@ -1341,13 +1342,13 @@ def test_update_catalogue( _update_catalogue_contents.assert_called_once() _update_catalogue_contents.reset_mock() # check object storage called - assert _store_file.call_count == 50 + assert _store_file.call_count == 51 # # num.licences * 2 = 8 # # num.datasets overview.png * 2 = 16 # # num.datasets layout.json = 8 # # num.datasets form.json = 8 # # num.datasets constraints.json = 8 - # # num.contents = 2 + # # num.contents = 3 _store_file.reset_mock() # check db content