From 97b1fbb92e544d907956f5f6aa0ca6e639e724cf Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Thu, 27 Jun 2024 09:33:39 +0200 Subject: [PATCH 1/5] added fields for new additional fts --- cads_catalogue/database.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cads_catalogue/database.py b/cads_catalogue/database.py index 079a174..0617ea7 100644 --- a/cads_catalogue/database.py +++ b/cads_catalogue/database.py @@ -232,6 +232,7 @@ class Resource(BaseModel): # fulltextsearch-related fulltext = sa.Column(sa.String) high_priority_terms = sa.Column(sa.String) + popularity = sa.Column(sa.Integer, default=1) search_field: str = sa.Column( sqlalchemy_utils.types.ts_vector.TSVectorType(regconfig="english"), sa.Computed( @@ -242,7 +243,13 @@ class Resource(BaseModel): persisted=True, ), ) - + fts: str = sa.Column( + sqlalchemy_utils.types.ts_vector.TSVectorType(regconfig="english"), + sa.Computed( + "setweight(to_tsvector('english', coalesce(high_priority_terms, '')), 'D')", + persisted=True, + ), + ) # relationship attributes resource_data = sa.orm.relationship( ResourceData, uselist=False, back_populates="resource", lazy="select" @@ -274,6 +281,7 @@ class Resource(BaseModel): __table_args__ = ( sa.Index("idx_resources_search_field", search_field, postgresql_using="gin"), + sa.Index("idx_resources_fts", fts, postgresql_using="gin"), ) From 5003922f75a785182975cfbd1211500f1e1727bf Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Thu, 27 Jun 2024 17:15:48 +0200 Subject: [PATCH 2/5] tests aligned to recent changes --- .../reanalysis-era5-land/metadata.json | 1 + tests/data/dumped_resources1.txt | 4 ++++ tests/data/dumped_resources2.txt | 4 ++++ tests/data/dumped_resources3.txt | 2 ++ tests/data/dumped_resources4.txt | 14 ++++++++++++++ tests/data/dumped_resources5.txt | 16 ++++++++++++++++ tests/data/dumped_resources6.txt | 16 ++++++++++++++++ tests/data/dumped_resources7.txt | 16 ++++++++++++++++ tests/test_01_utils.py | 4 ++-- 
tests/test_40_manager.py | 1 + 10 files changed, 76 insertions(+), 2 deletions(-) diff --git a/tests/data/cads-forms-json/reanalysis-era5-land/metadata.json b/tests/data/cads-forms-json/reanalysis-era5-land/metadata.json index 26e947e..66fde2c 100644 --- a/tests/data/cads-forms-json/reanalysis-era5-land/metadata.json +++ b/tests/data/cads-forms-json/reanalysis-era5-land/metadata.json @@ -47,6 +47,7 @@ "Variable domain: Land (biosphere)", "Provider: Copernicus C3S" ], + "popularity": 500, "qos_tags": ["tag1", "tag2", "tag3"], "api_enforce_constraints": true, "title": "ERA5-Land hourly data from 1950 to present", diff --git a/tests/data/dumped_resources1.txt b/tests/data/dumped_resources1.txt index 26301d3..6c2cd79 100644 --- a/tests/data/dumped_resources1.txt +++ b/tests/data/dumped_resources1.txt @@ -2,6 +2,7 @@ { "resource_id": 1, "resource_uid": "reanalysis-era5-land", + "popularity": 500, "constraints": "an url", "form": "an url for form.json", "layout": "an url for layout.json", @@ -55,11 +56,13 @@ "variables": [], "fulltext": null, "high_priority_terms": "reanalysis ERA5 land", + "fts": "'era5':2 'land':3 'reanalysi':1", "search_field": "'1950':11A 'accur':92B 'across':65B 'back':87B 'climat':56B,96B 'combin':59B 'compar':38B 'complet':71B 'compon':51B 'consist':23B,73B 'data':9A,61B,82B 'dataset':20B,74B 'decad':33B,86B 'descript':93B 'ecmwf':54B 'enhanc':36B 'era5':3A,6A,15B,40B,42B,55B,101 'era5-land':5A,14B,41B 'evolut':27B 'global':70B 'goe':84B 'hour':8A 'land':4A,7A,16B,29B,43B,50B,102 'law':77B 'model':60B 'observ':63B 'past':99B 'physic':79B 'present':13A 'produc':46B,81B 'provid':21B,90B 'reanalysi':2A,19B,57B,58B,80B,100 'reanalysis-era5-land':1A 'replay':48B 'resolut':37B 'sever':32B,85B 'time':89B 'use':75B 'variabl':30B 'view':24B 'world':67B" }, { "resource_id": 2, "resource_uid": "reanalysis-era5-land-monthly-means", + "popularity": 1, "constraints": "an url", "form": "an url for form.json", "layout": "an url for layout.json", @@ -109,6 +112,7 
@@ "variables": [], "fulltext": "climate reanalysis past land era5 hydrology physics biosphere copernicus c3s conditions variables monthly means", "high_priority_terms": "", + "fts": "", "search_field": "'1950':14A 'accur':95B 'across':68B 'averag':11A 'back':90B 'biospher':110C 'c3s':112C 'climat':59B,99B,103C 'combin':62B 'compar':41B 'complet':74B 'compon':54B 'condit':113C 'consist':26B,76B 'copernicus':111C 'data':12A,64B,85B 'dataset':23B,77B 'decad':36B,89B 'descript':96B 'ecmwf':57B 'enhanc':39B 'era5':3A,8A,18B,43B,45B,58B,107C 'era5-land':7A,17B,44B 'evolut':30B 'global':73B 'goe':87B 'hydrolog':108C 'land':4A,9A,19B,32B,46B,53B,106C 'law':80B 'mean':6A,116C 'model':63B 'month':5A,10A,115C 'observ':66B 'past':102B,105C 'physic':82B,109C 'present':16A 'produc':49B,84B 'provid':24B,93B 'reanalysi':2A,22B,60B,61B,83B,104C 'reanalysis-era5-land-monthly-means':1A 'replay':51B 'resolut':40B 'sever':35B,88B 'time':92B 'use':78B 'variabl':33B,114C 'view':27B 'world':70B" } ] \ No newline at end of file diff --git a/tests/data/dumped_resources2.txt b/tests/data/dumped_resources2.txt index ec18c8e..5eb163b 100644 --- a/tests/data/dumped_resources2.txt +++ b/tests/data/dumped_resources2.txt @@ -2,6 +2,7 @@ { "resource_id": 1, "resource_uid": "reanalysis-era5-land", + "popularity": 500, "constraints": "an url", "api_enforce_constraints": true, "form": "an url for form.json", @@ -55,11 +56,13 @@ "variables": [], "fulltext": null, "high_priority_terms": "reanalysis ERA5 land", + "fts": "'era5':2 'land':3 'reanalysi':1", "search_field": "'1950':11A 'accur':92B 'across':65B 'back':87B 'climat':56B,96B 'combin':59B 'compar':38B 'complet':71B 'compon':51B 'consist':23B,73B 'data':9A,61B,82B 'dataset':20B,74B 'decad':33B,86B 'descript':93B 'ecmwf':54B 'enhanc':36B 'era5':3A,6A,15B,40B,42B,55B,101 'era5-land':5A,14B,41B 'evolut':27B 'global':70B 'goe':84B 'hour':8A 'land':4A,7A,16B,29B,43B,50B,102 'law':77B 'model':60B 'observ':63B 'past':99B 'physic':79B 'present':13A 
'produc':46B,81B 'provid':21B,90B 'reanalysi':2A,19B,57B,58B,80B,100 'reanalysis-era5-land':1A 'replay':48B 'resolut':37B 'sever':32B,85B 'time':89B 'use':75B 'variabl':30B 'view':24B 'world':67B" }, { "resource_id": 2, "resource_uid": "reanalysis-era5-land-monthly-means", + "popularity": 1, "constraints": "an url", "form": "a new url for form.json", "layout": "a new url for layout.json", @@ -109,6 +112,7 @@ "variables": [], "fulltext": "climate reanalysis past land era5 hydrology physics biosphere copernicus c3s conditions variables monthly means", "high_priority_terms": "", + "fts": "", "search_field": "'1950':14A 'accur':95B 'across':68B 'averag':11A 'back':90B 'biospher':110C 'c3s':112C 'climat':59B,99B,103C 'combin':62B 'compar':41B 'complet':74B 'compon':54B 'condit':113C 'consist':26B,76B 'copernicus':111C 'data':12A,64B,85B 'dataset':23B,77B 'decad':36B,89B 'descript':96B 'ecmwf':57B 'enhanc':39B 'era5':3A,8A,18B,43B,45B,58B,107C 'era5-land':7A,17B,44B 'evolut':30B 'global':73B 'goe':87B 'hydrolog':108C 'land':4A,9A,19B,32B,46B,53B,106C 'law':80B 'mean':6A,116C 'model':63B 'month':5A,10A,115C 'observ':66B 'past':102B,105C 'physic':82B,109C 'present':16A 'produc':49B,84B 'provid':24B,93B 'reanalysi':2A,22B,60B,61B,83B,104C 'reanalysis-era5-land-monthly-means':1A 'replay':51B 'resolut':40B 'sever':35B,88B 'time':92B 'use':78B 'variabl':33B,114C 'view':27B 'world':70B" } ] \ No newline at end of file diff --git a/tests/data/dumped_resources3.txt b/tests/data/dumped_resources3.txt index 305b8f0..791752c 100644 --- a/tests/data/dumped_resources3.txt +++ b/tests/data/dumped_resources3.txt @@ -2,6 +2,7 @@ { "resource_id": 1, "resource_uid": "reanalysis-era5-land", + "popularity": 500, "api_enforce_constraints": true, "constraints": "an url", "form": "an url", @@ -36,6 +37,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "reanalysis ERA5 land", + "fts": "'era5':2 'land':3 'reanalysi':1", "lineage": "EC Copernicus program", 
"representative_fraction": 0.25, "responsible_organisation": "ECMWF", diff --git a/tests/data/dumped_resources4.txt b/tests/data/dumped_resources4.txt index 469020d..98db207 100644 --- a/tests/data/dumped_resources4.txt +++ b/tests/data/dumped_resources4.txt @@ -2,6 +2,7 @@ { "resource_id": 4, "resource_uid": "cams-global-reanalysis-eac4", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -103,6 +104,7 @@ "format_version": "1", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "Copernicus Atmospheric Monitoring Service", "representative_fraction": null, "responsible_organisation": "ECMWF", @@ -899,6 +901,7 @@ { "resource_id": 3, "resource_uid": "cams-global-reanalysis-eac4-monthly", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1000,6 +1003,7 @@ "format_version": "1", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "Copernicus Atmospheric Monitoring Service", "representative_fraction": null, "responsible_organisation": "ECMWF", @@ -1426,6 +1430,7 @@ { "resource_id": 5, "resource_uid": "derived-near-surface-meteorological-variables", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1460,6 +1465,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1480,6 +1486,7 @@ { "resource_id": 1, "resource_uid": "reanalysis-era5-land", + "popularity": 500, "api_enforce_constraints": true, "constraints": "an url", "form": "an url", @@ -1514,6 +1521,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "reanalysis ERA5 land", + "fts": "'era5':2 'land':3 'reanalysi':1", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1538,6 +1546,7 @@ { "resource_id": 6, "resource_uid": 
"reanalysis-era5-land-monthly-means", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1572,6 +1581,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1592,6 +1602,7 @@ { "resource_id": 7, "resource_uid": "reanalysis-era5-pressure-levels", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1626,6 +1637,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1646,6 +1658,7 @@ { "resource_id": 8, "resource_uid": "satellite-surface-radiation-budget", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1680,6 +1693,7 @@ "format_version": "3", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", diff --git a/tests/data/dumped_resources5.txt b/tests/data/dumped_resources5.txt index 822eb4c..ccb0459 100644 --- a/tests/data/dumped_resources5.txt +++ b/tests/data/dumped_resources5.txt @@ -2,6 +2,7 @@ { "resource_id": 4, "resource_uid": "cams-global-reanalysis-eac4", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -103,6 +104,7 @@ "format_version": "1", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "Copernicus Atmospheric Monitoring Service", "representative_fraction": null, "responsible_organisation": "ECMWF", @@ -899,6 +901,7 @@ { "resource_id": 3, "resource_uid": "cams-global-reanalysis-eac4-monthly", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1000,6 +1003,7 @@ "format_version": "1", "hidden": false, 
"high_priority_terms": "", + "fts": "", "lineage": "Copernicus Atmospheric Monitoring Service", "representative_fraction": null, "responsible_organisation": "ECMWF", @@ -1426,6 +1430,7 @@ { "resource_id": 5, "resource_uid": "derived-near-surface-meteorological-variables", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1460,6 +1465,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1480,6 +1486,7 @@ { "resource_id": 1, "resource_uid": "reanalysis-era5-land", + "popularity": 500, "api_enforce_constraints": true, "constraints": "an url", "form": "an url", @@ -1514,6 +1521,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "reanalysis ERA5 land", + "fts": "'era5':2 'land':3 'reanalysi':1", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1538,6 +1546,7 @@ { "resource_id": 6, "resource_uid": "reanalysis-era5-land-monthly-means", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1572,6 +1581,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1592,6 +1602,7 @@ { "resource_id": 7, "resource_uid": "reanalysis-era5-pressure-levels", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1626,6 +1637,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1646,6 +1658,7 @@ { "resource_id": 9, "resource_uid": "reanalysis-era5-single-levels", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an 
url", @@ -1680,6 +1693,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1700,6 +1714,7 @@ { "resource_id": 8, "resource_uid": "satellite-surface-radiation-budget", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1734,6 +1749,7 @@ "format_version": "3", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", diff --git a/tests/data/dumped_resources6.txt b/tests/data/dumped_resources6.txt index 822eb4c..ccb0459 100644 --- a/tests/data/dumped_resources6.txt +++ b/tests/data/dumped_resources6.txt @@ -2,6 +2,7 @@ { "resource_id": 4, "resource_uid": "cams-global-reanalysis-eac4", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -103,6 +104,7 @@ "format_version": "1", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "Copernicus Atmospheric Monitoring Service", "representative_fraction": null, "responsible_organisation": "ECMWF", @@ -899,6 +901,7 @@ { "resource_id": 3, "resource_uid": "cams-global-reanalysis-eac4-monthly", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1000,6 +1003,7 @@ "format_version": "1", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "Copernicus Atmospheric Monitoring Service", "representative_fraction": null, "responsible_organisation": "ECMWF", @@ -1426,6 +1430,7 @@ { "resource_id": 5, "resource_uid": "derived-near-surface-meteorological-variables", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1460,6 +1465,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 
0.25, "responsible_organisation": "ECMWF", @@ -1480,6 +1486,7 @@ { "resource_id": 1, "resource_uid": "reanalysis-era5-land", + "popularity": 500, "api_enforce_constraints": true, "constraints": "an url", "form": "an url", @@ -1514,6 +1521,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "reanalysis ERA5 land", + "fts": "'era5':2 'land':3 'reanalysi':1", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1538,6 +1546,7 @@ { "resource_id": 6, "resource_uid": "reanalysis-era5-land-monthly-means", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1572,6 +1581,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1592,6 +1602,7 @@ { "resource_id": 7, "resource_uid": "reanalysis-era5-pressure-levels", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1626,6 +1637,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1646,6 +1658,7 @@ { "resource_id": 9, "resource_uid": "reanalysis-era5-single-levels", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1680,6 +1693,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1700,6 +1714,7 @@ { "resource_id": 8, "resource_uid": "satellite-surface-radiation-budget", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1734,6 +1749,7 @@ "format_version": "3", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus 
program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", diff --git a/tests/data/dumped_resources7.txt b/tests/data/dumped_resources7.txt index f575525..28a4ede 100644 --- a/tests/data/dumped_resources7.txt +++ b/tests/data/dumped_resources7.txt @@ -2,6 +2,7 @@ { "resource_id": 4, "resource_uid": "cams-global-reanalysis-eac4", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -103,6 +104,7 @@ "format_version": "1", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "Copernicus Atmospheric Monitoring Service", "representative_fraction": null, "responsible_organisation": "ECMWF", @@ -899,6 +901,7 @@ { "resource_id": 3, "resource_uid": "cams-global-reanalysis-eac4-monthly", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1000,6 +1003,7 @@ "format_version": "1", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "Copernicus Atmospheric Monitoring Service", "representative_fraction": null, "responsible_organisation": "ECMWF", @@ -1426,6 +1430,7 @@ { "resource_id": 5, "resource_uid": "derived-near-surface-meteorological-variables", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1460,6 +1465,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1480,6 +1486,7 @@ { "resource_id": 1, "resource_uid": "reanalysis-era5-land", + "popularity": 500, "api_enforce_constraints": true, "constraints": "an url", "form": "an url", @@ -1514,6 +1521,7 @@ "format_version": null, "hidden": true, "high_priority_terms": "reanalysis ERA5 land", + "fts": "'era5':2 'land':3 'reanalysi':1", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1538,6 +1546,7 @@ { "resource_id": 6, 
"resource_uid": "reanalysis-era5-land-monthly-means", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1572,6 +1581,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1592,6 +1602,7 @@ { "resource_id": 7, "resource_uid": "reanalysis-era5-pressure-levels", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1626,6 +1637,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1646,6 +1658,7 @@ { "resource_id": 9, "resource_uid": "reanalysis-era5-single-levels", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1680,6 +1693,7 @@ "format_version": null, "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", @@ -1700,6 +1714,7 @@ { "resource_id": 8, "resource_uid": "satellite-surface-radiation-budget", + "popularity": 1, "api_enforce_constraints": false, "constraints": "an url", "form": "an url", @@ -1734,6 +1749,7 @@ "format_version": "3", "hidden": false, "high_priority_terms": "", + "fts": "", "lineage": "EC Copernicus program", "representative_fraction": 0.25, "responsible_organisation": "ECMWF", diff --git a/tests/test_01_utils.py b/tests/test_01_utils.py index 0c9bd0c..683a570 100644 --- a/tests/test_01_utils.py +++ b/tests/test_01_utils.py @@ -43,11 +43,11 @@ def test_folders2hash() -> None: ) assert ( utils.folders2hash([test_file_path_1]).hexdigest() - == "e8fa178d247cf902502dfeca284bdd49" + == "b918f99ca582f9d2f04cb47f3b73cb51" ) assert ( utils.folders2hash([test_file_path_1, test_file_path_2]).hexdigest() - == 
"afd7020282c5977b199dcc569fbb9370" + == "b7875c9c1af9e7e8d8a03046b7d7f867" ) diff --git a/tests/test_40_manager.py b/tests/test_40_manager.py index e3e2cf6..03a6b5f 100644 --- a/tests/test_40_manager.py +++ b/tests/test_40_manager.py @@ -1585,6 +1585,7 @@ def test_load_resource_from_folder() -> None: "licence_uids": ["licence-to-use-copernicus-products"], "lineage": "EC Copernicus program", "mapping": None, + "popularity": 500, "publication_date": "2019-07-12", "related_resources_keywords": [], "representative_fraction": 0.25, From bc3f5a486faf3ba73efbe96f6337dd9fe9de5258 Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Thu, 27 Jun 2024 17:57:37 +0200 Subject: [PATCH 3/5] style --- .../63827287c182_fields_for_advanced_fts.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 alembic/versions/63827287c182_fields_for_advanced_fts.py diff --git a/alembic/versions/63827287c182_fields_for_advanced_fts.py b/alembic/versions/63827287c182_fields_for_advanced_fts.py new file mode 100644 index 0000000..64e03f4 --- /dev/null +++ b/alembic/versions/63827287c182_fields_for_advanced_fts.py @@ -0,0 +1,46 @@ +"""fields for advanced fts. + +Revision ID: 63827287c182 +Revises: 654a874249a8 +Create Date: 2024-06-27 09:34:31.278052 + +""" + +import sqlalchemy as sa +import sqlalchemy_utils + +from alembic import op +from cads_catalogue import database + +# revision identifiers, used by Alembic. 
+revision = "63827287c182" +down_revision = "654a874249a8" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Add the fts and popularity columns, the fts index and the ts_rank2 function.""" + op.add_column( + "resources", + sa.Column( + "fts", + sqlalchemy_utils.types.ts_vector.TSVectorType(regconfig="english"), + sa.Computed( + "to_tsvector('english', coalesce(high_priority_terms, ''))", + persisted=True, + ), + ), + ) + op.create_index("idx_resources_fts", "resources", ["fts"], postgresql_using="gin") + # server_default (not default) so rows already in the table get popularity=1 + op.add_column("resources", sa.Column("popularity", sa.Integer, server_default="1")) + op.execute(database.add_rank_function_sql) + + +def downgrade() -> None: + """Revert upgrade(): drop both added columns and the ts_rank2 function.""" + # dropping the fts column also removes its idx_resources_fts index + op.drop_column("resources", "fts") + op.drop_column("resources", "popularity") + op.execute(database.drop_rank_function_sql) From 09727da6ef8495aea3bcb99f2bdce19cd760371e Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Thu, 27 Jun 2024 17:58:26 +0200 Subject: [PATCH 4/5] added custom ranking function in init db --- cads_catalogue/database.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/cads_catalogue/database.py b/cads_catalogue/database.py index 0617ea7..2734fe4 100644 --- a/cads_catalogue/database.py +++ b/cads_catalogue/database.py @@ -30,6 +30,27 @@ BaseModel = sa.orm.declarative_base(metadata=metadata) +add_rank_function_sql = """ +CREATE OR REPLACE FUNCTION ts_rank2(w real[], v1 tsvector, v2 tsvector, q tsquery, n integer) RETURNS real +LANGUAGE plpgsql +IMMUTABLE PARALLEL SAFE STRICT +AS $function$ +DECLARE + original_rank REAL; + htp_rank REAL; +BEGIN + SELECT INTO original_rank ts_rank(w,v1,q); + SELECT INTO htp_rank ts_rank(v2,q); + RETURN htp_rank*n*10 + original_rank; +END; +$function$; +""" + +drop_rank_function_sql = """ +DROP FUNCTION ts_rank2(w real[], v1 tsvector, v2 tsvector, q tsquery, n integer); +""" + + class CatalogueUpdate(BaseModel): """Catalogue manager update information ORM model.""" @@ -246,7 +267,7 @@ class Resource(BaseModel): fts: str = sa.Column( 
sqlalchemy_utils.types.ts_vector.TSVectorType(regconfig="english"), sa.Computed( - "setweight(to_tsvector('english', coalesce(high_priority_terms, '')), 'D')", + "to_tsvector('english', coalesce(high_priority_terms, ''))", persisted=True, ), ) @@ -340,6 +361,13 @@ def ensure_session_obj(read_only: bool = False) -> sa.orm.sessionmaker: return session_obj +def create_catalogue_functions(engine: sa.engine.Engine) -> None: + """Install the custom SQL functions (ts_rank2) into the catalogue database.""" + # engine.begin() commits on success and rolls back if the statement fails + with engine.begin() as conn: + conn.execute(sa.text(add_rank_function_sql)) + + def init_database(connection_string: str, force: bool = False) -> sa.engine.Engine: """Make sure the db located at URI `connection_string` exists updated and return the engine object. @@ -366,6 +394,7 @@ def init_database(connection_string: str, force: bool = False) -> sa.engine.Engi # cleanup and create the schema BaseModel.metadata.drop_all(engine) BaseModel.metadata.create_all(engine) + create_catalogue_functions(engine) alembic.command.stamp(alembic_cfg, "head") else: # check the structure is empty or incomplete @@ -380,6 +409,7 @@ def init_database(connection_string: str, force: bool = False) -> sa.engine.Engi # NOTE: tables no more in metadata are not removed with drop_all BaseModel.metadata.drop_all(engine) BaseModel.metadata.create_all(engine) + create_catalogue_functions(engine) alembic.command.stamp(alembic_cfg, "head") else: # update db structure From 19c9652316ac65286dd4ff90f6628403d3dca973 Mon Sep 17 00:00:00 2001 From: Alessio Siniscalchi Date: Thu, 27 Jun 2024 17:59:05 +0200 Subject: [PATCH 5/5] added popularity reading in catalogue manager --- cads_catalogue/manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cads_catalogue/manager.py b/cads_catalogue/manager.py index 0ae23e9..6ec70a3 100644 --- a/cads_catalogue/manager.py +++ b/cads_catalogue/manager.py @@ -325,6 +325,7 @@ def load_resource_metadata_file(folder_path: str | pathlib.Path) -> dict[str, An metadata["licence_uids"] = 
data.get("licences", []) metadata["lineage"] = data.get("lineage") + metadata["popularity"] = data.get("popularity", 1) default_public_date = "2017-01-01" metadata["publication_date"] = data.get("publication_date") if not metadata["publication_date"]: