From ec17a6c5dc01416f4075e29079b3fbd3cca2d051 Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Fri, 13 Jan 2023 21:05:48 +0100 Subject: [PATCH 1/3] typo --- tests/test_utils_slugifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils_slugifier.py b/tests/test_utils_slugifier.py index a629ca3..623e6bc 100644 --- a/tests/test_utils_slugifier.py +++ b/tests/test_utils_slugifier.py @@ -7,7 +7,7 @@ # for whole tests python -m unittest tests.test_utils_slugifier # for specific test - python -m unittest tests.test_utils.TestUtilsSlugify.test_slugger + python -m unittest tests.test_utils_slugifier.TestUtilsSlugify.test_slugger """ # standard library From 2659a472eb9d8060644b76ddf9d93a8cec58fe1f Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Fri, 13 Jan 2023 21:06:03 +0100 Subject: [PATCH 2/3] Add util to extract date from content location --- geotribu_cli/utils/date_from_content.py | 118 ++++++++++++++++++++++++ tests/test_utils_date_from_content.py | 63 +++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 geotribu_cli/utils/date_from_content.py create mode 100644 tests/test_utils_date_from_content.py diff --git a/geotribu_cli/utils/date_from_content.py b/geotribu_cli/utils/date_from_content.py new file mode 100644 index 0000000..3b4d8b4 --- /dev/null +++ b/geotribu_cli/utils/date_from_content.py @@ -0,0 +1,118 @@ +#! python3 # noqa: E265 + +""" + Extract date from content path, location or name. 
# Standard library
import logging
from datetime import date, datetime
from functools import lru_cache
from typing import Optional

# logs
logger = logging.getLogger(__name__)


@lru_cache(maxsize=512)
def get_date_from_content_location(input_content_location: str) -> Optional[date]:
    """Extract the publication date from a content location string.

    The location is split on ``/``. The first segment made only of digits is
    taken as the year folder, and the segment right after it is expected to
    start with a ``YYYY-MM-DD`` date, optionally preceded by the ``rdp_`` prefix.

    Results are memoized (up to 512 distinct locations), so the argument has to
    be hashable.

    Args:
        input_content_location (str): content location path.

    Returns:
        Optional[date]: the extracted date, or None (after logging an error) when
            the input is not a string, contains no ``/``, has no all-digit
            segment, has nothing after that segment, or when the following
            segment does not start with a valid ``YYYY-MM-DD`` date.

    Example:
        .. code-block:: python

            > sample_content_location = (
                "articles/2008/2008-08-22_1-introduction-a-l-api-google-maps/"
            )
            > sample_content_date = get_date_from_content_location(sample_content_location)
            > print(type(sample_content_date), sample_content_date)
            <class 'datetime.date'> 2008-08-22

    """
    # checks
    if not isinstance(input_content_location, str) or "/" not in input_content_location:
        logger.error("Input location seems to be invalid: %s.", input_content_location)
        return None

    parts = input_content_location.split("/")

    # the year folder is the first segment made only of digits
    year_index = next((idx for idx, part in enumerate(parts) if part.isdigit()), None)
    if year_index is None:
        logger.error(
            "Unable to extract year from content location: %s", input_content_location
        )
        return None

    # the dated element must come right after the year folder; a location ending
    # with the year (e.g. "articles/2008") has nothing to parse
    if year_index + 1 >= len(parts):
        logger.error(
            "Nothing found after the year folder in content location: %s",
            input_content_location,
        )
        return None
    next_part = parts[year_index + 1]

    # clean next part for rdp
    if next_part.startswith("rdp_"):
        next_part = next_part[4:]

    # now, the next part should contain the date within the first 10 chars
    date_str = next_part[:10]

    try:
        return datetime.strptime(date_str, "%Y-%m-%d").date()
    except ValueError as err:
        # strptime raises ValueError when the text does not match the format
        logger.error(err)
        return None


if __name__ == "__main__":
    # Standalone execution: quick smoke checks of the extraction.

    # good
    sample_content_location = (
        "articles/2008/2008-08-22_1-introduction-a-l-api-google-maps/"
    )
    sample_content_date = get_date_from_content_location(sample_content_location)
    print(type(sample_content_date), sample_content_date)
    assert isinstance(sample_content_date, date)

    # good with content folder prefix and md suffix
    sample_content_location = (
        "/content/articles/2008/2008-08-22_1-introduction-a-l-api-google-maps.md"
    )
    sample_content_date = get_date_from_content_location(sample_content_location)
    print(type(sample_content_date), sample_content_date)
    assert isinstance(sample_content_date, date)

    # good with content folder prefix, rdp_ prefix and md suffix
    sample_content_location = "content/rdp/2023/rdp_2023-01-06.md"
    sample_content_date = get_date_from_content_location(sample_content_location)
    print(type(sample_content_date), sample_content_date)
    assert isinstance(sample_content_date, date)

    # bad
    sample_content_location = "2008-08-22_1-introduction-a-l-api-google-maps"
    sample_content_date = get_date_from_content_location(sample_content_location)
    print(type(sample_content_date), sample_content_date)
    assert sample_content_date is None
# standard library
import unittest
from datetime import date

# project
from geotribu_cli.utils.date_from_content import get_date_from_content_location

# ############################################################################
# ########## Classes #############
# ################################


class TestUtilsDateFromContent(unittest.TestCase):
    """Test the utility extracting a date from a content location."""

    def test_date_from_content_location(self):
        """Valid locations give the expected date, an invalid one gives None."""
        # (location, expected date) pairs
        valid_cases = (
            # good
            (
                "articles/2008/2008-08-22_1-introduction-a-l-api-google-maps/",
                date(2008, 8, 22),
            ),
            # good with content folder prefix and md suffix
            (
                "/content/articles/2008/2008-08-22_1-introduction-a-l-api-google-maps.md",
                date(2008, 8, 22),
            ),
            # good with content folder and rdp_ prefixes
            ("content/rdp/2023/rdp_2023-01-06", date(2023, 1, 6)),
        )

        for location, expected_date in valid_cases:
            # subTest reports each failing location separately instead of
            # stopping at the first one
            with self.subTest(location=location):
                content_date = get_date_from_content_location(location)
                self.assertIsInstance(content_date, date)
                self.assertEqual(content_date, expected_date)

        # bad: no folder separator in the location
        self.assertIsNone(
            get_date_from_content_location(
                "2008-08-22_1-introduction-a-l-api-google-maps"
            )
        )
############################################################################ +# ####### Stand-alone run ######## +# ################################ +if __name__ == "__main__": + unittest.main() From be20a46c0468aa2ab9e8c9a9d63b9d2e5e861b5c Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Fri, 13 Jan 2023 21:06:19 +0100 Subject: [PATCH 3/3] Add content date to results --- geotribu_cli/subcommands/search_content.py | 23 +++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/geotribu_cli/subcommands/search_content.py b/geotribu_cli/subcommands/search_content.py index c944b42..3798663 100644 --- a/geotribu_cli/subcommands/search_content.py +++ b/geotribu_cli/subcommands/search_content.py @@ -21,6 +21,7 @@ # package from geotribu_cli.__about__ import __title__, __version__ from geotribu_cli.constants import GeotribuDefaults +from geotribu_cli.utils.date_from_content import get_date_from_content_location from geotribu_cli.utils.file_downloader import download_remote_file_to_local from geotribu_cli.utils.file_stats import is_file_older_than from geotribu_cli.utils.formatters import convert_octets @@ -64,6 +65,9 @@ def format_output_result( # columns table.add_column(header="Titre", justify="left", style="default") table.add_column(header="Type", justify="center", style="bright_black") + table.add_column( + header="Date de publication", justify="center", style="bright_black" + ) table.add_column(header="Score", style="magenta") table.add_column(header="URL", justify="right", style="blue") @@ -73,6 +77,7 @@ def format_output_result( table.add_row( r.get("titre"), r.get("type"), + f"{r.get('date'):%d %B %Y}", r.get("score"), r.get("url"), ) @@ -91,13 +96,13 @@ def generate_index_from_docs( """_summary_ Args: - input_documents_to_index (dict): _description_ - index_ref_id (str): _description_ - index_configuration (dict): _description_ - index_fieds_definition (List[dict]): _description_ + input_documents_to_index (dict): documents to index + 
index_ref_id (str): field to use as index primary key + index_configuration (dict): index configuration (language, etc.) + index_fieds_definition (List[dict]): fields settings (boost, etc.) Returns: - Index: _description_ + Index: lunr Index """ idx: Index = lunr( @@ -282,6 +287,11 @@ def run(args: argparse.Namespace): f"from contents listing ({local_listing_file})." ) else: + # load + with local_listing_file.open("r", encoding=("UTF-8")) as fd: + contents_listing = json.loads(fd.read()) + + # load previously built index logger.info( f"Local index file ({args.local_index_file}) exists and is not " f"older than {args.expiration_rotating_hours} hour(s). " @@ -314,14 +324,13 @@ def run(args: argparse.Namespace): else: pass - result.update({}) - # crée un résultat de sortie out_result = { "titre": result.get("title"), "type": "Article" if result.get("ref").startswith("articles/") else "GeoRDP", + "date": get_date_from_content_location(result.get("ref")), "score": f"{result.get('score'):.3}", "url": f"{defaults_settings.site_base_url}{result.get('ref')}", }