Skip to content

Commit

Permalink
Fonctionnalité : ajoute la date aux résultats de la recherche de cont…
Browse files Browse the repository at this point in the history
…enus (#34)
  • Loading branch information
Guts authored Jan 13, 2023
2 parents af850f9 + be20a46 commit fd0f95d
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 8 deletions.
23 changes: 16 additions & 7 deletions geotribu_cli/subcommands/search_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# package
from geotribu_cli.__about__ import __title__, __version__
from geotribu_cli.constants import GeotribuDefaults
from geotribu_cli.utils.date_from_content import get_date_from_content_location
from geotribu_cli.utils.file_downloader import download_remote_file_to_local
from geotribu_cli.utils.file_stats import is_file_older_than
from geotribu_cli.utils.formatters import convert_octets
Expand Down Expand Up @@ -64,6 +65,9 @@ def format_output_result(
# columns
table.add_column(header="Titre", justify="left", style="default")
table.add_column(header="Type", justify="center", style="bright_black")
table.add_column(
header="Date de publication", justify="center", style="bright_black"
)
table.add_column(header="Score", style="magenta")
table.add_column(header="URL", justify="right", style="blue")

Expand All @@ -73,6 +77,7 @@ def format_output_result(
table.add_row(
r.get("titre"),
r.get("type"),
f"{r.get('date'):%d %B %Y}",
r.get("score"),
r.get("url"),
)
Expand All @@ -91,13 +96,13 @@ def generate_index_from_docs(
"""_summary_
Args:
input_documents_to_index (dict): _description_
index_ref_id (str): _description_
index_configuration (dict): _description_
index_fieds_definition (List[dict]): _description_
input_documents_to_index (dict): documents to index
index_ref_id (str): field to use as index primary key
index_configuration (dict): index configuration (language, etc.)
index_fieds_definition (List[dict]): fields settings (boost, etc.)
Returns:
Index: _description_
Index: lunr Index
"""

idx: Index = lunr(
Expand Down Expand Up @@ -282,6 +287,11 @@ def run(args: argparse.Namespace):
f"from contents listing ({local_listing_file})."
)
else:
# load
with local_listing_file.open("r", encoding=("UTF-8")) as fd:
contents_listing = json.loads(fd.read())

# load previously built index
logger.info(
f"Local index file ({args.local_index_file}) exists and is not "
f"older than {args.expiration_rotating_hours} hour(s). "
Expand Down Expand Up @@ -314,14 +324,13 @@ def run(args: argparse.Namespace):
else:
pass

result.update({})

# crée un résultat de sortie
out_result = {
"titre": result.get("title"),
"type": "Article"
if result.get("ref").startswith("articles/")
else "GeoRDP",
"date": get_date_from_content_location(result.get("ref")),
"score": f"{result.get('score'):.3}",
"url": f"{defaults_settings.site_base_url}{result.get('ref')}",
}
Expand Down
118 changes: 118 additions & 0 deletions geotribu_cli/utils/date_from_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#! python3 # noqa: E265

"""
Extract date from content path, location or name.
Author: Julien Moura (https://github.com/guts)
"""

# #############################################################################
# ########## Libraries #############
# ##################################

# Standard library
import logging
from datetime import date, datetime
from functools import lru_cache
from typing import Optional

# #############################################################################
# ########## Globals ###############
# ##################################

# logs
logger = logging.getLogger(__name__)


# #############################################################################
# ########## Functions #############
# ##################################


@lru_cache(maxsize=512)
def get_date_from_content_location(input_content_location: str) -> Optional[date]:
    """Extract date from content location string.

    The location is split on "/". The first segment made only of digits is taken
    as the year folder, and the date is read from the first 10 characters of the
    segment right after it (once an optional ``rdp_`` prefix is stripped).

    Args:
        input_content_location (str): content location path.

    Returns:
        Optional[date]: date object, or None if the input is not a string
            containing "/", if no year segment is found, if nothing follows the
            year segment, or if the following segment does not start with a
            ``YYYY-MM-DD`` date. Every failure is logged as an error.

    Example:

    .. code-block:: python

        > sample_content_location = (
            "articles/2008/2008-08-22_1-introduction-a-l-api-google-maps/"
        )
        > sample_content_date = get_date_from_content_location(sample_content_location)
        > print(type(sample_content_date), sample_content_date)
        <class 'datetime.date'> 2008-08-22
    """
    # checks
    if not isinstance(input_content_location, str) or "/" not in input_content_location:
        logger.error(f"Input location seems to be invalid: {input_content_location}.")
        return None

    parts = input_content_location.split("/")

    # get the year: first path segment containing only digits
    year_index = next(
        (index for index, part in enumerate(parts) if part.isdigit()), None
    )
    if year_index is None:
        logger.error(
            f"Unable to extract year from content location: {input_content_location}"
        )
        return None

    # the date lives in the segment following the year: make sure there is one,
    # otherwise indexing would raise an IndexError (e.g. "articles/2008")
    if year_index + 1 >= len(parts):
        logger.error(
            "Unable to extract date from content location (nothing after the "
            f"year): {input_content_location}"
        )
        return None

    # get next part
    next_part = parts[year_index + 1]

    # clean next part for rdp
    if next_part.startswith("rdp_"):
        next_part = next_part[4:]

    # now, the next part should contain the date within the first 10 chars
    date_str = next_part[:10]

    try:
        return datetime.strptime(date_str, "%Y-%m-%d").date()
    except ValueError as err:
        # strptime raises ValueError when the string does not match the format
        logger.error(err)
        return None


# #############################################################################
# ##### Stand alone program ########
# ##################################

if __name__ == "__main__":
    """Standalone execution."""
    # locations expected to yield a date, in order: bare article location,
    # article with content folder prefix and md suffix, then a GeoRDP file with
    # content folder prefix, rdp_ prefix and md suffix
    for sample_content_location in (
        "articles/2008/2008-08-22_1-introduction-a-l-api-google-maps/",
        "/content/articles/2008/2008-08-22_1-introduction-a-l-api-google-maps.md",
        "content/rdp/2023/rdp_2023-01-06.md",
    ):
        sample_content_date = get_date_from_content_location(sample_content_location)
        print(type(sample_content_date), sample_content_date)
        assert isinstance(sample_content_date, date)

    # bad: no "/" in the location, so no date can be extracted
    sample_content_location = "2008-08-22_1-introduction-a-l-api-google-maps"
    sample_content_date = get_date_from_content_location(sample_content_location)
    print(type(sample_content_date), sample_content_date)
    assert sample_content_date is None
63 changes: 63 additions & 0 deletions tests/test_utils_date_from_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#! python3 # noqa E265

"""
Usage from the repo root folder:
.. code-block:: bash
# for whole tests
python -m unittest tests.test_utils_date_from_content
# for specific test
python -m unittest tests.test_utils_date_from_content.TestUtilsDateFromContent.test_date_from_content_location
"""

# standard library
import unittest
from datetime import date

# project
from geotribu_cli.utils.date_from_content import get_date_from_content_location

# ############################################################################
# ########## Classes #############
# ################################


class TestUtilsDateFromContent(unittest.TestCase):
    """Test the utilities extracting a date from a content location."""

    def test_date_from_content_location(self):
        """Test date extraction from valid and invalid content locations."""
        # good: location -> expected date
        expected_dates = {
            # bare article location
            "articles/2008/2008-08-22_1-introduction-a-l-api-google-maps/": date(
                2008, 8, 22
            ),
            # with content folder prefix and md suffix
            "/content/articles/2008/2008-08-22_1-introduction-a-l-api-google-maps.md": date(
                2008, 8, 22
            ),
            # with content folder and rdp_ prefixes
            "content/rdp/2023/rdp_2023-01-06": date(2023, 1, 6),
        }
        for sample_content_location, expected_date in expected_dates.items():
            with self.subTest(location=sample_content_location):
                sample_content_date = get_date_from_content_location(
                    sample_content_location
                )
                self.assertIsInstance(sample_content_date, date)
                # check the extracted value, not only its type
                self.assertEqual(sample_content_date, expected_date)

        # bad: no "/" in the location
        sample_content_location = "2008-08-22_1-introduction-a-l-api-google-maps"
        sample_content_date = get_date_from_content_location(sample_content_location)
        self.assertIsNone(sample_content_date)


# ############################################################################
# ####### Stand-alone run ########
# ################################
if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion tests/test_utils_slugifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# for whole tests
python -m unittest tests.test_utils_slugifier
# for specific test
python -m unittest tests.test_utils.TestUtilsSlugify.test_slugger
python -m unittest tests.test_utils_slugifier.TestUtilsSlugify.test_slugger
"""

# standard library
Expand Down

0 comments on commit fd0f95d

Please sign in to comment.