Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: create recursive_from_prefix path if it does not exist #940

Merged
merged 13 commits into from
Jul 29, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,27 @@ def download_https(

def get_destination_path(
url: str,
dest_path: Optional[str],
dest_path: Optional[str] = None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a case, where we are calling this method without a value for dest_path? If yes, we should fix that method call.
We don't want to support a usecase where we accidentally miss passing on the destination_path provided by the user.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a case, where we are calling this method without a value for dest_path?

It's only used in one place, and dest_path is provided.

I see, you're using Optional to mean String or None while at the same time enforcing that dest_path must be a parameter.

recursive_from_prefix: Optional[str] = None,
) -> str:
"""
Get the destination path for a file download.

Args:
url (str): The URL to download
dest_path (str): The destination path the files will download to
recursive_from_prefix (str): The prefix to use for recursive downloads
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Could we make this more clearer? Something like, "all files from this prefix are recursively downloaded" maybe?


Returns:
str: The destination path for the file download

"""
if not dest_path:
dest_path = os.getcwd()
dest_path = os.path.abspath(dest_path)

if not os.path.isdir(dest_path) and recursive_from_prefix:
raise ValueError("Recursive downloads require a base directory")
if not os.path.isdir(dest_path):
os.makedirs(dest_path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the dest_path is None, it should point to the working directory, and will pass this check. So, the only time this exception would show up would be when the user explicitly provides an invalid value and we should error on it. I think the error message could be more verbose in explaining the issue. cc: @kandarpksk

Also, this line should already be creating the recursive directory structure.

Copy link
Contributor Author

@Bento007 Bento007 Jul 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So error when an invalid path is provided? Invalid meaning illegal path characters or maybe a path we can't write to?


# If we're downloading recursively, we need to add the dest URL
# (minus the prefix) to the dest path.
Expand Down
49 changes: 49 additions & 0 deletions client/python/cryoet_data_portal/tests/test_file_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
from unittest.mock import patch

from cryoet_data_portal._file_tools import get_destination_path


class TestGetDestinationPath:
def test_url(self, tmp_path) -> None:
with patch("cryoet_data_portal._file_tools.os.getcwd", return_value=tmp_path):
url = "https://example.com/file.txt"
expected = os.path.join(tmp_path, "file.txt")
assert get_destination_path(url) == expected

def test_dest_path_exists(self, tmp_path) -> None:
url = "https://example.com/file.txt"
dest_path = os.path.join(tmp_path, "my_dest")
os.makedirs(dest_path)
expected = os.path.join(dest_path, "file.txt")
assert get_destination_path(url, dest_path) == expected

def test_dest_path_does_not_exist(self, tmp_path) -> None:
"""Test that the destination path is created if it does not exist"""
url = "https://example.com/file.txt"
dest_path = os.path.join(tmp_path, "my_dest")
expected = os.path.join(dest_path, "file.txt")
assert get_destination_path(url, dest_path) == expected
assert os.path.isdir(dest_path)

def test_recursive_from_prefix_where_dest_path_exist(self, tmp_path) -> None:
"""Test when a recursive_from_prefix is provided and the dest_path exists"""
url = "https://example.com/a/file.txt"
dest_path = os.path.join(tmp_path, "my_dest")
os.makedirs(dest_path)
recursive_from_prefix = "https://example.com/"
expected = os.path.join(dest_path, "a", "file.txt")
assert get_destination_path(url, dest_path, recursive_from_prefix) == expected

def test_recursive_from_prefix_where_dest_path_does_not_exist(
self,
tmp_path,
) -> None:
"""Test when a recursive_from_prefix is provided and the dest_path does not exist. The dest_path should be created."""
url = "https://example.com/a/b/file.txt"
dest_path = os.path.join(tmp_path, "my_dest")
recursive_from_prefix = "https://example.com/"
expected_path = os.path.join(dest_path, "a", "b")
expected = os.path.join(dest_path, "a", "b", "file.txt")
assert get_destination_path(url, dest_path, recursive_from_prefix) == expected
assert os.path.isdir(expected_path)
Loading