
Commit

Merge db5f22d into 8e0dd91
MDunitz authored Sep 30, 2021
2 parents 8e0dd91 + db5f22d commit 26d611b
Showing 5 changed files with 89 additions and 27 deletions.
12 changes: 11 additions & 1 deletion server/app/app.py
@@ -127,14 +127,21 @@ def get_data_adaptor(url_dataroot: str = None, dataset: str = None):
matrix_cache_manager = current_app.matrix_data_cache_manager
with get_dataset_metadata(url_dataroot=url_dataroot, dataset=dataset) as dataset_metadata:
return matrix_cache_manager.get(
cache_key=f"{url_dataroot}/{dataset}",
cache_key=dataset_metadata["s3_uri"],
create_data_function=MatrixDataLoader(
location=dataset_metadata["s3_uri"], url_dataroot=url_dataroot, app_config=app_config
).validate_and_open,
create_data_args={},
)


def evict_dataset_from_metadata_cache(url_dataroot: str = None, dataset: str = None):
try:
current_app.dataset_metadata_cache_manager.evict_by_key(f"{url_dataroot}/{dataset}")
except Exception as e:
logging.error(e)


def rest_get_data_adaptor(func):
@wraps(func)
def wrapped_function(self, dataset=None):
@@ -143,6 +150,9 @@ def wrapped_function(self, dataset=None):
data_adaptor.set_uri_path(f"{self.url_dataroot}/{dataset}")
return func(self, data_adaptor)
except (DatasetAccessError, DatasetNotFoundError, DatasetMetadataError) as e:
# if the dataset cannot be found or accessed, assume the stored metadata is stale and remove
# it from the cache
evict_dataset_from_metadata_cache(self.url_dataroot, dataset)
return common_rest.abort_and_log(
e.status_code, f"Invalid dataset {dataset}: {e.message}", loglevel=logging.INFO, include_exc_info=True
)
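The change above keys the matrix cache on the dataset's s3_uri instead of the request path, and evicts the cached metadata entry whenever a dataset lookup fails. A minimal, dict-backed sketch of the new keying behavior; the names load_matrix and get_matrix are illustrative stand-ins, not the app's API:

matrix_cache = {}

def load_matrix(s3_uri):
    # stand-in for MatrixDataLoader(location=s3_uri, ...).validate_and_open
    return {"loaded_from": s3_uri}

def get_matrix(dataset_metadata):
    key = dataset_metadata["s3_uri"]  # key on the artifact, not the URL path
    if key not in matrix_cache:
        matrix_cache[key] = load_matrix(key)
    return matrix_cache[key]

# Two routes that resolve to the same artifact now share one cache entry.
meta_a = {"s3_uri": "s3://bucket/pbmc3k.cxg"}
meta_b = {"s3_uri": "s3://bucket/pbmc3k.cxg"}
assert get_matrix(meta_a) is get_matrix(meta_b)
assert len(matrix_cache) == 1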
13 changes: 9 additions & 4 deletions server/data_common/cache.py
@@ -23,10 +23,10 @@ def __init__(self):
self.data = None

def get(
self,
cache_key: str,
create_data_function: typing.Optional[typing.Callable[[str], object]] = None,
create_data_args: object = {},
):
self.data_lock.r_acquire()
if self.data:
@@ -209,6 +209,11 @@ def get(self, cache_key: str, create_data_function: Optional[Callable] = None, c
if cache_item:
cache_item.release()

def evict_by_key(self, cache_key: str):
evict = self.data.get(cache_key, None)
if evict:
self.evict_data(to_del=[(cache_key, evict)])

def get_extra_data(self):
"""
Return a list of tuples (cache_key, cache_item) for least recently used cache_items when the number of
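The new evict_by_key method only acts when the key is actually cached and reuses the existing bulk-eviction path. A small, hypothetical stand-in showing that contract; the real cache manager also handles locking and least-recently-used bookkeeping:

class TinyCache:
    def __init__(self):
        self.data = {}

    def evict_data(self, to_del):
        for key, _item in to_del:
            del self.data[key]

    def evict_by_key(self, cache_key):
        evict = self.data.get(cache_key, None)
        if evict:
            self.evict_data(to_del=[(cache_key, evict)])

cache = TinyCache()
cache.data["s3://bucket/pbmc3k.cxg"] = "metadata"
cache.evict_by_key("s3://bucket/pbmc3k.cxg")   # removes the stale entry
cache.evict_by_key("s3://bucket/missing.cxg")  # no-op rather than KeyError
assert cache.data == {}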
13 changes: 10 additions & 3 deletions server/tests/unit/common/config/test_dataset_config.py
@@ -120,6 +120,11 @@ def test_handle_diffexp__raises_warning_for_large_datasets(self):

def test_multi_dataset(self):
config = AppConfig()
try:
os.symlink(FIXTURES_ROOT, f"{FIXTURES_ROOT}/set2")
os.symlink(FIXTURES_ROOT, f"{FIXTURES_ROOT}/set3")
except FileExistsError:
pass
# test for illegal url_dataroots
for illegal in ("../b", "!$*", "\\n", "", "(bad)"):
config.update_server_config(
@@ -142,8 +147,8 @@ def test_multi_dataset(self):
app__flask_secret_key="secret",
multi_dataset__dataroot=dict(
s1=dict(dataroot=f"{PROJECT_ROOT}/example-dataset", base_url="set1/1/2"),
s2=dict(dataroot=f"{FIXTURES_ROOT}", base_url="set2"),
s3=dict(dataroot=f"{FIXTURES_ROOT}", base_url="set3"),
s2=dict(dataroot=f"{FIXTURES_ROOT}/set2", base_url="set2"),
s3=dict(dataroot=f"{FIXTURES_ROOT}/set3", base_url="set3"),
),
)

@@ -194,7 +199,9 @@ def test_multi_dataset(self):

response = session.get("/health")
self.assertEqual(json.loads(response.data)["status"], "pass")

# cleanup
os.unlink(f"{FIXTURES_ROOT}/set2")
os.unlink(f"{FIXTURES_ROOT}/set3")
def test_configfile_with_specialization(self):
# test that per_dataset_config config load the default config, then the specialized config

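The multi-dataset test points the set2 and set3 dataroots at FIXTURES_ROOT via symlinks and unlinks them at the end. A minimal, self-contained sketch of that symlink fixture pattern, run against a temporary directory here for illustration; addCleanup is used so the example tidies up even if an assertion fails:

import os
import tempfile
import unittest

class SymlinkFixtureExample(unittest.TestCase):
    def test_symlinked_dataroots(self):
        root = tempfile.mkdtemp()
        for name in ("set2", "set3"):
            link = os.path.join(root, name)
            try:
                os.symlink(root, link)
            except FileExistsError:
                pass
            # registered cleanup runs even if a later assertion fails
            self.addCleanup(os.unlink, link)
        self.assertTrue(os.path.islink(os.path.join(root, "set2")))
        self.assertTrue(os.path.islink(os.path.join(root, "set3")))

if __name__ == "__main__":
    unittest.main()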
14 changes: 12 additions & 2 deletions server/tests/unit/common/config/test_server_config.py
@@ -213,13 +213,19 @@ def test_multidataset_works_for_legal_routes(self):

@patch("server.app.app.render_template")
def test_mulitdatasets_work_e2e(self, mock_render_template):
try:
os.symlink(FIXTURES_ROOT, f"{FIXTURES_ROOT}/set2")
os.symlink(FIXTURES_ROOT, f"{FIXTURES_ROOT}/set3")
except FileExistsError:
pass

mock_render_template.return_value = "something"
# test that multi dataroots work end to end
self.config.update_server_config(
multi_dataset__dataroot=dict(
s1=dict(dataroot=f"{PROJECT_ROOT}/example-dataset", base_url="set1/1/2"),
s2=dict(dataroot=f"{FIXTURES_ROOT}", base_url="set2"),
s3=dict(dataroot=f"{FIXTURES_ROOT}", base_url="set3"),
s2=dict(dataroot=f"{FIXTURES_ROOT}/set2", base_url="set2"),
s3=dict(dataroot=f"{FIXTURES_ROOT}/set3", base_url="set3"),
)
)

@@ -279,6 +285,10 @@ def test_mulitdatasets_work_e2e(self, mock_render_template):
response = session.get("/set2/pbmc3k.cxg/")
self.assertEqual(response.status_code, 200)

# cleanup
os.unlink(f"{FIXTURES_ROOT}/set2")
os.unlink(f"{FIXTURES_ROOT}/set3")

@patch("server.common.config.server_config.diffexp_tiledb.set_config")
def test_handle_diffexp(self, mock_tiledb_config):
custom_config_file = self.custom_app_config(
64 changes: 47 additions & 17 deletions server/tests/unit/common/test_api.py
@@ -8,6 +8,7 @@

import requests

from server.app.app import evict_dataset_from_metadata_cache
from server.common.config.app_config import AppConfig
from server.tests import decode_fbs, FIXTURES_ROOT
from server.tests.fixtures.fixtures import pbmc3k_colors
@@ -615,10 +616,10 @@ def test_metadata_api_called_for_new_dataset(self, mock_get):
self.assertEqual(mock_get.call_count, 1)
self.assertEqual(
f"http://{mock_get._mock_call_args[1]['url']}",
"http://api.cellxgene.staging.single-cell.czi.technology/dp/v1/datasets/meta?url=https://cellxgene.staging.single-cell.czi.technology.com/e/pbmc3k_v0.cxg/", # noqa E501
"http://api.cellxgene.staging.single-cell.czi.technology/dp/v1/datasets/meta?url=https://cellxgene.staging.single-cell.czi.technology.com/e/pbmc3k_v0.cxg/",
# noqa E501
)


@patch("server.data_common.dataset_metadata.requests.get")
def test_data_locator_defaults_to_name_based_lookup_if_metadata_api_throws_error(self, mock_get):
self.TEST_DATASET_URL_BASE = "/e/pbmc3k.cxg"
@@ -633,7 +634,8 @@ def test_data_locator_defaults_to_name_based_lookup_if_metadata_api_throws_error
self.assertEqual(mock_get.call_count, 1)
self.assertEqual(
f"http://{mock_get._mock_call_args[1]['url']}",
'http://api.cellxgene.staging.single-cell.czi.technology/dp/v1/datasets/meta?url=https://cellxgene.staging.single-cell.czi.technology.com/e/pbmc3k.cxg/', # noqa E501
'http://api.cellxgene.staging.single-cell.czi.technology/dp/v1/datasets/meta?url=https://cellxgene.staging.single-cell.czi.technology.com/e/pbmc3k.cxg/',
# noqa E501
)

# check schema loads correctly even with metadata api exception
@@ -715,7 +717,6 @@ def test_dataset_does_not_exist(self):
response = self.client.get(url)
self.assertEqual(response.status_code, 404)


@patch("server.data_common.dataset_metadata.requests.get")
def test_tombstoned_datasets_redirect_to_data_portal(self, mock_get):
response_body = json.dumps({
@@ -731,11 +732,40 @@ def test_tombstoned_datasets_redirect_to_data_portal(self, mock_get):
url = f"{self.TEST_DATASET_URL_BASE}/api/v0.2/{endpoint}"
result = self.client.get(url)
self.assertEqual(result.status_code, 302)
self.assertEqual(result.headers['Location'], "https://cellxgene.staging.single-cell.czi.technology.com/collections/4f098ff4-4a12-446b-a841-91ba3d8e3fa6?tombstoned_dataset_id=2fa37b10-ab4d-49c9-97a8-b4b3d80bf939") # noqa E501
self.assertEqual(result.headers['Location'],
"https://cellxgene.staging.single-cell.czi.technology.com/collections/4f098ff4-4a12-446b-a841-91ba3d8e3fa6?tombstoned_dataset_id=2fa37b10-ab4d-49c9-97a8-b4b3d80bf939") # noqa E501

@patch("server.app.app.evict_dataset_from_metadata_cache")
@patch("server.data_common.dataset_metadata.request_dataset_metadata_from_data_portal")
def test_metadata_cache_item_invalidated_on_errors(self, mock_dp, mock_expire):
mock_expire.side_effect = evict_dataset_from_metadata_cache
response_body_bad = {
"collection_id": "4f098ff4-4a12-446b-a841-91ba3d8e3fa6",
"collection_visibility": "PUBLIC",
"dataset_id": "2fa37b10-ab4d-49c9-97a8-b4b3d80bf939",
"s3_uri": f"BAD_PATH_TO_DATASET/pbmc3k.cxg",
"tombstoned": False,
}
mock_dp.return_value = response_body_bad
TEST_DATASET_URL_BASE = "/e/pbmc3k_v3.cxg"
url = f"{TEST_DATASET_URL_BASE}/api/v0.2/config"
bad_response = self.client.get(url)
self.assertEqual(bad_response.status_code, 404)
self.assertEqual(mock_expire.call_count, 1)
response_body_good = {
"collection_id": "4f098ff4-4a12-446b-a841-91ba3d8e3fa6",
"collection_visibility": "PUBLIC",
"dataset_id": "2fa37b10-ab4d-49c9-97a8-b4b3d80bf939",
"s3_uri": f"{FIXTURES_ROOT}/pbmc3k.cxg",
"tombstoned": False,
}
mock_dp.return_value = response_body_good
good_response = self.client.get(url)
self.assertEqual(good_response.status_code, 200)
self.assertEqual(mock_dp.call_count, 2)


class TestDatasetMetadata(BaseTest):
@classmethod
def setUpClass(cls):
cls.data_locator_api_base = "api.cellxgene.staging.single-cell.czi.technology/dp/v1"
@@ -768,21 +798,21 @@ def test_dataset_metadata_api_called(self, mock_get, mock_dp):
self.TEST_URL_BASE = f"{self.TEST_DATASET_URL_BASE}/api/v0.2/"

response_body = {
    "contact_email": "test_email",
    "contact_name": "test_user",
    "datasets": [
        {
            "collection_visibility": "PUBLIC",
            "id": "2fa37b10-ab4d-49c9-97a8-b4b3d80bf939",
            "name": "Test Dataset",
        },
    ],
    "description": "test_description",
    "id": "4f098ff4-4a12-446b-a841-91ba3d8e3fa6",
    "links": [
        "http://test.link",
    ],
    "name": "Test Collection",
    "visibility": "PUBLIC",
}

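The new invalidation test patches evict_dataset_from_metadata_cache in server.app.app and assigns the directly imported function as the mock's side_effect, so the test can assert the call count while the real eviction still runs. A minimal, self-contained sketch of that patch-with-side_effect pattern, with illustrative names only:

from unittest.mock import patch

def evict(key):
    return f"evicted {key}"

original_evict = evict  # keep a handle to the real function before patching

with patch(f"{__name__}.evict") as mock_evict:
    mock_evict.side_effect = original_evict  # the real eviction logic still runs
    evict("s3://bucket/pbmc3k.cxg")          # call is routed through the mock
    assert mock_evict.call_count == 1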
