Skip to content

Commit

Permalink
bump tiledb to 0.13.1, add work-around for dense read bug (#237)
Browse files Browse the repository at this point in the history
* bump tiledb to 0.13.1, add work-around for dense read bug

* use old style python
  • Loading branch information
Bruce Martin authored Mar 25, 2022
1 parent f9a8fc4 commit 24c41e4
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 3 deletions.
2 changes: 1 addition & 1 deletion hosted/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ umap-learn==0.4.6
sentry-sdk[flask]==0.14.3
six==1.14.0
sqlalchemy==1.3.18
tiledb==0.10.1
tiledb==0.13.1
urllib3==1.26.5
Werkzeug==1.0.1
zipp==3.1.0
7 changes: 7 additions & 0 deletions server/dataset/cxg_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ def cleanup(self):
def set_tiledb_context(context_params):
"""Set the tiledb context. This should be set before any instances of CxgDataset are created"""
try:
"""
TileDB 0.13.1 has a bug in the new dense reader. This config (workaround) will
force use of the legacy reader, which works correctly. It can be removed when the
test case `test_tdb_bug` in server/tests/unit/dataset/test_cxg_dataset.py passes
"""
context_params["sm.query.dense.reader"] = "legacy"

CxgDataset.tiledb_ctx = tiledb.Ctx(context_params)
tiledb.default_ctx(context_params)

Expand Down
2 changes: 1 addition & 1 deletion server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ pandas>=1.0,!=1.1 # pandas 1.1 breaks tests, https://github.com/pandas-dev/pand
PyYAML>=5.4 # CVE-2020-14343
scipy>=1.4
requests>=2.22.0
tiledb==0.10.4
tiledb==0.13.1
s3fs==0.4.2
MarkupSafe==1.1.1
32 changes: 31 additions & 1 deletion server/tests/unit/dataset/test_cxg_dataset.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import unittest

from werkzeug.datastructures import MultiDict

from server.common.rest import _query_parameter_to_filter
from server.common.utils.data_locator import DataLocator
from server.dataset.cxg_dataset import CxgDataset
from server.tests.unit import app_config
from server.tests import FIXTURES_ROOT
from server.tests import FIXTURES_ROOT, decode_fbs
from server.tests.fixtures.fixtures import pbmc3k_colors

import tiledb
import numpy as np


class TestCxgDataset(unittest.TestCase):
def test_get_colors(self):
Expand All @@ -18,3 +24,27 @@ def get_data(self, fixture):
data_locator = f"{FIXTURES_ROOT}/{fixture}"
config = app_config(data_locator)
return CxgDataset(DataLocator(data_locator), config)

def test_tdb_bug(self):
    """
    Regression check for a TileDB read discrepancy.

    This gives different results on 0.12.4 vs 0.13.1. Reported to TileDB.
    Work-around present in server/dataset/cxg_dataset.py:set_tiledb_context()
    """
    # Print the TileDB version in use (presumably so a failing run shows which
    # release produced the result -- the outcome is version dependent).
    print(tiledb.__version__)
    data = self.get_data("pbmc3k.cxg")
    filt = _query_parameter_to_filter(
        MultiDict(
            [
                ("var:name_0", "F5"),
                ("var:name_0", "BEB3"),
                ("var:name_0", "SIK1"),
            ]
        )
    )
    dat = data.summarize_var("mean", filt, 0)
    summary = decode_fbs.decode_matrix_FBS(dat)

    # assertDictContainsSubset is deprecated since Python 3.2 and removed in
    # 3.12, so check each expected key/value pair explicitly instead.
    expected_subset = {"n_rows": 2638, "n_cols": 1, "row_idx": None}
    for key, expected in expected_subset.items():
        self.assertIn(key, summary)
        self.assertEqual(summary[key], expected, key)

    self.assertIs(type(summary["columns"]), list)
    self.assertEqual(len(summary["columns"]), 1)
    self.assertEqual(len(summary["columns"][0]), 2638)
    # Exact comparison is intentional: this value pins the known-good result.
    self.assertEqual(summary["columns"][0].sum(), np.float32(-19.00301))

0 comments on commit 24c41e4

Please sign in to comment.