Skip to content

Commit

Permalink
Merge pull request #1254 from dandi/enh/metadata-cellline
Browse files Browse the repository at this point in the history
enh: add cellline parsing
  • Loading branch information
yarikoptic committed Apr 3, 2023
2 parents 47e9d97 + 65eda7c commit 84ed322
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 1 deletion.
14 changes: 13 additions & 1 deletion dandi/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,12 +454,23 @@ def extract_sex(metadata: dict) -> Optional[models.SexType]:

def extract_strain(metadata: dict) -> Optional[models.StrainType]:
    """Build a ``StrainType`` from the ``strain`` entry of *metadata*.

    Returns ``None`` when the entry is missing or empty, and also when it
    starts with ``cellline:`` (compared case-insensitively).
    """
    strain = metadata.get("strain")
    if not strain:
        return None
    # Don't assign cell lines to strain
    is_cell_line = strain.lower().startswith("cellline:")
    return None if is_cell_line else models.StrainType(name=strain)


def extract_cellLine(metadata: dict) -> Optional[str]:
    """Return the cell line name encoded in the ``strain`` entry of *metadata*.

    A cell line is recognized by a case-insensitive ``cellline:`` prefix on
    the strain value. Everything after that prefix, with surrounding
    whitespace stripped, is the name.

    :param metadata: mapping of metadata fields; only ``"strain"`` is read.
    :return: the cell line name, or ``None`` when the strain is missing,
        empty, lacks the prefix, or has only whitespace after the prefix.
    """
    value: Optional[str] = metadata.get("strain")
    if not value or not value.lower().startswith("cellline:"):
        return None
    # The prefix ends at the first colon, so the text after that colon is
    # the name (any later colons belong to the name itself).
    name = value.partition(":")[2].strip()
    # A bare "cellline:" carries no name; report it as absent instead of "".
    return name or None


species_map = [
(
["mouse"],
Expand Down Expand Up @@ -709,6 +720,7 @@ def extract_digest(metadata: dict) -> Optional[Dict[models.DigestType, str]]:
"age": extract_age,
"sex": extract_sex,
"strain": extract_strain,
"cellLine": extract_cellLine,
"assayType": extract_assay_type,
"anatomy": extract_anatomy,
"digest": extract_digest,
Expand Down
93 changes: 93 additions & 0 deletions dandi/tests/data/metadata/metadata2asset_cellline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"id": "dandiasset:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be",
"schemaKey": "Asset",
"schemaVersion": "0.4.1",
"keywords": [
"test",
"sample",
"example",
"test-case"
],
"access": [
{
"schemaKey": "AccessRequirements",
"status": "dandi:OpenAccess"
}
],
"wasGeneratedBy": [
{
"schemaKey": "Session",
"identifier": "XYZ789",
"name": "XYZ789",
"description": "Some test data",
"startDate": "2020-08-31T15:58:28-04:00",
"used": [
{
"schemaKey": "Equipment",
"identifier": "probe:probe04",
"name": "Ecephys Probe"
}
]
}
],
"contentSize": 69105,
"encodingFormat": "application/x-nwb",
"digest": {
"dandi:dandi-etag": "e455839e5ab2fa659861f58a423fd17f-1"
},
"path": "/test/path",
"wasDerivedFrom": [
{
"schemaKey": "BioSample",
"identifier": "cell01",
"sampleType": {
"schemaKey": "SampleType",
"name": "cell"
},
"wasDerivedFrom": [
{
"schemaKey": "BioSample",
"identifier": "slice02",
"sampleType": {
"schemaKey": "SampleType",
"name": "slice"
},
"wasDerivedFrom": [
{
"schemaKey": "BioSample",
"identifier": "tissue03",
"sampleType": {
"schemaKey": "SampleType",
"name": "tissuesample"
}
}
]
}
]
}
],
"wasAttributedTo": [
{
"schemaKey": "Participant",
"identifier": "a1b2c3",
"age": {
"unitText": "ISO-8601 duration",
"value": "P170DT12212S",
"schemaKey": "PropertyValue",
"valueReference": {"schemaKey": "PropertyValue", "value": "dandi:BirthReference"}
},
"sex": {
"schemaKey": "SexType",
"identifier": "http://purl.obolibrary.org/obo/PATO_0000384",
"name": "Male"
},
"cellLine": "abcdef/1",
"genotype": "Typical",
"species": {
"schemaKey": "SpeciesType",
"identifier": "http://purl.obolibrary.org/obo/NCBITaxon_1234175",
"name": "Cyperus bulbosus"
}
}
]
}
56 changes: 56 additions & 0 deletions dandi/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from ..dandiapi import RemoteBlobAsset
from ..metadata import (
extract_age,
extract_cellLine,
extract_species,
get_metadata,
nwb2asset,
Expand Down Expand Up @@ -156,6 +157,18 @@ def test_parse_age(age: str, duration: Union[str, Tuple[str, str]]) -> None:
assert parse_age(age) == (duration, ref)


@pytest.mark.parametrize(
    ("s", "t"),
    [
        ("cellline: abcdef/1", "abcdef/1"),
        ("CellLine: cellline:1 ", "cellline:1"),
        ("cell line: 1", None),
    ],
)
def test_extract_cellLine(s, t):
    """Check the value extracted from strain string ``s`` against ``t``."""
    extracted = extract_cellLine({"strain": s})
    assert extracted == t


@pytest.mark.parametrize(
"age, errmsg",
[
Expand Down Expand Up @@ -336,6 +349,49 @@ def test_timedelta2duration(td: timedelta, duration: str) -> None:
},
marks=pytest.mark.obolibrary,
),
# Cell line case: a "cellline:"-prefixed strain must be emitted as cellLine, not strain
pytest.param(
"metadata2asset_cellline.json",
{
"contentSize": 69105,
"digest": "e455839e5ab2fa659861f58a423fd17f-1",
"digest_type": "dandi_etag",
"encodingFormat": "application/x-nwb",
"experiment_description": "Experiment Description",
"experimenter": "Joe Q. Experimenter",
"id": "dandiasset:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be",
"institution": "University College",
"keywords": ["test", "sample", "example", "test-case"],
"lab": "Retriever Laboratory",
"related_publications": "A Brief History of Test Cases",
"session_description": "Some test data",
"session_id": "XYZ789",
"session_start_time": "2020-08-31T15:58:28-04:00",
"age": "23 days",
"date_of_birth": "2020-03-14T12:34:56-04:00",
"genotype": "Typical",
"sex": "M",
"strain": "cellline: abcdef/1",
"species": "http://purl.obolibrary.org/obo/NCBITaxon_1234175", # Corner case
"subject_id": "a1b2c3",
"cell_id": "cell01",
"slice_id": "slice02",
"tissue_sample_id": "tissue03",
"probe_ids": "probe04",
"number_of_electrodes": 42,
"number_of_units": 6,
"nwb_version": "2.2.5",
"nd_types": [
"Device (2)",
"DynamicTable",
"ElectricalSeries",
"ElectrodeGroup",
"Subject",
],
"path": "/test/path",
},
marks=pytest.mark.obolibrary,
),
],
)
def test_prepare_metadata(filename: str, metadata: Dict[str, Any]) -> None:
Expand Down

0 comments on commit 84ed322

Please sign in to comment.