Skip to content

Commit

Permalink
Merge pull request #295 from satra/nwb2asset
Browse files Browse the repository at this point in the history
add dandimeta migration
  • Loading branch information
satra committed Dec 2, 2020
2 parents d01c602 + c8cd5e7 commit 114b729
Show file tree
Hide file tree
Showing 3 changed files with 663 additions and 2 deletions.
188 changes: 188 additions & 0 deletions dandi/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,3 +298,191 @@ def nwb2asset(nwb_path, digest=None, digest_type=None):

def metadata2asset(metadata):
    """Turn extracted metadata into an asset model.

    Delegates to ``extract_model``, asking it for ``models.AssetMeta`` and
    handing over *metadata* unchanged; whatever ``extract_model`` returns is
    returned as-is.
    """
    asset_model = models.AssetMeta
    return extract_model(asset_model, metadata)


"""
The following section converts metadata schema from the current girder dandiset
model to the new schema in dandi-cli. This section should be removed
after the migration is finished to the
"""

# Old (girder) dandiset key -> how convertv1 places it in the new metadata.
# The first list element is the new key.  An optional second element is the
# "extra" value convertv1 attaches: a role name for sponsors, a relation for
# related resources, or the nested field name for the assetsSummary counts.
mapping = {
    # Core descriptive fields
    "identifier": ["identifier"],
    "name": ["name"],
    "description": ["description"],
    # People and organisations
    "contributors": ["contributor"],
    "sponsors": ["contributor", ["Sponsor"]],
    "license": ["license"],
    "keywords": ["keywords"],
    "project": ["generatedBy"],
    # Subject matter
    "conditions_studied": ["about"],
    "associated_anatomy": ["about"],
    "protocols": ["protocol"],
    "ethicsApprovals": ["ethicsApproval"],
    "access": ["access"],
    # Related resources, tagged with their relation
    "associatedData": ["relatedResource", "IsDerivedFrom"],
    "publications": ["relatedResource", "IsDescribedBy"],
    # Measured variables
    "age": ["variableMeasured"],
    "organism": ["variableMeasured"],
    "sex": ["variableMeasured"],
    # Summary counts
    "number_of_subjects": ["assetsSummary", "numberOfSubjects"],
    "number_of_cells": ["assetsSummary", "numberOfCells"],
    "number_of_tissue_samples": ["assetsSummary", "numberOfSamples"],
}


def toContributor(value):
    """Convert old-style contributor/sponsor records to the new layout.

    Parameters
    ----------
    value : dict or list of dict
        One old record or a list of them.  The records are not modified.

    Returns
    -------
    list of dict
        One new dict per input record, in input order:

        * ``name`` is rewritten from "First Middle Last" to
          "Last, First Middle"; a single-word name is kept as that word and
          a blank name is left untouched.
        * ``roles`` becomes ``roleName``; multi-word roles are joined in
          CamelCase ("Contact person" -> "ContactPerson"), blank roles are
          dropped.
        * ``awardNumber`` is carried over.
        * ``orcid`` becomes ``identifier`` (a ``models.PropertyValue``, empty
          when the orcid is falsy).
        * ``affiliations`` is renamed to ``affiliation``.
        * every remaining key is copied through unchanged.
    """
    if not isinstance(value, list):
        value = [value]
    out = []
    for record in value:
        # Work on a shallow copy: keys are renamed and removed below, and
        # doing that on the caller's dict would make a second conversion of
        # the same record produce different output.
        item = dict(record)
        contrib = {}
        if "name" in item:
            parts = item["name"].split()
            if len(parts) > 1:
                item["name"] = f"{parts[-1]}, {' '.join(parts[:-1])}"
            elif parts:
                # Single word: nothing to reorder, and no dangling ", ".
                item["name"] = parts[0]
        if "roles" in item:
            roles = []
            for role in item.pop("roles"):
                words = role.split()
                if len(words) > 1:
                    roles.append("".join(word.capitalize() for word in words))
                elif words:
                    roles.append(words[0])
            contrib["roleName"] = roles
        if "awardNumber" in item:
            contrib["awardNumber"] = item.pop("awardNumber")
        if "orcid" in item:
            orcid = item.pop("orcid")
            if orcid:
                contrib["identifier"] = models.PropertyValue(
                    value=orcid, propertyID="ORCID"
                )
            else:
                contrib["identifier"] = models.PropertyValue()
        if "affiliations" in item:
            item["affiliation"] = item.pop("affiliations")
        # Everything not handled above is passed through under a string key.
        contrib.update({str(key): val for key, val in item.items()})
        out.append(contrib)
    return out


def convertv1(data):
    """Convert an old (girder) dandiset metadata record to the new key layout.

    Parameters
    ----------
    data : dict
        Either the old metadata mapping itself or a mapping holding it under
        the ``"dandiset"`` key.  It is not modified.

    Returns
    -------
    dict
        Metadata keyed by the new names taken from ``mapping``.  Old keys
        that map to the same new key are merged into a single list.  The
        ``assetsSummary`` and ``variableMeasured`` entries are computed but
        removed again before returning.

    Raises
    ------
    KeyError
        If an old key is neither one of the explicitly skipped keys nor
        present in ``mapping``.
    """
    # Function-scope import so this block does not depend on file-level
    # imports that are not visible here.
    import copy

    # Deep-copy first: nested contributor and age dicts are rewritten below
    # (directly and through toContributor), and the caller's record must stay
    # usable for a second conversion.
    oldmeta = copy.deepcopy(data["dandiset"] if "dandiset" in data else data)
    newmeta = {}
    for oldkey, value in oldmeta.items():
        if oldkey in ("language", "altid", "number_of_slices"):
            continue
        if oldkey not in mapping:
            raise KeyError(f"Could not find {oldkey}")
        target = mapping[oldkey]
        newkey = target[0] if target else f"schema:{oldkey}"

        if oldkey in ("contributors", "sponsors"):
            value = toContributor(value)
        if oldkey == "access":
            value = [
                {
                    "email": value["access_contact_email"],
                    "status": value["status"].capitalize(),
                }
            ]
        if oldkey == "identifier":
            value = models.PropertyValue(value=value, propertyID="DANDI")

        if len(target) == 2:
            extra = target[1]
            # Reset on every key so a tag chosen for an earlier key can never
            # leak into this one.
            extrakey = None
            if newkey == "contributor" or oldkey == "sponsors":
                extrakey = "roleName"
            if oldkey in ("publications", "associatedData"):
                extrakey = "relation"
                if not isinstance(value, list):
                    value = [value]
                resources = []
                for item in value:
                    if isinstance(item, dict):
                        resources.append(dict(item))
                    elif not any(item in seen.values() for seen in resources):
                        # Bare entries become {"url": ...} unless an earlier
                        # resource already carries the same value.
                        resources.append({"url": item})
                value = resources
            if oldkey in (
                "number_of_subjects",
                "number_of_cells",
                "number_of_tissue_samples",
            ):
                # Counts are nested under the summary field name instead of
                # being tagged.
                value = {extra: value}
                extrakey = None
            if extrakey:
                # copy.copy keeps the module-level mapping's list (e.g.
                # ["Sponsor"]) from being shared with the output records.
                if isinstance(value, list):
                    for val in value:
                        val[extrakey] = copy.copy(extra)
                elif isinstance(value, dict):
                    value[extrakey] = copy.copy(extra)

        if newkey == "variableMeasured":
            if oldkey in ("age", "sex"):
                vm = {"name": oldkey}
                if oldkey == "sex":
                    vm["value"] = value
                else:
                    # A unit parsed from "maximum" is overridden by one parsed
                    # from "minimum"; either overrides a pre-existing "units".
                    for bound in ("maximum", "minimum"):
                        if bound in value:
                            value[bound], unit = _parse_age_bound(value[bound])
                            if unit is not None:
                                value["units"] = unit
                    # Only rename when a unit is actually known, instead of
                    # raising KeyError on records without one.
                    if "units" in value:
                        value["unitText"] = value.pop("units")
                    vm.update(value)
            else:
                vm = {
                    "name": "species",
                    "value": [val["species"] for val in value if "species" in val],
                }
            value = vm

        if newkey not in newmeta:
            newmeta[newkey] = value
        else:
            # Several old keys share this new key: collect them in one list.
            curvalue = newmeta[newkey]
            if not isinstance(curvalue, list):
                newmeta[newkey] = [curvalue]
            if not isinstance(value, list):
                value = [value]
            newmeta[newkey].extend(value)

    # These two entries are built above but dropped from the result.
    newmeta.pop("assetsSummary", None)
    newmeta.pop("variableMeasured", None)
    return newmeta


def _parse_age_bound(text):
    """Split one age bound into ``(value, unit)``.

    ``unit`` is ``None`` when the text names no unit.  ``value`` is a float
    unless the text contains "None", in which case the (possibly trimmed)
    text is returned.  Non-string input is returned unchanged with no unit.
    """
    if not isinstance(text, str):
        return text, None
    unit = None
    if "days" in text:
        unit = "days"
    if "Gestational" in text:
        unit = "Gestational Week"
        text = text.split()[-1]
    if text.startswith("P"):
        # Presumably an ISO 8601-style duration such as "P30D": the trailing
        # letter is the unit, so read it before slicing it off.
        unit = text[-1]
        text = text[1:-1]
    if "None" not in text:
        text = float(text.split()[0])
    return text, unit


def migrate2newschema(meta):
    """Migrate old dandiset metadata into a ``models.DandiMeta`` object.

    *meta* is first passed through ``convertv1``; the resulting dict is then
    expanded as keyword arguments to ``models.DandiMeta.unvalidated``, whose
    return value is handed back to the caller.
    """
    return models.DandiMeta.unvalidated(**convertv1(meta))
2 changes: 1 addition & 1 deletion dandi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ class PropertyValue(DandiBaseModel):
minValue: float = Field(None, nskey="schema")
unitCode: Union[str, AnyUrl] = Field(None, nskey="schema")
unitText: str = Field(None, nskey="schema")
value: Union[bool, float, str, int, List[Union[bool, float, str, int]]] = Field(
value: Union[str, bool, int, float, List[Union[str, bool, int, float]]] = Field(
None, nskey="schema"
)
valueReference: "PropertyValue" = Field(
Expand Down
Loading

0 comments on commit 114b729

Please sign in to comment.