Skip to content

Commit

Permalink
Integrated metadata recording
Browse files Browse the repository at this point in the history
  • Loading branch information
TheChymera committed Sep 7, 2022
1 parent 7075a7c commit d0753e8
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 59 deletions.
51 changes: 14 additions & 37 deletions dandi/files/bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
from ..metadata import add_common_metadata, prepare_metadata
from ..misctypes import Digest

BIDS_TO_DANDI = {
"subject": "subject_id",
"session": "session_id",
}
BIDS_ASSET_ERRORS = [
"BIDS.NON_BIDS_PATH_PLACEHOLDER",
]
BIDS_DATASET_ERRORS = [
"BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",
]


@dataclass
Expand Down Expand Up @@ -66,43 +68,18 @@ def _validate(self) -> None:
bids_paths = [str(self.filepath)] + [
str(asset.filepath) for asset in self.dataset_files
]
# TODO gh-943: use RFed data structures, avoid duplicating logic
results = validate_bids(*bids_paths)
self._dataset_errors: list[str] = []
self._asset_errors = defaultdict(list)
# TODO gh-943: rename regex maybe and make sure you use messages from class.
for i in results:
if i.id == "BIDS.NON_BIDS_PATH_PLACEHOLDER":
bids_path = Path(i.path).relative_to(self.bids_root).as_posix()
self._dataset_errors.append(
f"The `{bids_path}` file was not matched by any regex schema entry."
)
self._asset_errors[bids_path].append(
"File not matched by any regex schema entry"
)
elif i.id == "BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER":
self._dataset_errors.append(
f"The `{i.path_regex}` regex pattern file"
" required by BIDS was not found."
)

# TODO gh-943: Should we add valid files to Validation result?
# The following checks seem difficult to implement.
# Reimplement this at the object level, Validator Result does not have to be one per file.
if len(results["path_listing"]) == len(results["path_tracking"]):
self._dataset_errors.append("No valid BIDS files were found")
self._asset_metadata = defaultdict(dict)
for meta in results["match_listing"]:
bids_path = (
Path(meta.pop("path")).relative_to(self.bids_root).as_posix()
)
meta = {
BIDS_TO_DANDI[k]: v
for k, v in meta.items()
if k in BIDS_TO_DANDI
}
# meta["bids_schema_version"] = results["bids_schema_version"]
self._asset_metadata[bids_path] = prepare_metadata(meta)
# Sort every validator result into per-asset errors, dataset-level errors,
# or per-asset metadata, according to its result ID.
for result in results:
    if result.id in BIDS_ASSET_ERRORS:
        # Key by the POSIX path relative to the BIDS root, i.e. the same key
        # form used for ``_asset_metadata`` below, rather than by the raw
        # (absolute) path reported by the validator.
        # NOTE(review): confirm get_asset_errors() looks assets up by this form.
        bids_path = Path(result.path).relative_to(self.bids_root).as_posix()
        self._asset_errors[bids_path].append(result.message)
    elif result.id in BIDS_DATASET_ERRORS:
        self._dataset_errors.append(result.message)
    elif result.id == "BIDS.MATCH":
        # Wrap the path in ``Path`` *before* calling ``relative_to``: the
        # validator may hand back a plain string, and the key must be the
        # POSIX string itself, not a ``Path`` built from it.
        bids_path = Path(result.path).relative_to(self.bids_root).as_posix()
        self._asset_metadata[bids_path] = prepare_metadata(result.metadata)

def get_asset_errors(self, asset: BIDSAsset) -> list[str]:
""":meta private:"""
Expand Down
2 changes: 2 additions & 0 deletions dandi/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def test_validate_bids_errors(bids_error_examples, dataset):
with open(os.path.join(selected_dataset, ".ERRORS.json")) as f:
expected_errors = json.load(f)
for i in validation_result:
if i.id == "BIDS.MATCH":
continue
error_id = i.id
if i.path:
error_path = i.path
Expand Down
59 changes: 37 additions & 22 deletions dandi/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,25 @@
from .files import find_dandi_files


BIDS_TO_DANDI = {
"subject": "subject_id",
"session": "session_id",
}


@dataclass
class ValidationResult:
origin: ValidationOrigin
severity: Severity
id: str
origin: ValidationOrigin
scope: Scope
message: str
# TODO gh-943: add dandiset_path as attribute (optional).
# TODO gh-943: should this be relative to `dataset_path`?
# would make writing tests with tmp paths a lot easier :3
# keep it absolute!!!
asset_paths: Optional[list[str]] = None
dandiset_path: Optional[Path] = None
dataset_path: Optional[Path] = None
message: Optional[str] = ""
metadata: Optional[dict] = None
path: Optional[Path] = None
path_regex: Optional[str] = None
asset_paths: Optional[list[str]] = None
path_regex: Optional[str] = ""
severity: Optional[Severity] = None


@dataclass
Expand All @@ -43,6 +46,7 @@ class Severity(Enum):
class Scope(Enum):
    """Level at which a validation result applies."""

    # The result concerns a single file (used for results that carry a `path`).
    FILE = "file"
    # The result concerns a dandiset.
    # NOTE(review): not used by any result constructed in the visible code — confirm.
    DANDISET = "dandiset"
    # The result concerns a dataset as a whole, e.g. a BIDS-required file
    # that is missing from it.
    DATASET = "dataset"


def validate_bids(
Expand Down Expand Up @@ -71,6 +75,11 @@ def validate_bids(
list of ValidationResult
Validation results reporting required patterns not found, existing filenames not matching any
patterns, and matched files together with their metadata.
Notes
-----
* Eventually this should be migrated to BIDS schema specified errors, see discussion here:
https://github.com/bids-standard/bids-specification/issues/1262
"""

import bidsschematools
Expand All @@ -92,6 +101,7 @@ def validate_bids(
name="bidsschematools",
version=bidsschematools.__version__,
)

for path in validation_result["path_tracking"]:
# Hard-coding exclusion here pending feature + release in:
# https://github.com/bids-standard/bids-specification/issues/1272
Expand All @@ -103,15 +113,10 @@ def validate_bids(
ValidationResult(
origin=origin,
severity=Severity.ERROR,
# For schema-integrated error code discussion, see:
# https://github.com/bids-standard/bids-specification/issues/1262
id="BIDS.NON_BIDS_PATH_PLACEHOLDER",
scope=Scope.FILE,
path=path,
message="File does not match any pattern known to BIDS.",
# TODO - discover dandiset or actually BIDS dataset
# might want separate the two
# asset_paths: Optional[list[str]] = None
dataset_path=dataset_path,
dandiset_path=dandiset_path,
)
Expand All @@ -129,19 +134,29 @@ def validate_bids(
ValidationResult(
origin=origin,
severity=Severity.ERROR,
# For schema-integrated error code discussion, see:
# https://github.com/bids-standard/bids-specification/issues/1262
id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",
scope=Scope.FILE,
scope=Scope.DATASET,
path_regex=pattern["regex"],
message="BIDS-required file is not present.",
# TODO - discover dandiset or actually BIDS dataset
# might want separate the two
# asset_paths: Optional[list[str]] = None
#dataset_path=dataset_path,
#dandiset_path=dandiset_path,
)
)
# Emit one "BIDS.MATCH" result per matched file. Only the entities that have
# a DANDI counterpart are kept, renamed according to BIDS_TO_DANDI.
for match in validation_result["match_listing"]:
    # The path is removed from the match record so that it is not treated
    # as a metadata entity below.
    matched_path = match.pop("path")
    dandi_metadata = {}
    for bids_key, value in match.items():
        if bids_key in BIDS_TO_DANDI:
            dandi_metadata[BIDS_TO_DANDI[bids_key]] = value
    match_result = ValidationResult(
        origin=origin,
        id="BIDS.MATCH",
        scope=Scope.FILE,
        path=matched_path,
        metadata=dandi_metadata,
    )
    our_validation_result.append(match_result)

return our_validation_result


Expand Down

0 comments on commit d0753e8

Please sign in to comment.