validator.py
import datetime
import json
import os
import re
import typing as ty
from collections.abc import Mapping
from copy import deepcopy
from functools import lru_cache
from pathlib import Path
import bidsschematools as bst
import bidsschematools.schema
import bidsschematools.types
import bidsschematools.utils
lgr = bst.utils.get_logger()
# The list of which entities create directories could be dynamically specified by the YAML, but for
# now, it is not.
# Ordering is important, as "subject" follows "session" alphabetically, but is hierarchically
# above it.
DIR_ENTITIES = ["subject", "session"]
def _get_paths(
bids_paths,
    pseudofile_suffixes=(),
accept_dummy_paths=False,
):
"""
Get all paths from a list of directories, excluding hidden subdirectories from distribution.
Parameters
----------
bids_paths : list or str
Directories from which to get paths, may also contain file paths, which will remain
unchanged.
pseudofile_suffixes : list of str
Directory suffixes prompting the validation of the directory name and limiting further
directory walk.
accept_dummy_paths : bool, optional
Whether to accept path strings which do not correspond to either files or directories.
Notes
-----
* Figure out how to return paths from BIDS root.
* Deduplicate paths (if input dirs are subsets of other input dirs), might best be done at the
very end.
"""
# `.bidsignore` is not, in fact, a BIDS file, as per:
# https://github.com/bids-standard/bids-specification/issues/980
# Perhaps this should be parameterized for downstream flexibility and not having to keep track
# of downstream nuisance files here.
exclude_files = [
"dandiset.yaml",
]
path_list = []
bids_root_found = False
for bids_path in bids_paths:
if not accept_dummy_paths:
bids_path = os.path.abspath(os.path.expanduser(bids_path))
if os.path.isdir(bids_path):
for root, dirs, file_names in os.walk(bids_path, topdown=True):
if "dataset_description.json" in file_names:
if bids_root_found:
dirs[:] = []
file_names[:] = []
else:
bids_root_found = True
if root.endswith(tuple(pseudofile_suffixes)):
# Add the directory name to the validation paths list.
path_list.append(Path(root).as_posix() + "/")
# Do not index the contents of the directory.
dirs[:] = []
file_names[:] = []
# will break if BIDS ever puts meaningful data under `/.{dandi,datalad,git}*/`
if os.path.basename(root).startswith("."):
dirs[:] = []
file_names[:] = []
for file_name in file_names:
if file_name in exclude_files or file_name.startswith("."):
continue
file_path = os.path.join(root, file_name)
# This will need to be replaced with bids root finding.
path_list.append(Path(file_path).as_posix())
elif os.path.isfile(bids_path) or accept_dummy_paths:
path_list.append(Path(bids_path).as_posix())
else:
raise FileNotFoundError(
f"The input path `{bids_path}` could not be located. If this is a string "
"intended for path validation which does not correspond to an actual "
"path, please set the `accept_dummy_paths` parameter to True."
)
return path_list
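# Illustrative call (hypothetical layout, not taken from a real dataset):
#
#     _get_paths(["/data/bids"], pseudofile_suffixes=[".ome.zarr"])
#
# would walk /data/bids, record regular files, append matching directories such
# as "/data/bids/sub-01/micr/sub-01_SEM.ome.zarr/" as single pseudo-file entries
# without descending into them, and skip hidden trees like "/data/bids/.git/".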
def _capture_regex(name, pattern, backref):
"""Capture pattern to name or match back-reference to name
>>> _capture_regex("run", "[0-9]+", False)
'(?P<run>[0-9]+)'
>>> _capture_regex("run", "[0-9]+", True)
'(?P=run)'
>>> re.match(_capture_regex("run", "[0-9]+", False), "123_").groupdict()
{'run': '123'}
"""
return f"(?P={name})" if backref else f"(?P<{name}>{pattern})"
def _optional_regex(regex, optional):
"""Return an optional version of a regex if optional is True
A required regex is passed through unchanged:
>>> pattern = _optional_regex("xyz", False)
>>> pattern
'xyz'
>>> re.match(pattern, "xyz").groups()
()
>>> re.match(pattern, "") is None
True
An optional regex uses a non-capturing group, to avoid interfering
with existing groups
>>> pattern = _optional_regex("x(?P<name>[a-z])z", True)
>>> pattern
'(?:x(?P<name>[a-z])z)?'
>>> re.match(pattern, "xyz").groups()
('y',)
>>> re.match(pattern, "xyz").groupdict()
{'name': 'y'}
>>> re.match(pattern, "").groups()
(None,)
>>> re.match(pattern, "").groupdict()
{'name': None}
"""
return f"(?:{regex})?" if optional else regex
@lru_cache()
def _format_entity(entity, name, pattern, level, directory=False):
if directory and entity not in DIR_ENTITIES:
return ""
label = _capture_regex(entity, pattern, not directory and entity in DIR_ENTITIES)
post = "/" if directory else "_"
return _optional_regex(f"{name}-{label}{post}", level != "required")
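# For illustration, assuming the schema's standard "label" format pattern
# "[0-9a-zA-Z]+" (the actual pattern is looked up from `schema.objects.formats`):
#
#     _format_entity("acquisition", "acq", "[0-9a-zA-Z]+", "optional")
#         -> '(?:acq-(?P<acquisition>[0-9a-zA-Z]+)_)?'
#     _format_entity("subject", "sub", "[0-9a-zA-Z]+", "required", directory=True)
#         -> 'sub-(?P<subject>[0-9a-zA-Z]+)/'
#     _format_entity("subject", "sub", "[0-9a-zA-Z]+", "required")
#         -> 'sub-(?P=subject)_'
#
# In the filename position, directory entities become back-references, so a file
# under `sub-01/` only matches if its name repeats the `sub-01` label exactly.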
def split_inheritance_rules(rule: Mapping) -> ty.List[Mapping]:
"""Break composite rules into main and sidecar rules
Implements the inheritance principle for file naming.
"""
heritable_exts = {".tsv", ".json", ".bval", ".bvec"}
rule_exts = set(rule["extensions"])
main_exts = rule_exts - heritable_exts
# If a rule only has TSV or JSON files, entities can be
# made required
if not main_exts:
if ".tsv" in rule_exts:
main_exts = {".tsv"}
elif ".json" in rule_exts:
main_exts = {".json"}
sidecar_exts = rule_exts - main_exts
if not sidecar_exts:
return [rule]
sidecar_dtypes = [""] + rule.get("datatypes", [])
    sidecar_entities = {ent: "optional" for ent in rule.get("entities", ())}
main_rule = {**rule, **{"extensions": list(main_exts)}}
sidecar_rule = {
**rule,
**{
"extensions": list(sidecar_exts),
"datatypes": sidecar_dtypes,
"entities": sidecar_entities,
},
}
return [main_rule, sidecar_rule]
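# Sketch of the split on a hypothetical rule shaped like the schema's YAML rules:
#
#     rule = {"suffixes": ["T1w"], "extensions": [".nii.gz", ".json"],
#             "datatypes": ["anat"], "entities": {"subject": "required"}}
#     main_rule, sidecar_rule = split_inheritance_rules(rule)
#
# main_rule keeps extensions [".nii.gz"] unchanged, while sidecar_rule gets
# extensions [".json"], datatypes ["", "anat"] (the empty string lets the
# sidecar sit above the datatype directory) and all entities made "optional".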
def _path_rule(rule: bst.types.Namespace):
return {"regex": re.escape(rule.path), "mandatory": rule.level == "required"}
def _sanitize_extension(ext: str) -> str:
if ext == ".*":
return r"\.[a-zA-Z0-9.]+"
return re.escape(ext)
def _stem_rule(rule: bst.types.Namespace):
stem_regex = re.escape(rule.stem)
ext_match = "|".join(_sanitize_extension(ext) for ext in rule.extensions)
ext_regex = f"(?P<extension>{ext_match})"
return {"regex": stem_regex + ext_regex, "mandatory": rule.level == "required"}
def _entity_rule(rule: Mapping, schema: bst.types.Namespace):
dir_regex = []
entity_regex = []
for ent in schema.rules.entities:
if ent not in rule["entities"]:
continue
ent_obj = rule["entities"][ent]
if isinstance(ent_obj, str):
ent_obj = {"level": ent_obj}
# Allow filename rule to override original entity fields
entity = {**schema.objects.entities[ent], **ent_obj}
if "enum" in entity:
pattern = "|".join(entity["enum"])
else:
pattern = schema.objects.formats[entity["format"]].pattern
dir_regex.append(
_format_entity(ent, entity["name"], pattern, entity["level"], directory=True)
)
entity_regex.append(_format_entity(ent, entity["name"], pattern, entity["level"]))
dtypes = set(rule.get("datatypes", ()))
optional_dtype = "" in dtypes
if optional_dtype:
dtypes.remove("")
if dtypes:
pattern = f"(?P<datatype>{'|'.join(dtypes)})/"
if optional_dtype:
pattern = f"(?:{pattern})?"
        dir_regex.append(pattern)
# If we move to referring to suffixes by keys in the object table:
# suffixes = [schema.objects.suffixes[suffix].value for suffix in rule["suffixes"]]
suffixes = rule["suffixes"]
suffix_regex = f"(?P<suffix>{'|'.join(suffixes)})"
# If we move to referring to extensions by keys in the object table:
# extensions = [schema.objects.extensions[ext].value for ext in rule["extensions"]]
extensions = rule["extensions"]
ext_match = "|".join(_sanitize_extension(ext) for ext in extensions)
ext_regex = f"(?P<extension>{ext_match})"
return {
"regex": "".join(dir_regex + entity_regex + [suffix_regex, ext_regex]),
"mandatory": False,
}
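# Putting the pieces together for a hypothetical rule (again assuming the
# "label" format pattern "[0-9a-zA-Z]+"):
#
#     {"entities": {"subject": "required", "session": "optional"},
#      "datatypes": ["anat"], "suffixes": ["T1w"], "extensions": [".nii.gz"]}
#
# assembles to the single-string regex (wrapped here for readability):
#
#     sub-(?P<subject>[0-9a-zA-Z]+)/(?:ses-(?P<session>[0-9a-zA-Z]+)/)?
#     (?P<datatype>anat)/sub-(?P=subject)_(?:ses-(?P=session)_)?
#     (?P<suffix>T1w)(?P<extension>\.nii\.gz)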
def load_filename_rules(
rule_group: bst.types.Namespace,
schema: bst.types.Namespace,
level: int,
):
"""Load schema rules into regular expressions
Parameters
----------
rule_group : Namespace
The set of rules to load from the schema
schema : Namespace
A nested dictionary, as returned by `bidsschematools.schema.load_schema()`.
level : int
The depth in rule_group to look for rules
Returns
-------
rules : list of dict
A list of dictionaries, with keys including 'regex' and 'mandatory'.
"""
regex_schema = []
for rule_template in rule_group.values(level=level):
# Simple rules, e.g. dataset_description.json, README
if "path" in rule_template:
regex_schema.append(_path_rule(rule_template))
elif "stem" in rule_template:
regex_schema.append(_stem_rule(rule_template))
else:
regex_schema.extend(
_entity_rule(rule, schema) for rule in split_inheritance_rules(rule_template)
)
return regex_schema
@lru_cache()
def load_all(
schema_dir,
):
"""
Create full path regexes for all BIDS specification files.
Parameters
----------
schema_dir : str, optional
A string pointing to a BIDS directory for which paths should be validated.
Returns
-------
all_regex : list of dict
A list of dictionaries, with keys including 'regex' and 'mandatory'.
my_schema : Mapping
Nested dictionaries representing the full schema.
"""
schema = bst.schema.load_schema(schema_dir)
all_regex = []
for group in (schema.rules.files.common, schema.rules.files.raw):
all_regex.extend(load_filename_rules(group, schema, level=2))
return all_regex, schema
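# Typical usage, pointing at a bidsschematools-style schema tree (the path is
# illustrative):
#
#     regex_schema, schema = load_all("/path/to/bids-specification/src/schema")
#     # regex_schema entries look like
#     # {"regex": "dataset_description\\.json", "mandatory": True}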
def validate_all(
paths_list,
regex_schema,
):
"""
    Validate `paths_list` based on a `regex_schema` dictionary list, including regexes.
    Parameters
    ----------
    paths_list : list of str
        Paths to individual files or subdirectories which *all* reside within one and only
        one BIDS directory root (i.e. nested datasets should be validated separately).
regex_schema : list of dict
A list of dictionaries as generated by `load_all()`.
Returns
-------
results : dict
A dictionary reporting the target files for validation, the unmatched files and unmatched
regexes, and optionally the itemwise comparison results.
Keys include "schema_tracking", "path_tracking", "path_listing", "match_listing", and
        optionally "itemwise".
Notes
-----
* Multi-source validation could be accomplished by distributing the resulting tracking_schema
dictionary and further eroding it.
    * Currently only entities are captured in named groups, edit `load_filename_rules()` to
      name other groups as well.
"""
tracking_schema = deepcopy(regex_schema)
tracking_paths = deepcopy(paths_list)
itemwise_results = []
matched = False
match_listing = []
for target_path in paths_list:
lgr.debug("Checking file `%s`.", target_path)
lgr.debug("Trying file types:")
for regex_entry in tracking_schema:
target_regex = regex_entry["regex"]
lgr.debug("\t* `%s`, with pattern: `%`", target_path, target_regex)
matched = re.match(r"(?:.*/)?" + target_regex, target_path)
itemwise_result = {}
itemwise_result["path"] = target_path
itemwise_result["regex"] = target_regex
if matched:
lgr.debug("Match identified.")
itemwise_result["match"] = True
itemwise_results.append(itemwise_result)
break
itemwise_result["match"] = False
itemwise_results.append(itemwise_result)
if matched:
tracking_paths.remove(target_path)
# Might be fragile since it relies on where the loop broke:
if regex_entry["mandatory"]:
tracking_schema.remove(regex_entry)
match_entry = matched.groupdict()
match_entry["path"] = target_path
match_listing.append(match_entry)
else:
lgr.debug(
"The `%s` file could not be matched to any regex schema entry.",
target_path,
)
results = {}
results["itemwise"] = itemwise_results
results["schema_tracking"] = tracking_schema
results["schema_listing"] = regex_schema
results["path_tracking"] = tracking_paths
results["path_listing"] = paths_list
results["match_listing"] = match_listing
return results
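# Sketch of the result shape for a single matched path (illustrative values;
# the exact group names depend on the regexes in `regex_schema`):
#
#     result = validate_all(["sub-01/anat/sub-01_T1w.nii.gz"], regex_schema)
#     # result["path_tracking"] == []            # every path was matched
#     # result["match_listing"][0] == {
#     #     "subject": "01", "session": None, "datatype": "anat",
#     #     "suffix": "T1w", "extension": ".nii.gz",
#     #     "path": "sub-01/anat/sub-01_T1w.nii.gz",
#     # }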
def write_report(
validation_result,
report_path="~/.cache/bidsschematools/validator-report_{datetime}-{pid}.log",
datetime_format="%Y%m%d%H%M%SZ",
):
"""Write a human-readable report based on the validation result.
Parameters
----------
validation_result : dict
A dictionary as returned by `validate_all()` with keys including "schema_tracking",
"path_tracking", "path_listing", and, optionally "itemwise".
The "itemwise" value, if present, should be a list of dictionaries, with keys including
"path", "regex", and "match".
report_path : str, optional
A path under which the report is to be saved, `datetime`, and `pid`
are available as variables for string formatting, and will be expanded to the
current datetime (as per the `datetime_format` parameter)
and process ID, respectively.
datetime_format : str, optional
A datetime format, optionally used for the report path.
Notes
-----
* Not using f-strings in order to prevent arbitrary code execution.
"""
report_path = report_path.format(
        datetime=datetime.datetime.now(datetime.timezone.utc).strftime(datetime_format),
pid=os.getpid(),
)
report_path = os.path.abspath(os.path.expanduser(report_path))
report_dir = os.path.dirname(report_path)
    os.makedirs(report_dir, exist_ok=True)
total_file_count = len(validation_result["path_listing"])
validated_files_count = total_file_count - len(validation_result["path_tracking"])
with open(report_path, "w") as f:
try:
for comparison in validation_result["itemwise"]:
if comparison["match"]:
comparison_result = "A MATCH"
else:
comparison_result = "no match"
f.write(
f'- Comparing the `{comparison["path"]}` path to the `{comparison["regex"]}` '
f"pattern resulted in {comparison_result}.\n"
)
except KeyError:
pass
f.write(
f"\nSUMMARY:\n{validated_files_count} out of {total_file_count} files were "
"successfully validated, using the following regular expressions:"
)
for regex_entry in validation_result["schema_listing"]:
f.write(f'\n\t- `{regex_entry["regex"]}`')
f.write("\n")
if len(validation_result["path_tracking"]) > 0:
f.write("The following files were not matched by any regex schema entry:")
f.write("\n\t* `")
f.write("`\n\t* `".join(validation_result["path_tracking"]))
else:
f.write("All files were matched by a regex schema entry.")
if len(validation_result["schema_tracking"]) > 0:
f.write("\nThe following mandatory regex schema entries did not match any files:")
f.write("\n")
for entry in validation_result["schema_tracking"]:
if entry["mandatory"]:
f.write(f'\t** `{entry["regex"]}`\n')
else:
f.write("All mandatory BIDS files were found.\n")
lgr.info("BIDS validation log written to %s", report_path)
def _find_dataset_description(my_path):
candidate = os.path.join(my_path, "dataset_description.json")
# Windows support... otherwise we could do `if my_path == "/"`.
if my_path == "/" or not any(i in my_path for i in ["/", "\\"]):
return None
if os.path.isfile(candidate):
return candidate
else:
level_up = os.path.dirname(my_path.rstrip("/\\"))
return _find_dataset_description(level_up)
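# For example, starting from "/data/bids/sub-01/anat", the search checks
# /data/bids/sub-01/anat/dataset_description.json, then the same file in
# /data/bids/sub-01 and /data/bids, returning the first hit, or None once the
# filesystem root (or a separator-free relative path) is reached.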
def select_schema_dir(
bids_paths,
schema_reference_root,
schema_version,
schema_min_version,
):
"""
Select schema directory, according to a fallback logic whereby the schema path is
either (1) `schema_version` if the value is a path, (2) a concatenation of
`schema_reference_root` and `schema_version`, (3) a concatenation of the detected
version specification from a `dataset_description.json` file if one is found in
parents of the input path, (4) `schema_min_version` if no other version can be found
or if the detected version from `dataset_description.json` is smaller than
`schema_min_version`.
Parameters
----------
bids_paths : list of str
Paths to be validated.
Entries in this list will be used to crawl the directory tree upwards until a
dataset_description.json file is found.
schema_reference_root : str, optional
Path where schema versions are stored, and which contains directories named exactly
according to the respective schema version, e.g. "1.7.0".
If the path starts with the string "{module_path}" it will be expanded relative to the
module path.
schema_version : str or None
Version of BIDS schema, or path to schema.
If a path is given, this will be expanded and used directly, not concatenated with
`schema_reference_root`.
If the path starts with the string "{module_path}" it will be expanded relative to the
module path.
        If None, the `dataset_description.json` file will be queried for the dataset schema version.
schema_min_version : str
Minimal version to use UNLESS the schema version is manually specified.
If the version is auto-detected and the version is smaller than schema_min_version,
schema_min_version will be selected instead.
    Returns
    -------
    str
        Path to the schema directory to be used for validation.
    """
# Expand module_path
module_path = os.path.abspath(os.path.dirname(__file__))
if schema_reference_root.startswith("{module_path}"):
schema_reference_root = schema_reference_root.format(module_path=module_path)
schema_reference_root = os.path.abspath(os.path.expanduser(schema_reference_root))
# Handle path schema specification
if schema_version:
if "/" in schema_version:
schema_dir = schema_version
if schema_version.startswith("{module_path}"):
schema_dir = schema_version.format(module_path=module_path)
schema_dir = os.path.abspath(os.path.expanduser(schema_dir))
return schema_dir
schema_dir = os.path.join(schema_reference_root, schema_version)
return schema_dir
dataset_descriptions = []
for bids_path in bids_paths:
bids_path = os.path.abspath(os.path.expanduser(bids_path))
dataset_description = _find_dataset_description(bids_path)
if dataset_description and dataset_description not in dataset_descriptions:
dataset_descriptions.append(dataset_description)
if len(dataset_descriptions) > 1:
raise ValueError(
f"You have selected files belonging to {len(dataset_descriptions)} "
"different datasets. Please run the validator once per dataset."
)
if dataset_descriptions:
dataset_description = dataset_descriptions[0]
with open(dataset_description) as f:
try:
dataset_info = json.load(f)
except json.decoder.JSONDecodeError:
lgr.error(
"The `%s` file could not be loaded. "
"Please check whether the file is valid JSON. "
"Falling back to the `%s` BIDS version.",
dataset_description,
schema_min_version,
)
schema_version = schema_min_version
else:
try:
schema_version = dataset_info["BIDSVersion"]
except KeyError:
lgr.warning(
"BIDSVersion is not specified in "
"`dataset_description.json`. "
"Falling back to `%s`.",
schema_min_version,
)
schema_version = schema_min_version
if not schema_version:
lgr.warning(
"No BIDSVersion could be found for the dataset. Falling back to `%s`.",
schema_min_version,
)
schema_version = schema_min_version
    elif schema_min_version:
        # Note: this is a plain lexicographic string comparison, not a
        # semantic-version comparison.
        if schema_version < schema_min_version:
lgr.warning(
"BIDSVersion `%s` is less than the minimal working "
"`%s`. "
"Falling back to `%s`. "
"To force the usage of earlier versions specify them explicitly "
"when calling the validator.",
schema_version,
schema_min_version,
schema_min_version,
)
schema_version = schema_min_version
schema_dir = os.path.join(schema_reference_root, schema_version)
if os.path.isdir(schema_dir):
return schema_dir
else:
raise ValueError(
f"The expected schema directory {schema_dir} does not exist on the system. "
"Please ensure the file exists or manually specify a schema version for "
"which the bidsschematools files are available on your system."
)
def log_errors(validation_result):
"""
Raise errors for validation result.
Parameters
----------
validation_result : dict
A dictionary as returned by `validate_all()` with keys including "schema_tracking",
"path_tracking", "path_listing", and, optionally "itemwise".
The "itemwise" value, if present, should be a list of dictionaries, with keys including
"path", "regex", and "match".
"""
total_file_count = len(validation_result["path_listing"])
validated_files_count = total_file_count - len(validation_result["path_tracking"])
if validated_files_count == 0:
lgr.error("No valid BIDS files were found.")
for entry in validation_result["schema_tracking"]:
if entry["mandatory"]:
lgr.error(
"The `%s` regex pattern file required by BIDS was not found.",
entry["regex"],
)
for i in validation_result["path_tracking"]:
lgr.warning("The `%s` file was not matched by any regex schema entry.", i)
def _get_directory_suffixes(my_schema):
"""Query schema for suffixes which identify directory entities.
Parameters
----------
my_schema : dict
Nested directory as produced by `bidsschematools.schema.load_schema()`.
Returns
-------
list of str
Directory pseudofile suffixes excluding trailing slashes.
Notes
-----
* Yes this seems super-awkward to do explicitly, after all, the trailing slash is
already in so it should automagically work, but no:
- Subdirectory names need to be dynamically excluded from validation input.
- Backslash directory delimiters are still in use, which is regrettable.
"""
pseudofile_suffixes = []
for i in my_schema["objects"]["extensions"].values():
i_value = i["value"]
if i_value.endswith("/") and i_value != "/":
pseudofile_suffixes.append(i_value[:-1])
return pseudofile_suffixes
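# For instance, if the schema defines an extension object whose value is
# ".ome.zarr/", this returns [".ome.zarr"], which `validate_bids()` then passes
# to `_get_paths()` so such directories are validated as single opaque entries.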
def validate_bids(
in_paths,
accept_dummy_paths=False,
schema_reference_root="{module_path}/data/",
schema_version=None,
report_path=False,
suppress_errors=False,
schema_min_version="schema",
):
"""
Validate paths according to BIDS schema.
Parameters
----------
in_paths : str or list of str
Paths which to validate, may be individual files or directories.
accept_dummy_paths : bool, optional
Whether to accept path strings which do not correspond to either files or directories.
schema_reference_root : str, optional
Path where schema versions are stored, and which contains directories named exactly
according to the respective schema version, e.g. "1.7.0".
If the path starts with the string "{module_path}" it will be expanded relative to the
module path.
schema_version : str or None, optional
Version of BIDS schema, or path to schema.
If a path is given, this will be expanded and used directly, not concatenated with
`schema_reference_root`.
If the path starts with the string "{module_path}" it will be expanded relative to the
module path.
        If None, the `dataset_description.json` file will be queried for the dataset schema version.
report_path : bool or str, optional
If `True` a log will be written using the standard output path of `.write_report()`.
If string, the string will be used as the output path.
If the variable evaluates as False, no log will be written.
schema_min_version : str, optional
        Minimal working schema version, used by `bidsschematools.validator.select_schema_dir()`
        only if no schema version is found or a lower schema version is specified by the dataset.
Returns
-------
results : dict
A dictionary reporting the target files for validation, the unmatched files and unmatched
regexes, and optionally the itemwise comparison results.
Keys include "schema_tracking", "path_tracking", "path_listing", "match_listing", and
        optionally "itemwise".
Examples
--------
::
from bidsschematools import validator
bids_paths = '~/.data2/datalad/000026/rawdata'
schema_version='{module_path}/data/schema/'
validator.validate_bids(bids_paths, schema_version=schema_version)
Notes
-----
* Needs to account for inheritance principle, probably somewhere deeper in the logic, might be
as simple as pattern parsing and multiplying patterns to which inheritance applies.
https://github.com/bids-standard/bids-specification/pull/969#issuecomment-1132119492
"""
if isinstance(in_paths, str):
in_paths = [in_paths]
bids_schema_dir = select_schema_dir(
in_paths,
schema_reference_root,
schema_version,
schema_min_version=schema_min_version,
)
regex_schema, my_schema = load_all(bids_schema_dir)
pseudofile_suffixes = _get_directory_suffixes(my_schema)
bids_paths = _get_paths(
in_paths,
accept_dummy_paths=accept_dummy_paths,
pseudofile_suffixes=pseudofile_suffixes,
)
validation_result = validate_all(
bids_paths,
regex_schema,
)
# Record schema version.
bids_version = bst.schema._get_bids_version(bids_schema_dir)
validation_result["bids_version"] = bids_version
log_errors(validation_result)
if report_path:
if isinstance(report_path, str):
write_report(validation_result, report_path=report_path)
else:
write_report(validation_result)
return validation_result