Skip to content

Commit

Permalink
Add variant_length field
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite authored and jeromekelleher committed Jul 26, 2024
1 parent 5105ffe commit 7c78a14
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 3 deletions.
2 changes: 2 additions & 0 deletions bio2zarr/vcf2zarr/icf.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ def make_field_def(name, vcf_type, vcf_number):
make_field_def("FILTERS", "String", "."),
make_field_def("REF", "String", "1"),
make_field_def("ALT", "String", "."),
make_field_def("rlen", "Integer", "1"), # computed field
]
return fields

Expand Down Expand Up @@ -1276,6 +1277,7 @@ def process_partition(self, partition_index):
tcw.append("FILTERS", variant.FILTERS)
tcw.append("REF", variant.REF)
tcw.append("ALT", variant.ALT)
tcw.append("rlen", variant.end - variant.start)
for field in info_fields:
tcw.append(field.full_name, variant.INFO.get(field.name, None))
if has_gt:
Expand Down
4 changes: 3 additions & 1 deletion bio2zarr/vcf2zarr/vcz.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def inspect(path):
"variant_contig": "An identifier from the reference genome or an angle-bracketed ID"
" string pointing to a contig in the assembly file",
"variant_position": "The reference position",
"variant_length": "The length of the variant measured in bases",
"variant_id": "List of unique identifiers where applicable",
"variant_allele": "List of the reference and alternate alleles",
"variant_quality": "Phred-scaled quality score",
Expand Down Expand Up @@ -302,11 +303,12 @@ def fixed_field_spec(
]
name_map = {field.full_name: field for field in icf.metadata.fields}

# Only two of the fixed fields have a direct one-to-one mapping.
# Only three of the fixed fields have a direct one-to-one mapping.
array_specs.extend(
[
spec_from_field(name_map["QUAL"], array_name="variant_quality"),
spec_from_field(name_map["POS"], array_name="variant_position"),
spec_from_field(name_map["rlen"], array_name="variant_length"),
]
)
array_specs.extend(
Expand Down
5 changes: 3 additions & 2 deletions tests/test_icf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class TestSmallExample:
'ALT', 'CHROM', 'FILTERS', 'FORMAT/DP', 'FORMAT/GQ',
'FORMAT/GT', 'FORMAT/HQ', 'ID', 'INFO/AA', 'INFO/AC',
'INFO/AF', 'INFO/AN', 'INFO/DB', 'INFO/DP', 'INFO/H2',
'INFO/NS', 'POS', 'QUAL', 'REF'
'INFO/NS', 'POS', 'QUAL', 'REF', 'rlen'
)
# fmt: on

Expand Down Expand Up @@ -117,6 +117,7 @@ class TestLocalAllelesExample:
"POS",
"QUAL",
"REF",
"rlen",
)

@pytest.fixture(scope="class")
Expand Down Expand Up @@ -451,7 +452,7 @@ def icf(self, tmp_path_factory):

def test_repr(self, icf):
assert repr(icf).startswith(
"IntermediateColumnarFormat(fields=7, partitions=5, records=4665, path="
"IntermediateColumnarFormat(fields=8, partitions=5, records=4665, path="
)

def test_pos_repr(self, icf):
Expand Down
4 changes: 4 additions & 0 deletions tests/test_vcf_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def test_position(self, ds):
[111, 112, 14370, 17330, 1110696, 1230237, 1234567, 1235237, 10],
)

# Check the "variant_length" array of the example dataset `ds`: the assertion
# expects a length of 1 for the first eight records and 2 for the last one.
# NOTE(review): variant_length appears to be populated from the computed
# "rlen" field (variant.end - variant.start) — confirm against the fixture VCF.
def test_length(self, ds):
nt.assert_array_equal(ds["variant_length"], [1, 1, 1, 1, 1, 1, 1, 1, 2])

def test_int_info_fields(self, ds):
nt.assert_array_equal(
ds["variant_NS"],
Expand Down Expand Up @@ -938,6 +941,7 @@ def test_info_fields(self, ds):
"variant_filter",
"variant_contig",
"variant_position",
"variant_length",
"variant_allele",
"variant_id",
"variant_id_mask",
Expand Down

0 comments on commit 7c78a14

Please sign in to comment.