From 6aaee4feb35229e75455ce0ace01ae859b8119f2 Mon Sep 17 00:00:00 2001 From: willtyler Date: Sat, 27 Jul 2024 16:21:33 +0000 Subject: [PATCH] Change local alleles default to false --- bio2zarr/cli.py | 2 +- bio2zarr/vcf2zarr/icf.py | 2 +- bio2zarr/vcf2zarr/verification.py | 4 ---- tests/test_cli.py | 6 +++--- tests/test_vcf_examples.py | 2 +- 5 files changed, 6 insertions(+), 10 deletions(-) diff --git a/bio2zarr/cli.py b/bio2zarr/cli.py index 98f39ee..0722521 100644 --- a/bio2zarr/cli.py +++ b/bio2zarr/cli.py @@ -152,7 +152,7 @@ def list_commands(self, ctx): local_alleles = click.option( "--local-alleles/--no-local-alleles", show_default=True, - default=True, + default=False, help="Use local allele fields to reduce the storage requirements of the output.", ) diff --git a/bio2zarr/vcf2zarr/icf.py b/bio2zarr/vcf2zarr/icf.py index f5554c5..7646826 100644 --- a/bio2zarr/vcf2zarr/icf.py +++ b/bio2zarr/vcf2zarr/icf.py @@ -1155,7 +1155,7 @@ def init( if compressor is None: compressor = ICF_DEFAULT_COMPRESSOR if local_alleles is None: - local_alleles = True + local_alleles = False vcfs = [pathlib.Path(vcf) for vcf in vcfs] target_num_partitions = max(target_num_partitions, len(vcfs)) diff --git a/bio2zarr/vcf2zarr/verification.py b/bio2zarr/vcf2zarr/verification.py index 1b414f9..27e86fe 100644 --- a/bio2zarr/vcf2zarr/verification.py +++ b/bio2zarr/vcf2zarr/verification.py @@ -170,10 +170,6 @@ def verify(vcf_path, zarr_path, show_progress=False): for colname in root.keys(): if colname.startswith("call") and not colname.startswith("call_genotype"): vcf_name = colname.split("_", 1)[1] - if vcf_name == "LAA" and vcf_name not in format_headers: - continue # LAA could have been computed during the explode step. - if vcf_name == "LPL" and vcf_name not in format_headers: - continue # LPL could have been computed during the explode step. vcf_type = format_headers[vcf_name]["Type"] vcf_number = format_headers[vcf_name]["Number"] format_fields[vcf_name] = vcf_type, vcf_number, iter(root[colname]) diff --git a/tests/test_cli.py b/tests/test_cli.py index 5bcb0ba..4508637 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -14,7 +14,7 @@ compressor=None, worker_processes=1, show_progress=True, - local_alleles=True, + local_alleles=False, ) DEFAULT_DEXPLODE_PARTITION_ARGS = dict() @@ -24,7 +24,7 @@ column_chunk_size=64, compressor=None, show_progress=True, - local_alleles=True, + local_alleles=False, ) DEFAULT_ENCODE_ARGS = dict( @@ -54,7 +54,7 @@ samples_chunk_size=None, show_progress=True, worker_processes=1, - local_alleles=True, + local_alleles=False, ) DEFAULT_PLINK_CONVERT_ARGS = dict( diff --git a/tests/test_vcf_examples.py b/tests/test_vcf_examples.py index e4f48b5..5e2dfb8 100644 --- a/tests/test_vcf_examples.py +++ b/tests/test_vcf_examples.py @@ -537,7 +537,7 @@ class Test1000G2020Example: @pytest.fixture(scope="class") def ds(self, tmp_path_factory): out = tmp_path_factory.mktemp("data") / "example.vcf.zarr" - vcf2zarr.convert([self.data_path], out, worker_processes=0) + vcf2zarr.convert([self.data_path], out, worker_processes=0, local_alleles=True) return sg.load_dataset(out) def test_position(self, ds):