diff --git a/scout/build/genes/hgnc_gene.py b/scout/build/genes/hgnc_gene.py index afe15cb297..700d4706bd 100644 --- a/scout/build/genes/hgnc_gene.py +++ b/scout/build/genes/hgnc_gene.py @@ -17,7 +17,7 @@ def build_phenotype(phenotype_info): return phenotype_obj -def build_hgnc_gene(gene_info: dict, cyoband_coords: Dict[str, dict], build: str = "37") -> dict: +def build_hgnc_gene(gene_info: dict, cytoband_coords: Dict[str, dict], build: str = "37") -> dict: """Build a hgnc_gene object Returns: @@ -64,7 +64,7 @@ def build_hgnc_gene(gene_info: dict, cyoband_coords: Dict[str, dict], build: str if gene_info.get("chromosome") is None: # Gene not present in Ensembl. # Try to use cytoband coordinates instead - cytoband_coords: Optional[dict] = cyoband_coords.get(gene_info["location"]) + cytoband_coords: Optional[dict] = cytoband_coords.get(gene_info["location"]) if not cytoband_coords: LOG.warning( f"Gene {gene_info.get('hgnc_symbol') or gene_info.get('hgnc_id')} doesn't have coordinates and cytoband not present in database, skipping." diff --git a/scout/load/hgnc_gene.py b/scout/load/hgnc_gene.py index 4f9f9c1991..f9dd97e9c6 100644 --- a/scout/load/hgnc_gene.py +++ b/scout/load/hgnc_gene.py @@ -83,13 +83,13 @@ def load_hgnc_genes( nr_genes = len(genes) LOG.info(f"Building info for {nr_genes} genes") - cyoband_coords: Dict[str, dict] = adapter.cytoband_to_coordinates(build=build) + cytoband_coords: Dict[str, dict] = adapter.cytoband_to_coordinates(build=build) with progressbar(genes.values(), label="Building genes", length=nr_genes) as bar: for gene_data in bar: gene_obj = build_hgnc_gene( gene_data, - cyoband_coords=cyoband_coords, + cytoband_coords=cytoband_coords, build=build, ) diff --git a/tests/build/test_build_hgnc_gene.py b/tests/build/test_build_hgnc_gene.py index aa292ff365..5e9264246d 100644 --- a/tests/build/test_build_hgnc_gene.py +++ b/tests/build/test_build_hgnc_gene.py @@ -1,5 +1,3 @@ -from pprint import pprint as pp - import pytest from scout.build.genes.hgnc_gene import build_hgnc_gene @@ -11,7 +9,7 @@ def test_build_hgnc_genes(genes): # WHEN building hgnc gene objecs for hgnc_id in genes: gene_info = genes[hgnc_id] - gene_obj = build_hgnc_gene(gene_info=gene_info, cyoband_coords={}) + gene_obj = build_hgnc_gene(gene_info=gene_info, cytoband_coords={}) # THEN check that the gene models have a hgnc id assert gene_obj["hgnc_id"] @@ -25,7 +23,7 @@ def test_build_hgnc_gene(): "start": 1, "end": 1000, } - gene_obj = build_hgnc_gene(gene_info=gene_info, cyoband_coords={}) + gene_obj = build_hgnc_gene(gene_info=gene_info, cytoband_coords={}) assert gene_obj["hgnc_id"] == gene_info["hgnc_id"] assert gene_obj["hgnc_symbol"] == gene_info["hgnc_symbol"] @@ -33,7 +31,6 @@ def test_build_hgnc_gene(): assert gene_obj["ensembl_id"] == gene_info["ensembl_gene_id"] -# TODO: are 'ensembl_gene_id' and 'ensembl_id' the same thing? -both seem to be used! @pytest.mark.parametrize("key", ["hgnc_id", "hgnc_symbol", "chromosome", "start", "end"]) def test_build_hgnc_gene_missing_hgnc_symbol(test_gene, key): ## GIVEN a dictionary with gene information @@ -41,4 +38,4 @@ def test_build_hgnc_gene_missing_hgnc_symbol(test_gene, key): # WHEN deleting a required key test_gene.pop(key) # THEN calling build_hgnc_gene() will return None - assert build_hgnc_gene(gene_info=test_gene, cyoband_coords={}) is None + assert build_hgnc_gene(gene_info=test_gene, cytoband_coords={}) is None diff --git a/tests/conftest.py b/tests/conftest.py index a3e4aff5f2..9616832225 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -239,7 +239,7 @@ def gene_bulk(genes): """Return a list with HgncGene objects""" bulk = [] for gene_key in genes: - bulk.append(build_hgnc_gene(genes[gene_key])) + bulk.append(build_hgnc_gene(genes[gene_key], cytoband_coords={})) return bulk @@ -249,7 +249,7 @@ def gene_bulk_38(genes): """Return a list with HgncGene objects""" bulk = [] for gene_key in genes: - gene_obj = build_hgnc_gene(genes[gene_key]) + gene_obj = build_hgnc_gene(genes[gene_key], cytoband_coords={}) gene_obj["build"] = "38" bulk.append(gene_obj)