Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improved error handling vcf_parser #11

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,5 @@ jobs:
- name: Test granite
run: |
make configure
make update
make build
make test
17 changes: 16 additions & 1 deletion docs/API.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ The method *add_values_genotype(ID_genotype, values, sep=':')* allows to add val

vnt_obj.add_values_genotype(ID_genotype, values)

The method *get_genotype_value(ID_genotype, tag, sep=':')* returns value for tag from the genotype specified by corresponding ID. sep is the tags separator used in format definition and genotype(s).
The method *get_genotype_value(ID_genotype, tag, complete_genotype=False, sep=':')* returns value for tag from the genotype specified by corresponding ID. sep is the tags separator used in format definition and genotype(s).
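If complete_genotype=True, returns '.' when the tag is defined in FORMAT but its value is missing from the genotype (e.g. trailing fields were dropped). If complete_genotype=False (default), raises *MissingTag* in that case. A tag that is missing from FORMAT always raises *MissingTag*, and an ID_genotype that is not found raises *MissingIdentifier*.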

tag_val <str> = vnt_obj.get_genotype_value(ID_genotype, tag)

Expand All @@ -207,3 +208,17 @@ The method *get_tag_value(tag, sep=';')* returns the value from tag in INFO. sep
tag_val <str> = vnt_obj.get_tag_value(tag)

*note: tag and ID are case sensitive.*

### Custom error classes

*MissingTag* describes a missing tag or tag value.

*MissingTagDefinition* describes a missing tag definition.

*TagDefinitionError* describes a format error for a tag definition.

*TagFormatError* describes a format error for a tag.

*MissingIdentifier* describes a missing genotype identifier in the VCF file.

*VcfFormatError* describes an error in the VCF format.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
author = 'Michele Berselli, Phil Grayson'

# The full version, including alpha/beta/rc tags
release = '0.2.0'
release = '0.2.1'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion granite/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Version information."""

# The following line *must* be the last in the module, exactly as formatted:
__version__ = "0.2.0"
__version__ = "0.2.1"
98 changes: 81 additions & 17 deletions granite/lib/vcf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,60 @@
import gzip


#################################################################
#
# Custom errors
# -> MissingTag
# -> MissingTagDefinition
# -> TagDefinitionError
# -> TagFormatError
# -> MissingIdentifier
# -> VcfFormatError
#
#################################################################
class MissingTag(ValueError):
    ''' custom error class,
        describe a missing tag or tag value

        derives from ValueError because this condition was historically
        reported as a plain ValueError, existing `except ValueError`
        handlers keep working

        attributes:
            message <str>: human readable description of the error '''

    def __init__(self, message):
        # Forward the message to the base class explicitly so args / str()
        #   do not depend on the implicit BaseException.__new__ behaviour
        super().__init__(message)
        self.message = message

class MissingTagDefinition(ValueError):
    ''' custom error class,
        describe a missing tag definition

        derives from ValueError because this condition was historically
        reported as a plain ValueError, existing `except ValueError`
        handlers keep working

        attributes:
            message <str>: human readable description of the error '''

    def __init__(self, message):
        # Forward the message to the base class explicitly so args / str()
        #   do not depend on the implicit BaseException.__new__ behaviour
        super().__init__(message)
        self.message = message

class TagDefinitionError(ValueError):
    ''' custom error class,
        describe a format error for a tag definition

        derives from ValueError because this condition was historically
        reported as a plain ValueError, existing `except ValueError`
        handlers keep working

        attributes:
            message <str>: human readable description of the error '''

    def __init__(self, message):
        # Forward the message to the base class explicitly so args / str()
        #   do not depend on the implicit BaseException.__new__ behaviour
        super().__init__(message)
        self.message = message

class TagFormatError(ValueError):
    ''' custom error class,
        describe a format error for a tag

        derives from ValueError because this condition was historically
        reported as a plain ValueError, existing `except ValueError`
        handlers keep working

        attributes:
            message <str>: human readable description of the error '''

    def __init__(self, message):
        # Forward the message to the base class explicitly so args / str()
        #   do not depend on the implicit BaseException.__new__ behaviour
        super().__init__(message)
        self.message = message

class MissingIdentifier(ValueError):
    ''' custom error class,
        describe a missing genotype identifier in the VCF

        derives from ValueError because this condition was historically
        reported as a plain ValueError, existing `except ValueError`
        handlers keep working

        attributes:
            message <str>: human readable description of the error '''

    def __init__(self, message):
        # Forward the message to the base class explicitly so args / str()
        #   do not depend on the implicit BaseException.__new__ behaviour
        super().__init__(message)
        self.message = message

class VcfFormatError(ValueError):
    ''' custom error class,
        describe a format error in the VCF

        derives from ValueError because this condition was historically
        reported as a plain ValueError, existing `except ValueError`
        handlers keep working

        attributes:
            message <str>: human readable description of the error '''

    def __init__(self, message):
        # Forward the message to the base class explicitly so args / str()
        #   do not depend on the implicit BaseException.__new__ behaviour
        super().__init__(message)
        self.message = message


#################################################################
#
# Vcf
Expand Down Expand Up @@ -87,7 +141,7 @@ def get_tag_field_idx(self, tag, field, tag_type='INFO', sep='|'):
format = format.replace('\"', '')
format = format.replace('>', '')
except Exception:
raise ValueError('\nERROR in VCF header structure, {0} tag definition has no format specification\n'
raise TagDefinitionError('\nERROR in VCF header structure, {0} tag definition has no format specification\n'
.format(tag))
#end try
# Search exact match
Expand All @@ -102,7 +156,7 @@ def get_tag_field_idx(self, tag, field, tag_type='INFO', sep='|'):
#end for
#end if
#end for
raise ValueError('\nERROR in VCF header structure, {0} tag definition is missing\n'
raise MissingTagDefinition('\nERROR in VCF header structure, {0} tag definition is missing\n'
.format(tag))
#end def

Expand All @@ -122,7 +176,7 @@ def check_tag_definition(self, tag, tag_type='INFO', sep='|'):
#end if
#end if
#end for
raise ValueError('\nERROR in VCF header structure, {0} tag definition is missing\n'
raise MissingTagDefinition('\nERROR in VCF header structure, {0} tag definition is missing\n'
.format(tag))
#end def

Expand Down Expand Up @@ -191,7 +245,7 @@ def remove_tag_genotype(self, tag_to_remove, sep=':'):
#end for
# Error if tag_to_remove not found in FORMAT
if idx_tag_to_remove == -1:
raise ValueError('\nERROR in variant FORMAT field, {0} tag is missing\n'
raise MissingTag('\nERROR in variant FORMAT field, {0} tag is missing\n'
.format(tag_to_remove))
#end if
# Updating FORMAT
Expand Down Expand Up @@ -250,7 +304,7 @@ def add_values_genotype(self, ID_genotype, values, sep=':'):
try:
self.GENOTYPES[ID_genotype] += sep + values
except Exception:
raise ValueError('\nERROR in GENOTYPES identifiers, {0} identifier is missing in VCF\n'
raise MissingIdentifier('\nERROR in GENOTYPES identifiers, {0} identifier is missing in VCF\n'
.format(ID_genotype))
#end try
#end def
Expand All @@ -272,18 +326,20 @@ def get_tag_value(self, tag_to_get, sep=';'):
try:
return tag.split(tag_to_get + '=')[1]
except Exception: # tag field is in a wrong format
raise ValueError('\nERROR in variant INFO field, {0} tag is in the wrong format\n'
raise TagFormatError('\nERROR in variant INFO field, {0} tag is in the wrong format\n'
.format(tag_to_get))
#end try
#end if
#end for

# tag_to_get not found
raise ValueError('\nERROR in variant INFO field, {0} tag is missing\n'.format(tag_to_get))
raise MissingTag('\nERROR in variant INFO field, {0} tag is missing\n'.format(tag_to_get))
#end def

def get_genotype_value(self, ID_genotype, tag_to_get, sep=':'):
''' get value from tag (tag_to_get) in genotype specified by corresponding ID '''
def get_genotype_value(self, ID_genotype, tag_to_get, complete_genotype=False, sep=':'):
''' get value from tag (tag_to_get) in genotype specified by corresponding ID
if complete_genotype, return '.' if tag is missing
if not complete_genotype, raise Exception for the missing tag '''
# Get index from FORMAT
idx_tag_to_get = -1
for i, tag in enumerate(self.FORMAT.split(sep)):
Expand All @@ -294,16 +350,24 @@ def get_genotype_value(self, ID_genotype, tag_to_get, sep=':'):
#end for
# Error if tag_to_get not found in FORMAT
if idx_tag_to_get == -1:
raise ValueError('\nERROR in variant FORMAT field, {0} tag is missing\n'
raise MissingTag('\nERROR in variant FORMAT field, {0} tag is missing\n'
.format(tag_to_get))
#end if
# Get value from index in genotype by ID
try:
return self.GENOTYPES[ID_genotype].split(sep)[idx_tag_to_get]
except Exception:
raise ValueError('\nERROR in GENOTYPES identifiers, {0} identifier is missing in VCF\n'
if self.GENOTYPES.get(ID_genotype):
try:
return self.GENOTYPES[ID_genotype].split(sep)[idx_tag_to_get]
except Exception:
if complete_genotype: # expect dropped tags in genotype, return default '.'
return '.'
else: # expect full genotype, raise error if tag is missing
raise MissingTag('\nERROR in variant GENOTYPE field, {0} tag is missing for {1} identifier\n'
.format(tag_to_get, ID_genotype))
#end try
else: # if genotype identifier is missing
raise MissingIdentifier('\nERROR in GENOTYPES identifiers, {0} identifier is missing in VCF\n'
.format(ID_genotype))
#end try
#end if
#end def

#end class Variant
Expand Down Expand Up @@ -349,7 +413,7 @@ def parse_header(self):
if definitions and columns:
return self.Header(definitions, columns, IDs_genotypes)
else:
raise ValueError('\nERROR in VCF header structure, missing essential lines\n')
raise VcfFormatError('\nERROR in VCF header structure, missing essential lines\n')
#end if
#end def

Expand All @@ -362,7 +426,7 @@ def parse_variants(self): # generator
try:
yield self.Variant(line_strip, self.header.IDs_genotypes)
except Exception:
raise ValueError('\nERROR in variant VCF structure, missing essential columns\n')
raise VcfFormatError('\nERROR in variant VCF structure, missing essential columns\n')
#end try
#end if
#end if
Expand Down
Loading