Skip to content

Commit

Permalink
Slurp pipeline
Browse files Browse the repository at this point in the history
-
  • Loading branch information
joeflack4 committed Aug 3, 2022
1 parent 769decf commit 3356948
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 51 deletions.
27 changes: 19 additions & 8 deletions src/ontology/mondo-ingest.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -281,16 +281,27 @@ lexical_matches: mappings/mondo-sources-all-lexical.sssom.tsv
# Create the slurp output directory on demand; the slurp/%.tsv rule lists it as an
# order-only prerequisite so its timestamp never triggers a rebuild.
slurp/:
	mkdir -p "$@"

# Feel free to change the signature. Min ID is the next available Mondo ID.
slurp/%.tsv: components/%.owl tmp/mondo.sssom.tsv reports/mirror-signature-mondo.tsv | slurp/
# TODO: Move this rule to the main Makefile. Open question: how, given that the Makefile
# should not be edited from this repo?
# Fetch Mondo from its PURL, annotate it with this project's ontology IRI, and write the
# result to $(TMPDIR)/component-download-mondo.owl.owl. Runs only when both MIR and COMP
# are "true"; otherwise the recipe is a no-op.
.PHONY: component-download-mondo.owl
component-download-mondo.owl: | $(TMPDIR)
	if [ $(MIR) = true ] && [ $(COMP) = true ]; then \
		$(ROBOT) merge -I http://purl.obolibrary.org/obo/mondo.owl \
			annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) \
			-o $(TMPDIR)/$@.owl; \
	fi

# TODO: Move this rule to the main Makefile. Open question: how, given that the Makefile
# should not be edited from this repo?
# Install the freshly downloaded Mondo component, but only overwrite the existing file
# when its bytes differ (cmp -s), so an identical download leaves the target untouched.
# Does nothing unless COMP is "true".
$(COMPONENTSDIR)/mondo.owl: component-download-mondo.owl
	if [ $(COMP) = true ] ; then \
		if cmp -s $(TMPDIR)/component-download-mondo.owl.owl $@ ; then \
			echo "Component identical."; \
		else \
			echo "Component is different, updating." && cp $(TMPDIR)/component-download-mondo.owl.owl $@; \
		fi; \
	fi

# Build the slurp table for one source ontology; the pattern stem is the ontology name
# (e.g. slurp/omim.tsv is built from $(COMPONENTSDIR)/omim.owl).
# --min-id: the next available Mondo ID.
# NOTE: `%` is only expanded in the rule header, never inside a recipe, so the ontology
# path is passed as $< (the first prerequisite) rather than as $(COMPONENTSDIR)/%.owl.
slurp/%.tsv: $(COMPONENTSDIR)/%.owl $(TMPDIR)/mondo.sssom.tsv $(REPORTDIR)/mirror_signature-mondo.tsv | slurp/
	python $(SCRIPTSDIR)/migrate.py \
		--ontology-path $< \
		--sssom-map-path $(TMPDIR)/mondo.sssom.tsv \
		--min-id 123000 \
		--mondo-terms-path $(REPORTDIR)/mirror_signature-mondo.tsv \
		--outpath $@

# slurp-<ontology>: convenience alias for building slurp/<ontology>.tsv.
# The no-op recipe (`@:`) is required: in GNU Make a pattern rule without a recipe does
# not define a rule (it only cancels a matching implicit rule), so `make slurp-omim`
# would otherwise fail with "No rule to make target".
slurp-%: slurp/%.tsv
	@:

# slurp/<ontology>.tsv is reached through a chain of two pattern rules, which makes it an
# intermediate file that Make would delete after the build; .PRECIOUS keeps it.
# (Side effect: a partially written table is also kept if the build is interrupted.)
.PRECIOUS: slurp/%.tsv

# Aggregate targets are commands, not files.
.PHONY: slurp slurp-all

# TODO: add more ontologies, e.g.: doid, icd10cm, icd10who, ncit, ordo
slurp: slurp-omim
# TODO: change to all ontologies when ready
# slurp-all: slurp-omim slurp-doid slurp-ncit slurp-ordo slurp-icd10cm slurp-icd10who
slurp-all: slurp-omim
104 changes: 61 additions & 43 deletions src/scripts/migrate.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,72 @@
"""Migration pipeline
"""Slurp migration pipeline
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
TODOs:
TODO's:
- add CLI: look to makefile for what to include
"""
import oakliblib
import pandas

import os
from argparse import ArgumentParser
from typing import Dict

#Inputs:
source_ontology = '' #e.g. omim
sssom_map = '' # e.g. mondo.sssom.tsv
min_id = ''
termlist_mondo = ''
import oaklib
import pandas as pd


def run(source_ontology = '', sssom_map = '', min_id = '', termlist_mondo = ''):
"""source_ontology = '' #e.g. omim
sssom_map = '' # e.g. mondo.sssom.tsv
min_id = ''
termlist_mondo = ''"""
#Outputs:
def run(ontology_path: str, sssom_map_path: str, min_id: int, mondo_terms_path: str, outpath: str):
    """Slurp pipeline entry point (work in progress).

    Validates the three input paths and loads the two TSV inputs. The migration logic itself
    is still only sketched as pseudo-code at the end of the body, so nothing is written to
    ``outpath`` yet and the function returns ``None``.

    :param ontology_path: Path to the source ontology file (the Makefile passes a component .owl).
    :param sssom_map_path: Path to the SSSOM mapping TSV; '#' starts a comment.
    :param min_id: Next available Mondo ID. Accepts an int or a numeric string.
    :param mondo_terms_path: Path to the Mondo terms TSV; '#' starts a comment.
    :param outpath: Intended output TSV path. Not used yet.
    :raises FileNotFoundError: If any of the three input paths does not exist.
    :raises ValueError: If ``min_id`` cannot be converted to an integer.
    """
    # Fail fast with a message naming every missing input instead of printing bare booleans.
    input_paths = {
        'ontology_path': ontology_path,
        'sssom_map_path': sssom_map_path,
        'mondo_terms_path': mondo_terms_path,
    }
    missing = [f'{name}={path}' for name, path in input_paths.items() if not os.path.exists(path)]
    if missing:
        raise FileNotFoundError('Input file(s) not found: ' + ', '.join(missing))

    # The CLI may hand this over as a string; normalise so later arithmetic is safe.
    min_id = int(min_id)

    # TODO: read the ontology itself; for now only its path is kept.
    source_ontology = ontology_path
    sssom_map = pd.read_csv(sssom_map_path, comment='#', sep='\t')
    termlist_mondo = pd.read_csv(mondo_terms_path, comment='#', sep='\t')
    data = []

    # Planned algorithm (pseudo-code; the oaklib calls are placeholders -- TODO confirm the real API):
    # for t in source_ontology:
    #     if t not in sssom_map['object_id']:
    #         parents = []
    #         migrate = True
    #         for p in oaklib.get_direct_parents(t):
    #             if p not in sssom_map['object_id']:
    #                 migrate = False
    #                 break
    #             elif sssom_map[sssom_map['object_id']==p]['predicate_id'] == 'skos:exactMatch' \
    #                     or sssom_map[sssom_map['object_id']==p]['predicate_id'] == 'skos:narrowMatch':
    #                 # In other words, if the parent is mapped, and the mapping is either exact or narrower
    #                 parents.append(sssom_map[sssom_map['object_id']==p]['subject_id'])
    #             else:
    #                 # It's fine, just continue looking for other parents in this case
    #         if migrate and parents:
    #             # Starting from min_id, count up until an ID is found that does not already exist.
    #             next_mondo_id = determine_next_available_mondo_id(min_id, termlist_mondo)
    #             label = oaklib.get_label(t)
    #             definition = oaklib.get_definition(t)
    #             data.append({'mondo_id':next_mondo_id, 'xref': t, 'label': label, 'definition': definition})
    #
    # pd.DataFrame(data).to_csv(outpath, sep="\t")


for t in source_ontology:
if t not in sssom_map['object_id']:
parents = []
migrate = True
for p in oaklib.get_direct_parents(t):
if p not in sssom_map['object_id']:
migrate = False
break
elif sssom_map[sssom_map['object_id']==p]['predicate_id'] = 'skos:exactMatch' \
or sssom_map[sssom_map['object_id']==p]['predicate_id'] = 'skos:narrowMatch':
# In other words, if the parent is mapped, and the mapping is either exact or narrower
parents.append(sssom_map[sssom_map['object_id']==p]['subject_id'])
else:
# Its fine, just continue looking for other parents in this case
if migrate and parents:
next_mondo_id = determine_next_available_mondo_id(min_id, termlist_mondo) # satrting from min_id, then counting up and checking if it does not already exist.
label = oaklib.get_label(t)
definition = oaklib.get_definition(t)
data.append({'mondo_id':next_mondo_id, 'xref': t, 'label': label, 'definition': definition})

pandas.DataFrame(data).to_csv(fn, sep="\t")
def cli():
    """Command line interface.

    Parses the command-line arguments and forwards them to ``run`` as keyword arguments
    (argparse turns ``--ontology-path`` into ``ontology_path``, and so on), returning
    whatever ``run`` returns.
    """
    package_description = \
        'Slurp pipeline: Integrate new terms from other ontologies into Mondo.'
    parser = ArgumentParser(description=package_description)
    parser.add_argument(
        '-o', '--ontology-path', required=True,
        help='Path to the source ontology file, e.g. the OMIM component .owl.')
    parser.add_argument(
        '-m', '--sssom-map-path', required=True,
        help='Path to the SSSOM mapping file, e.g. mondo.sssom.tsv.')
    # run() declares min_id as int; without type=int argparse would pass a str.
    parser.add_argument(
        '-i', '--min-id', required=True, type=int,
        help='The next available Mondo ID.')
    parser.add_argument(
        '-t', '--mondo-terms-path', required=True,
        help='Path to the TSV listing existing Mondo terms.')
    parser.add_argument(
        '-O', '--outpath', required=True,
        help='Path where the output TSV should be written.')
    d: Dict = vars(parser.parse_args())
    return run(**d)


if __name__ == '__main__':
    # run() has required parameters and cannot be called bare; the CLI parses them
    # from the command line and invokes run() itself.
    cli()

0 comments on commit 3356948

Please sign in to comment.