Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement TSV output mode. #162

Merged
merged 3 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ lazy val cli = project
publish / skip := true,
libraryDependencies ++= Seq(
"com.outr" %% "scribe-slf4j" % "3.10.4",
"com.github.alexarchambault" %% "case-app" % "2.0.6"
"com.github.alexarchambault" %% "case-app" % "2.0.6",
"io.circe" %% "circe-yaml" % "0.14.1",
),
gitCommitString := git.gitHeadCommit.value.getOrElse("Not Set"),
buildInfoKeys := Seq[BuildInfoKey](name, version, scalaVersion, sbtVersion, gitCommitString),
Expand Down
28 changes: 23 additions & 5 deletions cli/src/main/scala/org/renci/relationgraph/Config.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ package org.renci.relationgraph
import caseapp._
import caseapp.core.Error.MalformedValue
import caseapp.core.argparser.{ArgParser, SimpleArgParser}
import org.renci.relationgraph.Config.{BoolValue, FalseValue, TrueValue}
import org.renci.relationgraph.RelationGraph.Config.{OWLMode, OutputMode, RDFMode}
import org.renci.relationgraph.Config.{BoolValue, FalseValue, OutputMode, RDFMode, TrueValue}

@AppName("relation-graph")
@ProgName("relation-graph")
Expand All @@ -15,8 +14,8 @@ final case class Config(
@HelpMessage("File to stream output triples to.")
@ValueDescription("filename")
outputFile: String,
@HelpMessage("Configure style of triples to be output. RDF mode is the default; each existential relation is collapsed to a single direct triple.")
@ValueDescription("RDF|OWL")
@HelpMessage("Configure style of triples to be output. RDF mode is the default; each existential relation is collapsed to a single direct triple. TSV mode outputs the same triples as RDF mode, but as TSV, compacting IRIs using an optional prefixes file.")
@ValueDescription("RDF|OWL|TSV")
mode: OutputMode = RDFMode,
@HelpMessage("Property to restrict output relations to. Provide option multiple times for multiple properties. If no properties are provided (via CLI or file), then all properties found in the ontology will be used.")
@ValueDescription("IRI")
Expand All @@ -42,13 +41,23 @@ final case class Config(
@HelpMessage("Disable inference of unsatisfiable classes by the whelk reasoner (default false)")
@ValueDescription("bool")
disableOwlNothing: BoolValue = FalseValue,
// Optional path to a YAML file of prefix mappings; only consulted when producing TSV output.
@HelpMessage("Prefix mappings to use for TSV output (YAML dictionary)")
@ValueDescription("filename")
prefixes: Option[String],
@HelpMessage("Compact OBO-style IRIs regardless of inclusion in prefixes file")
@ValueDescription("bool")
oboPrefixes: BoolValue = TrueValue,
@HelpMessage("Set log level to INFO")
@ValueDescription("bool")
verbose: Boolean = false) {

def toRelationGraphConfig: RelationGraph.Config =
RelationGraph.Config(
mode = this.mode,
mode = this.mode match {
case Config.RDFMode => RelationGraph.Config.RDFMode
case Config.OWLMode => RelationGraph.Config.OWLMode
case Config.TSVMode => RelationGraph.Config.RDFMode
},
outputSubclasses = this.outputSubclasses.bool,
reflexiveSubclasses = this.reflexiveSubclasses.bool,
equivalenceAsSubclass = this.equivalenceAsSubclass.bool,
Expand All @@ -61,10 +70,19 @@ final case class Config(

object Config {

/** Output style selectable on the command line through the `mode` option. */
sealed trait OutputMode

/** Default mode; translated to `RelationGraph.Config.RDFMode` by `toRelationGraphConfig`. */
case object RDFMode extends OutputMode

/** Translated to `RelationGraph.Config.OWLMode` by `toRelationGraphConfig`. */
case object OWLMode extends OutputMode

/** Uses the same triples mode as `RDFMode` (`RelationGraph.Config.RDFMode`), written as TSV. */
case object TSVMode extends OutputMode

/**
 * Command-line parser for [[OutputMode]].
 *
 * Accepts `rdf`, `owl` or `tsv` in any letter case; every other value is reported as a
 * malformed "output mode" argument.
 */
implicit val rdfModeParser: ArgParser[OutputMode] = {
  // Lower-cased command-line spelling -> mode.
  val modesByName: Map[String, OutputMode] = Map(
    "rdf" -> RDFMode,
    "owl" -> OWLMode,
    "tsv" -> TSVMode
  )
  SimpleArgParser.from[OutputMode]("output mode") { arg =>
    modesByName.get(arg.toLowerCase).toRight(MalformedValue("output mode", arg))
  }
}
Expand Down
35 changes: 32 additions & 3 deletions cli/src/main/scala/org/renci/relationgraph/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ import org.semanticweb.owlapi.model._
import scribe.Level
import scribe.filter.{packageName, select}
import zio._
import Config._
import io.circe.yaml.parser
import org.renci.relationgraph.Config.{OWLMode, RDFMode, TSVMode}

import java.io.{File, FileOutputStream}
import java.io.{File, FileOutputStream, FileReader}
import scala.io.Source

object Main extends ZCaseApp[Config] {
Expand All @@ -27,7 +28,7 @@ object Main extends ZCaseApp[Config] {
.replace()
}
val program = ZIO.scoped {
createStreamRDF(config.outputFile).flatMap { rdfWriter =>
createStream(config).flatMap { rdfWriter =>
for {
fileProperties <- config.propertiesFile.map(readPropertiesFile).getOrElse(ZIO.succeed(Set.empty[AtomicConcept]))
specifiedProperties = fileProperties ++ config.property.map(prop => AtomicConcept(prop)).to(Set)
Expand All @@ -47,6 +48,15 @@ object Main extends ZCaseApp[Config] {
}.exitCode
}

/**
 * Builds the scoped output stream matching the configured output mode.
 *
 * RDF and OWL modes both use the writer from `createStreamRDF`. TSV mode first loads the
 * optional prefixes file (an empty mapping is used when none is configured) and then opens
 * a TSV writer.
 *
 * @param config parsed command-line configuration
 * @return the stream to write triples to, released when the enclosing scope closes
 */
def createStream(config: Config): ZIO[Scope, Throwable, StreamRDF] = {
  val destination = config.outputFile
  config.mode match {
    case TSVMode =>
      for {
        loadedPrefixes <- ZIO.foreach(config.prefixes)(readPrefixesFile)
        tsvStream <- createStreamTSV(destination, loadedPrefixes.getOrElse(Map.empty), config.oboPrefixes.bool)
      } yield tsvStream
    case RDFMode | OWLMode =>
      createStreamRDF(destination)
  }
}

def createStreamRDF(path: String): ZIO[Scope, Throwable, StreamRDF] = {
ZIO.acquireRelease(ZIO.attempt(new FileOutputStream(new File(path))))(stream => ZIO.succeed(stream.close())).flatMap { outputStream =>
ZIO.acquireRelease(ZIO.attempt {
Expand All @@ -57,6 +67,16 @@ object Main extends ZCaseApp[Config] {
}
}

/**
 * Opens a scoped TSV output stream on the given path.
 *
 * The stream is started as part of acquisition and `finish()` is called on it when the
 * enclosing scope closes.
 *
 * @param path        output file path
 * @param prefixes    prefix-to-namespace mappings handed to the TSV writer
 * @param oboPrefixes flag handed to the TSV writer controlling OBO-style IRI compaction
 */
def createStreamTSV(path: String, prefixes: Map[String, String], oboPrefixes: Boolean): ZIO[Scope, Throwable, StreamRDF] = {
  // Construct the writer and start it in a single effect so any failure surfaces as a Throwable.
  def openSink(target: File): Task[TSVStreamRDF] = ZIO.attempt {
    val sink = new TSVStreamRDF(target, prefixes, oboPrefixes)
    sink.start()
    sink
  }

  for {
    target <- ZIO.attempt(new File(path))
    sink <- ZIO.acquireRelease(openSink(target))(opened => ZIO.succeed(opened.finish()))
  } yield sink
}

def loadOntology(path: String): Task[OWLOntology] = for {
manager <- ZIO.attempt(OWLManager.createOWLOntologyManager())
ontology <- ZIO.attemptBlocking(manager.loadOntologyFromOntologyDocument(new File(path)))
Expand All @@ -67,4 +87,13 @@ object Main extends ZCaseApp[Config] {
ZIO.attemptBlocking(source.getLines().map(_.trim).filter(_.nonEmpty).map(line => AtomicConcept(line)).to(Set))
}

/**
 * Reads a YAML dictionary of prefix mappings from a file.
 *
 * The file is decoded as UTF-8 regardless of the platform default charset. Opening, reading
 * and parsing all run as blocking effects, and the reader is closed once parsing finishes.
 *
 * @param filename path of the YAML prefixes file
 * @return the decoded string-to-string map; fails with the underlying I/O error, or with the
 *         circe parsing/decoding error when the content is not a YAML dictionary of strings
 */
def readPrefixesFile(filename: String): ZIO[Any, Throwable, Map[String, String]] =
  ZIO
    .attemptBlocking(
      // Explicit charset: FileReader would fall back to the platform default encoding.
      new java.io.InputStreamReader(
        new java.io.FileInputStream(new File(filename)),
        java.nio.charset.StandardCharsets.UTF_8
      )
    )
    .acquireReleaseWithAuto { reader =>
      ZIO
        // Parsing pulls from the reader, so keep it inside the blocking effect; anything
        // thrown here becomes a typed failure.
        .attemptBlocking(parser.parse(reader).flatMap(json => json.as[Map[String, String]]))
        .flatMap(parsed => ZIO.fromEither(parsed))
    }

}
56 changes: 56 additions & 0 deletions cli/src/main/scala/org/renci/relationgraph/TSVStreamRDF.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package org.renci.relationgraph

import org.apache.jena.graph.Triple
import org.apache.jena.riot.system.StreamRDF
import org.apache.jena.shared.PrefixMapping
import org.apache.jena.shared.impl.PrefixMappingImpl
import org.apache.jena.sparql.core.Quad

import java.io.{File, PrintWriter}
import scala.jdk.CollectionConverters._

/**
 * A [[StreamRDF]] that writes each received triple or quad as one tab-separated line.
 *
 * Nodes are rendered through Jena's `Node.toString(PrefixMapping, Boolean)` using a prefix
 * mapping built from the supplied prefixes plus `PrefixMapping.Standard`.
 *
 * Lifecycle: call `start()` before sending any triple or quad, and `finish()` when done.
 *
 * @param file        destination file; opened (UTF-8) by `start()`
 * @param prefixes    prefix-to-namespace mappings used to shorten IRIs
 * @param oboPrefixes when true, IRIs under `http://purl.obolibrary.org/obo/` that no mapping
 *                    shortened are rewritten from `IDSPACE_LOCALID` to `IDSPACE:LOCALID`
 */
class TSVStreamRDF(file: File, prefixes: Map[String, String], oboPrefixes: Boolean) extends StreamRDF {

  private val OboNamespace: String = "http://purl.obolibrary.org/obo/"

  private val prefixMapping: PrefixMapping = {
    val pm = new PrefixMappingImpl() {
      override def shortForm(uri: String): String = {
        val compacted = super.shortForm(uri)
        // Only fall back to OBO compaction when no configured prefix applied.
        if (oboPrefixes && (compacted == uri) && uri.startsWith(OboNamespace)) {
          // Split on the first underscore only: IDSPACE_LOCALID -> IDSPACE:LOCALID.
          uri.stripPrefix(OboNamespace).split("_", 2) match {
            case Array(idSpace, localId) => s"$idSpace:$localId"
            // No underscore (e.g. an ontology document IRI): keep the full IRI rather than
            // emitting a bare tail that has lost its namespace.
            case _ => uri
          }
        } else compacted
      }
    }
    pm.setNsPrefixes(prefixes.asJava).withDefaultMappings(PrefixMapping.Standard)
  }

  // None until start() has been called and again after finish().
  private var writer: Option[PrintWriter] = None

  // Fails with a descriptive message instead of a NullPointerException on lifecycle misuse.
  private def activeWriter: PrintWriter =
    writer.getOrElse(throw new IllegalStateException(s"TSVStreamRDF for $file used before start() or after finish()"))

  /** Opens the output file for writing as UTF-8, closing any writer left from an earlier start. */
  override def start(): Unit = {
    writer.foreach(_.close())
    writer = Some(new PrintWriter(file, "utf-8"))
  }

  /** Writes subject, predicate and object as one tab-separated line. */
  override def triple(triple: Triple): Unit = {
    val s = triple.getSubject.toString(prefixMapping, true)
    val p = triple.getPredicate.toString(prefixMapping, true)
    val o = triple.getObject.toString(prefixMapping, true)
    activeWriter.println(s"$s\t$p\t$o")
  }

  /** Writes subject, predicate, object and graph as one tab-separated line. */
  override def quad(quad: Quad): Unit = {
    val s = quad.getSubject.toString(prefixMapping, true)
    val p = quad.getPredicate.toString(prefixMapping, true)
    val o = quad.getObject.toString(prefixMapping, true)
    val g = quad.getGraph.toString(prefixMapping, true)
    activeWriter.println(s"$s\t$p\t$o\t$g")
  }

  /** Ignored: base IRIs are not represented in the TSV output. */
  override def base(base: String): Unit = ()

  /** Ignored: prefixes come from the constructor, not from the stream. */
  override def prefix(prefix: String, iri: String): Unit = ()

  /**
   * Closes the output file. Does nothing if the stream was never started.
   *
   * PrintWriter never throws on write failures, so its error flag is checked after closing
   * and a failure is raised rather than leaving a silently truncated file.
   */
  override def finish(): Unit =
    writer.foreach { open =>
      writer = None
      open.close()
      if (open.checkError()) {
        throw new java.io.UncheckedIOException(new java.io.IOException(s"Error while writing TSV output to $file"))
      }
    }

}
14 changes: 7 additions & 7 deletions core/src/main/scala/org/renci/relationgraph/RelationGraph.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import org.apache.jena.sys.JenaSystem
import org.apache.jena.vocabulary.{OWL2, RDF, RDFS}
import org.geneontology.whelk.BuiltIn.{Bottom, Top}
import org.geneontology.whelk._
import org.renci.relationgraph.RelationGraph.Config.{OWLMode, OutputMode, RDFMode}
import org.renci.relationgraph.RelationGraph.Config.{OWLMode, RDFMode, TriplesMode}
import org.semanticweb.owlapi.apibinding.OWLFunctionalSyntaxFactory.{OWLNothing, OWLThing}
import org.semanticweb.owlapi.model.parameters.Imports
import org.semanticweb.owlapi.model._
Expand All @@ -32,7 +32,7 @@ object RelationGraph extends StrictLogging {
private val OWLOntology = OWL2.Ontology.asNode

final case class Config(
mode: OutputMode = RDFMode,
mode: TriplesMode = RDFMode,
outputSubclasses: Boolean = false,
reflexiveSubclasses: Boolean = true,
equivalenceAsSubclass: Boolean = true,
Expand All @@ -43,11 +43,11 @@ object RelationGraph extends StrictLogging {

object Config {

sealed trait OutputMode
sealed trait TriplesMode

case object RDFMode extends OutputMode
case object RDFMode extends TriplesMode

case object OWLMode extends OutputMode
case object OWLMode extends TriplesMode

}

Expand Down Expand Up @@ -101,7 +101,7 @@ object RelationGraph extends StrictLogging {
} yield ()
}

def processRestrictionAndExtendQueue(restriction: Restriction, properties: Hierarchy, classes: Hierarchy, whelk: IndexedReasonerState, mode: Config.OutputMode, descendProperties: Boolean, outputClasses: Boolean, outputIndividuals: Boolean, queue: Queue[Restriction], activeRestrictions: Ref[Int], seenRefs: Map[Role, Ref[Set[AtomicConcept]]]): UIO[TriplesGroup] = {
def processRestrictionAndExtendQueue(restriction: Restriction, properties: Hierarchy, classes: Hierarchy, whelk: IndexedReasonerState, mode: Config.TriplesMode, descendProperties: Boolean, outputClasses: Boolean, outputIndividuals: Boolean, queue: Queue[Restriction], activeRestrictions: Ref[Int], seenRefs: Map[Role, Ref[Set[AtomicConcept]]]): UIO[TriplesGroup] = {
val triples = processRestriction(restriction, whelk, mode, outputClasses, outputIndividuals)
val continue = triples.redundant.nonEmpty
for {
Expand Down Expand Up @@ -134,7 +134,7 @@ object RelationGraph extends StrictLogging {
} yield triples
}

def processRestriction(restriction: Restriction, whelk: IndexedReasonerState, mode: Config.OutputMode, outputClasses: Boolean, outputIndividuals: Boolean): TriplesGroup = {
def processRestriction(restriction: Restriction, whelk: IndexedReasonerState, mode: Config.TriplesMode, outputClasses: Boolean, outputIndividuals: Boolean): TriplesGroup = {
val subConcepts = queryExistentialSubclasses(restriction, whelk)
val subclasses = if (outputClasses) (subConcepts - Bottom).collect { case AtomicConcept(id) => id } else Set.empty[String]
val instances = if (outputIndividuals) subConcepts.collect { case Nominal(Individual(id)) => id } else Set.empty[String]
Expand Down