Skip to content

Commit

Permalink
Merge pull request #185 from INCATools/issue-137
Browse files Browse the repository at this point in the history
Handle missing columns and empty table cells by omitting outputs which require those values
  • Loading branch information
balhoff authored Jan 17, 2020
2 parents 6516eea + 07fcd61 commit db138a6
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 72 deletions.
93 changes: 48 additions & 45 deletions src/main/scala/org/monarchinitiative/dosdp/DOSDP.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,48 +9,48 @@ import io.circe.syntax._
import org.apache.commons.codec.digest.DigestUtils

/**
* Basic data model for DOSDP schema, for serializing to/from JSON.
*/
* Basic data model for DOSDP schema, for serializing to/from JSON.
*/
final case class DOSDP(
pattern_name: Option[String],
pattern_iri: Option[String],
base_IRI: Option[String],
description: Option[String],
readable_identifiers: Option[List[String]],
classes: Option[Map[String, String]],
relations: Option[Map[String, String]],
objectProperties: Option[Map[String, String]],
dataProperties: Option[Map[String, String]],
annotationProperties: Option[Map[String, String]],
vars: Option[Map[String, String]],
list_vars: Option[Map[String, String]],
data_vars: Option[Map[String, String]],
data_list_vars: Option[Map[String, String]],
substitutions: Option[List[RegexSub]],
annotations: Option[List[Annotations]],
logical_axioms: Option[List[PrintfOWL]],
equivalentTo: Option[PrintfOWLConvenience],
subClassOf: Option[PrintfOWLConvenience],
disjointWith: Option[PrintfOWLConvenience],
GCI: Option[PrintfOWLConvenience],
name: Option[PrintfAnnotationOBO],
comment: Option[PrintfAnnotationOBO],
`def`: Option[PrintfAnnotationOBO],
namespace: Option[PrintfAnnotationOBO],
exact_synonym: Option[ListAnnotationOBO],
narrow_synonym: Option[ListAnnotationOBO],
related_synonym: Option[ListAnnotationOBO],
broad_synonym: Option[ListAnnotationOBO],
generated_synonyms: Option[List[PrintfAnnotationOBO]],
generated_narrow_synonyms: Option[List[PrintfAnnotationOBO]],
generated_broad_synonyms: Option[List[PrintfAnnotationOBO]],
generated_related_synonyms: Option[List[PrintfAnnotationOBO]],
xref: Option[ListAnnotationOBO],
instance_graph: Option[InstanceGraph])
pattern_name: Option[String] = None,
pattern_iri: Option[String] = None,
base_IRI: Option[String] = None,
description: Option[String] = None,
readable_identifiers: Option[List[String]] = None,
classes: Option[Map[String, String]] = None,
relations: Option[Map[String, String]] = None,
objectProperties: Option[Map[String, String]] = None,
dataProperties: Option[Map[String, String]] = None,
annotationProperties: Option[Map[String, String]] = None,
vars: Option[Map[String, String]] = None,
list_vars: Option[Map[String, String]] = None,
data_vars: Option[Map[String, String]] = None,
data_list_vars: Option[Map[String, String]] = None,
substitutions: Option[List[RegexSub]] = None,
annotations: Option[List[Annotations]] = None,
logical_axioms: Option[List[PrintfOWL]] = None,
equivalentTo: Option[PrintfOWLConvenience] = None,
subClassOf: Option[PrintfOWLConvenience] = None,
disjointWith: Option[PrintfOWLConvenience] = None,
GCI: Option[PrintfOWLConvenience] = None,
name: Option[PrintfAnnotationOBO] = None,
comment: Option[PrintfAnnotationOBO] = None,
`def`: Option[PrintfAnnotationOBO] = None,
namespace: Option[PrintfAnnotationOBO] = None,
exact_synonym: Option[ListAnnotationOBO] = None,
narrow_synonym: Option[ListAnnotationOBO] = None,
related_synonym: Option[ListAnnotationOBO] = None,
broad_synonym: Option[ListAnnotationOBO] = None,
generated_synonyms: Option[List[PrintfAnnotationOBO]] = None,
generated_narrow_synonyms: Option[List[PrintfAnnotationOBO]] = None,
generated_broad_synonyms: Option[List[PrintfAnnotationOBO]] = None,
generated_related_synonyms: Option[List[PrintfAnnotationOBO]] = None,
xref: Option[ListAnnotationOBO] = None,
instance_graph: Option[InstanceGraph] = None)

object DOSDP {

val empty = DOSDP(None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)
val empty: DOSDP = DOSDP()

val MultiValueDelimiter: Char = '|'

Expand Down Expand Up @@ -83,20 +83,23 @@ trait PrintfText {

def annotations: Option[List[Annotations]]

def replaced(bindings: Option[Map[String, SingleValue]]): String = PrintfText.replaced(this.text, this.vars, bindings)
def replaced(bindings: Option[Map[String, SingleValue]]): Option[String] = PrintfText.replaced(this.text, this.vars, bindings)

}

object PrintfText {

def replaced(text: String, vars: Option[List[String]], bindings: Option[Map[String, SingleValue]]): String = {
val fillers = vars.map { realVars =>
def replaced(text: String, vars: Option[List[String]], bindings: Option[Map[String, SingleValue]]): Option[String] = {
import cats.implicits._
val fillersOpt = vars.map { realVars =>
bindings match {
case None => realVars.map(name => "'$" + name + "'")
case Some(bound) => realVars.map(bound.mapValues(_.value))
case None => Some(realVars.map(name => "'$" + name + "'"))
case Some(bound) =>
val stringValues = bound.mapValues(_.value)
realVars.map(v => stringValues.get(v)).sequence
}
}.getOrElse(Nil)
text.format(fillers: _*)
}
fillersOpt.getOrElse(Some(Nil)).map(fillers => text.format(fillers: _*))
}

}
Expand Down
52 changes: 32 additions & 20 deletions src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import scala.collection.JavaConverters._
import scala.util.matching.Regex.Match

/**
* Wraps a DOSDP data structure with functionality dependent on expanding IDs into IRIs
*/
* Wraps a DOSDP data structure with functionality dependent on expanding IDs into IRIs
*/
final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, String]) extends LazyLogging {

lazy val checker = new DOSDPEntityChecker(dosdp, prefixes)
Expand All @@ -25,24 +25,36 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S

def allObjectProperties: Map[String, String] = dosdp.relations.getOrElse(Map.empty) ++ dosdp.objectProperties.getOrElse(Map.empty)

def equivalentToExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = dosdp.equivalentTo.map(eq => expressionFor(eq, logicalBindings) -> annotationsFor(eq, annotationBindings, logicalBindings))
def equivalentToExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = for {
eq <- dosdp.equivalentTo
ce <- expressionFor(eq, logicalBindings)
} yield ce -> annotationsFor(eq, annotationBindings, logicalBindings)

def subClassOfExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = dosdp.subClassOf.map(eq => expressionFor(eq, logicalBindings) -> annotationsFor(eq, annotationBindings, logicalBindings))
def subClassOfExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = for {
sco <- dosdp.subClassOf
ce <- expressionFor(sco, logicalBindings)
} yield ce -> annotationsFor(sco, annotationBindings, logicalBindings)

def disjointWithExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = dosdp.disjointWith.map(eq => expressionFor(eq, logicalBindings) -> annotationsFor(eq, annotationBindings, logicalBindings))
def disjointWithExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = for {
dw <- dosdp.disjointWith
ce <- expressionFor(dw, logicalBindings)
} yield ce -> annotationsFor(dw, annotationBindings, logicalBindings)

def gciAxiom(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLAxiom, Set[OWLAnnotation])] = dosdp.GCI.map(gci => axiomFor(gci, logicalBindings) -> annotationsFor(gci, annotationBindings, logicalBindings))
def gciAxiom(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLAxiom, Set[OWLAnnotation])] = for {
gci <- dosdp.GCI
ax <- axiomFor(gci, logicalBindings)
} yield ax -> annotationsFor(gci, annotationBindings, logicalBindings)

def logicalAxioms(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Set[OWLAxiom] = (for {
axiomDefs <- dosdp.logical_axioms.toList
axiomDef <- axiomDefs
defTerm = definedTerm(logicalBindings)
} yield axiomDef.axiom_type match {
case AxiomType.EquivalentTo => EquivalentClasses(annotationsFor(axiomDef, annotationBindings, logicalBindings).toSeq: _*)(defTerm, expressionFor(axiomDef, logicalBindings))
case AxiomType.SubClassOf => SubClassOf(annotationsFor(axiomDef, annotationBindings, logicalBindings), defTerm, expressionFor(axiomDef, logicalBindings))
case AxiomType.DisjointWith => DisjointClasses(annotationsFor(axiomDef, annotationBindings, logicalBindings).toSeq: _*)(defTerm, expressionFor(axiomDef, logicalBindings))
case AxiomType.GCI => axiomFor(axiomDef, logicalBindings).getAnnotatedAxiom(annotationsFor(axiomDef, annotationBindings, logicalBindings).asJava)
}).toSet
case AxiomType.EquivalentTo => expressionFor(axiomDef, logicalBindings).map(ce => EquivalentClasses(annotationsFor(axiomDef, annotationBindings, logicalBindings).toSeq: _*)(defTerm, ce))
case AxiomType.SubClassOf => expressionFor(axiomDef, logicalBindings).map(ce => SubClassOf(annotationsFor(axiomDef, annotationBindings, logicalBindings), defTerm, ce))
case AxiomType.DisjointWith => expressionFor(axiomDef, logicalBindings).map(ce => DisjointClasses(annotationsFor(axiomDef, annotationBindings, logicalBindings).toSeq: _*)(defTerm, ce))
case AxiomType.GCI => axiomFor(axiomDef, logicalBindings).map(ax => ax.getAnnotatedAxiom(annotationsFor(axiomDef, annotationBindings, logicalBindings).asJava))
}).toSet.flatten

private val term = Class(DOSDP.variableToIRI(DOSDP.DefinedClassVariable))

Expand All @@ -65,11 +77,11 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S
vars.mapValues(expressionParser.parse)
}

private def expressionFor(template: PrintfText, bindings: Option[Map[String, SingleValue]]): OWLClassExpression =
expressionParser.parse(template.replaced(bindings))
private def expressionFor(template: PrintfText, bindings: Option[Map[String, SingleValue]]): Option[OWLClassExpression] =
template.replaced(bindings).map(expressionParser.parse)

private def axiomFor(template: PrintfText, bindings: Option[Map[String, SingleValue]]): OWLAxiom =
axiomParser.parse(template.replaced(bindings))
private def axiomFor(template: PrintfText, bindings: Option[Map[String, SingleValue]]): Option[OWLAxiom] =
template.replaced(bindings).map(axiomParser.parse)

private def annotationsFor(element: PrintfText, annotationBindings: Option[Map[String, Binding]], logicalBindings: Option[Map[String, Binding]]): Set[OWLAnnotation] =
(for {
Expand Down Expand Up @@ -113,20 +125,20 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S

private def translateAnnotations(annotationField: NormalizedAnnotation, annotationBindings: Option[Bindings], logicalBindings: Option[Bindings]): Set[OWLAnnotation] = annotationField match {
case NormalizedPrintfAnnotation(prop, text, vars, overrideColumnOpt, subAnnotations) =>
val value = (for {
val valueOpt = (for {
column <- overrideColumnOpt
bindings <- annotationBindings
SingleValue(binding) <- bindings.get(column)
trimmed = binding.trim
if trimmed.nonEmpty
} yield trimmed).getOrElse(PrintfText.replaced(text, vars, annotationBindings.map(singleValueBindings)))
Set(Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, value))
} yield trimmed).orElse(PrintfText.replaced(text, vars, annotationBindings.map(singleValueBindings)))
valueOpt.toSet[String].map(value => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, value))
case NormalizedListAnnotation(prop, value, subAnnotations) =>
// If no variable bindings are passed in, dummy value is filled in using variable name
val multiValBindingsOpt = annotationBindings.map(multiValueBindings)
val bindingsMap = multiValBindingsOpt.getOrElse(Map(value -> MultiValue(Set("'$" + value + "'"))))
val listValue = bindingsMap(value)
listValue.value.map(v => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, v))
val listValueOpt = bindingsMap.get(value)
listValueOpt.toSet[MultiValue].flatMap(listValue => listValue.value.map(v => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, v)))
case NormalizedIRIValueAnnotation(prop, varr, subAnnotations) =>
val iriValue = (for {
actualBindings <- logicalBindings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ trait Common extends Command with LazyLogging {
def inputDOSDPFrom(location: String): DOSDP = {
val possibleFile = new File(location)
val source = if (possibleFile.exists) Source.fromFile(possibleFile, "UTF-8")
else Source.fromURL(templateFile, "UTF-8")
else Source.fromURL(location, "UTF-8")
parser.parse(source.mkString).right.flatMap(json => json.as[DOSDP]) match {
case Right(dosdp) => dosdp
case Left(error) =>
Expand Down
8 changes: 4 additions & 4 deletions src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ object Generate extends Command(description = "generate ontology axioms for TSV
val (varBindingsItems, localLabelItems) = (for {
vars <- dosdp.vars.toSeq
varr <- vars.keys
filler <- row.get(varr)
filler <- row.get(varr).flatMap(stripToOption)
fillerLabelOpt = for {
fillerIRI <- Prefixes.idToIRI(filler, prefixes)
label <- row.get(s"${varr}_label").flatMap(stripToOption)
Expand All @@ -73,17 +73,17 @@ object Generate extends Command(description = "generate ontology axioms for TSV
val listVarBindings = (for {
listVars <- dosdp.list_vars.toSeq
listVar <- listVars.keys
filler <- row.get(listVar)
filler <- row.get(listVar).flatMap(stripToOption)
} yield listVar -> MultiValue(filler.split(DOSDP.MultiValueDelimiter).map(_.trim).toSet)).toMap
val dataVarBindings = (for {
dataVars <- dosdp.data_vars.toSeq
dataVar <- dataVars.keys
filler <- row.get(dataVar)
filler <- row.get(dataVar).flatMap(stripToOption)
} yield dataVar -> SingleValue(filler.trim)).toMap
val dataListBindings = (for {
dataListVars <- dosdp.data_list_vars.toSeq
dataListVar <- dataListVars.keys
filler <- row.get(dataListVar)
filler <- row.get(dataListVar).flatMap(stripToOption)
} yield dataListVar -> MultiValue(filler.split(DOSDP.MultiValueDelimiter).map(_.trim).toSet)).toMap
val additionalBindings = for {
(key, value) <- row.filterKeys(k => !knownColumns(k))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
defined_class structure structure_label taxon seeAlso exactSynonym relatedSynonyms term_name substitute_me
EX:0001 UBERON:arm Arm VTO:Holothuroidea UBERON:tentacle a synonym for this term related term 1|related term 2 Term 0001 one two
EX:0002 UBERON:arm Arm VTO:Holothuroidea UBERON:tentacle related term 1|related term 2 Term 0002 one two
EX:0003 UBERON:arm Arm VTO:Holothuroidea UBERON:tentacle related term 1|related term 2 one two
EX:0004 VTO:Holothuroidea UBERON:tentacle related term 1|related term 2 Term 0004 one two
EX:0005 UBERON:arm Arm VTO:Holothuroidea UBERON:tentacle a synonym for this term related term 1|related term 2 Term 0005
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Test pattern used by MissingValuesTest: exercises rendering when the input
# table lacks a declared column or leaves a cell empty.
pattern_name: missing_values_test

classes:
  shape: PATO:shape
  anatomical_entity: UBERON:anatomical_entity
  thing: owl:Thing

relations:
  part_of: RO:part_of

annotationProperties:
  never_in_taxon: RO:never_in_taxon

vars:
  structure: anatomical_entity
  taxon: thing

list_vars:
  seeAlso: anatomical_entity

data_vars:
  term_name: xsd:string
  substitute_me: xsd:string

data_list_vars:
  exactSynonym: xsd:string
  relatedSynonyms: xsd:string
  broadSynonyms: xsd:string # don't put this column in the test input

annotations:
  - annotationProperty: never_in_taxon
    var: taxon

name:
  text: "%s"
  vars:
    - term_name

def:
  text: "%s %s %s"
  vars:
    - term_name
    - structure
    # produced by the entry under `substitutions` below (its `out` name)
    - substitute_me_munged

exact_synonym:
  value: exactSynonym

# refers to the data_list_var whose column is deliberately absent from the input
broad_synonym:
  value: broadSynonyms

substitutions:
  - in: substitute_me
    out: substitute_me_munged
    match: (.+) (.+)
    sub: \2 and then \1

equivalentTo:
  text: "%s and (part_of some %s)"
  vars:
    - structure
    - taxon
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class BlankLineTest extends UnitSpec {

val dosdp = Generate.inputDOSDPFrom("src/test/resources/org/monarchinitiative/dosdp/test_blank_lines.yaml")
val fillers = Generate.readFillers(new File("src/test/resources/org/monarchinitiative/dosdp/test_blank_lines.tsv"), new TSVFormat {})
val axioms = Generate.renderPattern(dosdp: DOSDP, OBOPrefixes, fillers: Iterator[Map[String, String]], None, true, true, None, false)
val axioms = Generate.renderPattern(dosdp: DOSDP, OBOPrefixes, fillers, None, true, true, None, false)

"Blank lines" should "not cause errors" in {
axioms should contain(Class("http://ex.org/1") Annotation(RDFSLabel, "http://example.org/Entity1 thing"))
Expand Down
17 changes: 17 additions & 0 deletions src/test/scala/org/monarchinitiative/dosdp/MissingValuesTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.monarchinitiative.dosdp

import java.io.File

import com.github.tototoshi.csv.TSVFormat
import org.monarchinitiative.dosdp.cli.Generate

/**
 * Regression test for rendering a pattern against a table that has absent columns and blank cells.
 *
 * Loads `missing_values_test.yaml` and `missing_values_test.tsv` from the test resources and renders
 * the pattern; the only requirement checked is that rendering completes without throwing.
 */
class MissingValuesTest extends UnitSpec {

  "Missing columns and cell values" should "be handled by dropping outputs" in {
    val dosdp = Generate.inputDOSDPFrom("src/test/resources/org/monarchinitiative/dosdp/missing_values_test.yaml")
    val fillers = Generate.readFillers(new File("src/test/resources/org/monarchinitiative/dosdp/missing_values_test.tsv"), new TSVFormat {})
    // Outputs whose variables have no value are meant to be omitted rather than fail,
    // so the assertion is that rendering raises nothing.
    // NOTE(review): relies on UnitSpec providing ScalaTest Matchers — confirm.
    noException should be thrownBy Generate.renderPattern(dosdp, OBOPrefixes, fillers, None, true, true, None, false)
  }

}
Loading

0 comments on commit db138a6

Please sign in to comment.