diff --git a/src/main/scala/org/monarchinitiative/dosdp/DOSDP.scala b/src/main/scala/org/monarchinitiative/dosdp/DOSDP.scala index 9d80cb5..887ac7a 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/DOSDP.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/DOSDP.scala @@ -9,48 +9,48 @@ import io.circe.syntax._ import org.apache.commons.codec.digest.DigestUtils /** - * Basic data model for DOSDP schema, for serializing to/from JSON. - */ + * Basic data model for DOSDP schema, for serializing to/from JSON. + */ final case class DOSDP( - pattern_name: Option[String], - pattern_iri: Option[String], - base_IRI: Option[String], - description: Option[String], - readable_identifiers: Option[List[String]], - classes: Option[Map[String, String]], - relations: Option[Map[String, String]], - objectProperties: Option[Map[String, String]], - dataProperties: Option[Map[String, String]], - annotationProperties: Option[Map[String, String]], - vars: Option[Map[String, String]], - list_vars: Option[Map[String, String]], - data_vars: Option[Map[String, String]], - data_list_vars: Option[Map[String, String]], - substitutions: Option[List[RegexSub]], - annotations: Option[List[Annotations]], - logical_axioms: Option[List[PrintfOWL]], - equivalentTo: Option[PrintfOWLConvenience], - subClassOf: Option[PrintfOWLConvenience], - disjointWith: Option[PrintfOWLConvenience], - GCI: Option[PrintfOWLConvenience], - name: Option[PrintfAnnotationOBO], - comment: Option[PrintfAnnotationOBO], - `def`: Option[PrintfAnnotationOBO], - namespace: Option[PrintfAnnotationOBO], - exact_synonym: Option[ListAnnotationOBO], - narrow_synonym: Option[ListAnnotationOBO], - related_synonym: Option[ListAnnotationOBO], - broad_synonym: Option[ListAnnotationOBO], - generated_synonyms: Option[List[PrintfAnnotationOBO]], - generated_narrow_synonyms: Option[List[PrintfAnnotationOBO]], - generated_broad_synonyms: Option[List[PrintfAnnotationOBO]], - generated_related_synonyms: Option[List[PrintfAnnotationOBO]], - xref: Option[ListAnnotationOBO], - instance_graph: Option[InstanceGraph]) + pattern_name: Option[String] = None, + pattern_iri: Option[String] = None, + base_IRI: Option[String] = None, + description: Option[String] = None, + readable_identifiers: Option[List[String]] = None, + classes: Option[Map[String, String]] = None, + relations: Option[Map[String, String]] = None, + objectProperties: Option[Map[String, String]] = None, + dataProperties: Option[Map[String, String]] = None, + annotationProperties: Option[Map[String, String]] = None, + vars: Option[Map[String, String]] = None, + list_vars: Option[Map[String, String]] = None, + data_vars: Option[Map[String, String]] = None, + data_list_vars: Option[Map[String, String]] = None, + substitutions: Option[List[RegexSub]] = None, + annotations: Option[List[Annotations]] = None, + logical_axioms: Option[List[PrintfOWL]] = None, + equivalentTo: Option[PrintfOWLConvenience] = None, + subClassOf: Option[PrintfOWLConvenience] = None, + disjointWith: Option[PrintfOWLConvenience] = None, + GCI: Option[PrintfOWLConvenience] = None, + name: Option[PrintfAnnotationOBO] = None, + comment: Option[PrintfAnnotationOBO] = None, + `def`: Option[PrintfAnnotationOBO] = None, + namespace: Option[PrintfAnnotationOBO] = None, + exact_synonym: Option[ListAnnotationOBO] = None, + narrow_synonym: Option[ListAnnotationOBO] = None, + related_synonym: Option[ListAnnotationOBO] = None, + broad_synonym: Option[ListAnnotationOBO] = None, + generated_synonyms: Option[List[PrintfAnnotationOBO]] = None, + generated_narrow_synonyms: Option[List[PrintfAnnotationOBO]] = None, + generated_broad_synonyms: Option[List[PrintfAnnotationOBO]] = None, + generated_related_synonyms: Option[List[PrintfAnnotationOBO]] = None, + xref: Option[ListAnnotationOBO] = None, + instance_graph: Option[InstanceGraph] = None) object DOSDP { - val empty = DOSDP(None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None) + val empty: DOSDP = DOSDP() val MultiValueDelimiter: Char = '|' @@ -83,20 +83,23 @@ trait PrintfText { def annotations: Option[List[Annotations]] - def replaced(bindings: Option[Map[String, SingleValue]]): String = PrintfText.replaced(this.text, this.vars, bindings) + def replaced(bindings: Option[Map[String, SingleValue]]): Option[String] = PrintfText.replaced(this.text, this.vars, bindings) } object PrintfText { - def replaced(text: String, vars: Option[List[String]], bindings: Option[Map[String, SingleValue]]): String = { - val fillers = vars.map { realVars => + def replaced(text: String, vars: Option[List[String]], bindings: Option[Map[String, SingleValue]]): Option[String] = { + import cats.implicits._ + val fillersOpt = vars.map { realVars => bindings match { - case None => realVars.map(name => "'$" + name + "'") - case Some(bound) => realVars.map(bound.mapValues(_.value)) + case None => Some(realVars.map(name => "'$" + name + "'")) + case Some(bound) => + val stringValues = bound.mapValues(_.value) + realVars.map(v => stringValues.get(v)).sequence } - }.getOrElse(Nil) - text.format(fillers: _*) + } + fillersOpt.getOrElse(Some(Nil)).map(fillers => text.format(fillers: _*)) } } diff --git a/src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala b/src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala index e2fcfae..99d0d4f 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala @@ -10,8 +10,8 @@ import scala.collection.JavaConverters._ import scala.util.matching.Regex.Match /** - * Wraps a DOSDP data structure with functionality dependent on expanding IDs into IRIs - */ + * Wraps a DOSDP data structure with functionality dependent on expanding IDs into IRIs + */ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, String]) extends LazyLogging { lazy val checker = new DOSDPEntityChecker(dosdp, prefixes) @@ -25,24 +25,36 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S def allObjectProperties: Map[String, String] = dosdp.relations.getOrElse(Map.empty) ++ dosdp.objectProperties.getOrElse(Map.empty) - def equivalentToExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = dosdp.equivalentTo.map(eq => expressionFor(eq, logicalBindings) -> annotationsFor(eq, annotationBindings, logicalBindings)) + def equivalentToExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = for { + eq <- dosdp.equivalentTo + ce <- expressionFor(eq, logicalBindings) + } yield ce -> annotationsFor(eq, annotationBindings, logicalBindings) - def subClassOfExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = dosdp.subClassOf.map(eq => expressionFor(eq, logicalBindings) -> annotationsFor(eq, annotationBindings, logicalBindings)) + def subClassOfExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = for { + sco <- dosdp.subClassOf + ce <- expressionFor(sco, logicalBindings) + } yield ce -> annotationsFor(sco, annotationBindings, logicalBindings) - def disjointWithExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = dosdp.disjointWith.map(eq => expressionFor(eq, logicalBindings) -> annotationsFor(eq, annotationBindings, logicalBindings)) + def disjointWithExpression(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLClassExpression, Set[OWLAnnotation])] = for { + dw <- dosdp.disjointWith + ce <- expressionFor(dw, logicalBindings) + } yield ce -> annotationsFor(dw, annotationBindings, logicalBindings) - def gciAxiom(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLAxiom, Set[OWLAnnotation])] = dosdp.GCI.map(gci => axiomFor(gci, logicalBindings) -> annotationsFor(gci, annotationBindings, logicalBindings)) + def gciAxiom(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Option[(OWLAxiom, Set[OWLAnnotation])] = for { + gci <- dosdp.GCI + ax <- axiomFor(gci, logicalBindings) + } yield ax -> annotationsFor(gci, annotationBindings, logicalBindings) def logicalAxioms(logicalBindings: Option[Map[String, SingleValue]], annotationBindings: Option[Map[String, Binding]]): Set[OWLAxiom] = (for { axiomDefs <- dosdp.logical_axioms.toList axiomDef <- axiomDefs defTerm = definedTerm(logicalBindings) } yield axiomDef.axiom_type match { - case AxiomType.EquivalentTo => EquivalentClasses(annotationsFor(axiomDef, annotationBindings, logicalBindings).toSeq: _*)(defTerm, expressionFor(axiomDef, logicalBindings)) - case AxiomType.SubClassOf => SubClassOf(annotationsFor(axiomDef, annotationBindings, logicalBindings), defTerm, expressionFor(axiomDef, logicalBindings)) - case AxiomType.DisjointWith => DisjointClasses(annotationsFor(axiomDef, annotationBindings, logicalBindings).toSeq: _*)(defTerm, expressionFor(axiomDef, logicalBindings)) - case AxiomType.GCI => axiomFor(axiomDef, logicalBindings).getAnnotatedAxiom(annotationsFor(axiomDef, annotationBindings, logicalBindings).asJava) - }).toSet + case AxiomType.EquivalentTo => expressionFor(axiomDef, logicalBindings).map(ce => EquivalentClasses(annotationsFor(axiomDef, annotationBindings, logicalBindings).toSeq: _*)(defTerm, ce)) + case AxiomType.SubClassOf => expressionFor(axiomDef, logicalBindings).map(ce => SubClassOf(annotationsFor(axiomDef, annotationBindings, logicalBindings), defTerm, ce)) + case AxiomType.DisjointWith => expressionFor(axiomDef, logicalBindings).map(ce => DisjointClasses(annotationsFor(axiomDef, annotationBindings, logicalBindings).toSeq: _*)(defTerm, ce)) + case AxiomType.GCI => axiomFor(axiomDef, logicalBindings).map(ax => ax.getAnnotatedAxiom(annotationsFor(axiomDef, annotationBindings, logicalBindings).asJava)) + }).toSet.flatten private val term = Class(DOSDP.variableToIRI(DOSDP.DefinedClassVariable)) @@ -65,11 +77,11 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S vars.mapValues(expressionParser.parse) } - private def expressionFor(template: PrintfText, bindings: Option[Map[String, SingleValue]]): OWLClassExpression = - expressionParser.parse(template.replaced(bindings)) + private def expressionFor(template: PrintfText, bindings: Option[Map[String, SingleValue]]): Option[OWLClassExpression] = + template.replaced(bindings).map(expressionParser.parse) - private def axiomFor(template: PrintfText, bindings: Option[Map[String, SingleValue]]): OWLAxiom = - axiomParser.parse(template.replaced(bindings)) + private def axiomFor(template: PrintfText, bindings: Option[Map[String, SingleValue]]): Option[OWLAxiom] = + template.replaced(bindings).map(axiomParser.parse) private def annotationsFor(element: PrintfText, annotationBindings: Option[Map[String, Binding]], logicalBindings: Option[Map[String, Binding]]): Set[OWLAnnotation] = (for { @@ -113,20 +125,20 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S private def translateAnnotations(annotationField: NormalizedAnnotation, annotationBindings: Option[Bindings], logicalBindings: Option[Bindings]): Set[OWLAnnotation] = annotationField match { case NormalizedPrintfAnnotation(prop, text, vars, overrideColumnOpt, subAnnotations) => - val value = (for { + val valueOpt = (for { column <- overrideColumnOpt bindings <- annotationBindings SingleValue(binding) <- bindings.get(column) trimmed = binding.trim if trimmed.nonEmpty - } yield trimmed).getOrElse(PrintfText.replaced(text, vars, annotationBindings.map(singleValueBindings))) - Set(Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, value)) + } yield trimmed).orElse(PrintfText.replaced(text, vars, annotationBindings.map(singleValueBindings))) + valueOpt.toSet[String].map(value => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, value)) case NormalizedListAnnotation(prop, value, subAnnotations) => // If no variable bindings are passed in, dummy value is filled in using variable name val multiValBindingsOpt = annotationBindings.map(multiValueBindings) val bindingsMap = multiValBindingsOpt.getOrElse(Map(value -> MultiValue(Set("'$" + value + "'")))) - val listValue = bindingsMap(value) - listValue.value.map(v => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, v)) + val listValueOpt = bindingsMap.get(value) + listValueOpt.toSet[MultiValue].flatMap(listValue => listValue.value.map(v => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, v))) case NormalizedIRIValueAnnotation(prop, varr, subAnnotations) => val iriValue = (for { actualBindings <- logicalBindings diff --git a/src/main/scala/org/monarchinitiative/dosdp/cli/Common.scala b/src/main/scala/org/monarchinitiative/dosdp/cli/Common.scala index d546a57..f1e0641 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/cli/Common.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/cli/Common.scala @@ -38,7 +38,7 @@ trait Common extends Command with LazyLogging { def inputDOSDPFrom(location: String): DOSDP = { val possibleFile = new File(location) val source = if (possibleFile.exists) Source.fromFile(possibleFile, "UTF-8") - else Source.fromURL(templateFile, "UTF-8") + else Source.fromURL(location, "UTF-8") parser.parse(source.mkString).right.flatMap(json => json.as[DOSDP]) match { case Right(dosdp) => dosdp case Left(error) => diff --git a/src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala b/src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala index 23f0403..8f4f0c3 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala @@ -62,7 +62,7 @@ object Generate extends Command(description = "generate ontology axioms for TSV val (varBindingsItems, localLabelItems) = (for { vars <- dosdp.vars.toSeq varr <- vars.keys - filler <- row.get(varr) + filler <- row.get(varr).flatMap(stripToOption) fillerLabelOpt = for { fillerIRI <- Prefixes.idToIRI(filler, prefixes) label <- row.get(s"${varr}_label").flatMap(stripToOption) @@ -73,17 +73,17 @@ object Generate extends Command(description = "generate ontology axioms for TSV val listVarBindings = (for { listVars <- dosdp.list_vars.toSeq listVar <- listVars.keys - filler <- row.get(listVar) + filler <- row.get(listVar).flatMap(stripToOption) } yield listVar -> MultiValue(filler.split(DOSDP.MultiValueDelimiter).map(_.trim).toSet)).toMap val dataVarBindings = (for { dataVars <- dosdp.data_vars.toSeq dataVar <- dataVars.keys - filler <- row.get(dataVar) + filler <- row.get(dataVar).flatMap(stripToOption) } yield dataVar -> SingleValue(filler.trim)).toMap val dataListBindings = (for { dataListVars <- dosdp.data_list_vars.toSeq dataListVar <- dataListVars.keys - filler <- row.get(dataListVar) + filler <- row.get(dataListVar).flatMap(stripToOption) } yield dataListVar -> MultiValue(filler.split(DOSDP.MultiValueDelimiter).map(_.trim).toSet)).toMap val additionalBindings = for { (key, value) <- row.filterKeys(k => !knownColumns(k)) diff --git a/src/test/resources/org/monarchinitiative/dosdp/missing_values_test.tsv b/src/test/resources/org/monarchinitiative/dosdp/missing_values_test.tsv new file mode 100644 index 0000000..af6b456 --- /dev/null +++ b/src/test/resources/org/monarchinitiative/dosdp/missing_values_test.tsv @@ -0,0 +1,6 @@ +defined_class structure structure_label taxon seeAlso exactSynonym relatedSynonyms term_name substitute_me +EX:0001 UBERON:arm Arm VTO:Holothuroidea UBERON:tentacle a synonym for this term related term 1|related term 2 Term 0001 one two +EX:0002 UBERON:arm Arm VTO:Holothuroidea UBERON:tentacle related term 1|related term 2 Term 0002 one two +EX:0003 UBERON:arm Arm VTO:Holothuroidea UBERON:tentacle related term 1|related term 2 one two +EX:0004 VTO:Holothuroidea UBERON:tentacle related term 1|related term 2 Term 0004 one two +EX:0005 UBERON:arm Arm VTO:Holothuroidea UBERON:tentacle a synonym for this term related term 1|related term 2 Term 0005 diff --git a/src/test/resources/org/monarchinitiative/dosdp/missing_values_test.yaml b/src/test/resources/org/monarchinitiative/dosdp/missing_values_test.yaml new file mode 100644 index 0000000..3b25814 --- /dev/null +++ b/src/test/resources/org/monarchinitiative/dosdp/missing_values_test.yaml @@ -0,0 +1,62 @@ +pattern_name: missing_values_test + +classes: + shape: PATO:shape + anatomical_entity: UBERON:anatomical_entity + thing: owl:Thing + +relations: + part_of: RO:part_of + +annotationProperties: + never_in_taxon: RO:never_in_taxon + +vars: + structure: anatomical_entity + taxon: thing + +list_vars: + seeAlso: anatomical_entity + +data_vars: + term_name: xsd:string + substitute_me: xsd:string + +data_list_vars: + exactSynonym: xsd:string + relatedSynonyms: xsd:string + broadSynonyms: xsd:string # don't put this column in the test input + +annotations: + - annotationProperty: never_in_taxon + var: taxon + +name: + text: "%s" + vars: + - term_name + +def: + text: "%s %s %s" + vars: + - term_name + - structure + - substitute_me_munged + +exact_synonym: + value: exactSynonym + +broad_synonym: + value: broadSynonyms + +substitutions: + - in: substitute_me + out: substitute_me_munged + match: (.+) (.+) + sub: \2 and then \1 + +equivalentTo: + text: "%s and (part_of some %s)" + vars: + - structure + - taxon diff --git a/src/test/scala/org/monarchinitiative/dosdp/BlankLineTest.scala b/src/test/scala/org/monarchinitiative/dosdp/BlankLineTest.scala index 4846a17..fb30e3e 100644 --- a/src/test/scala/org/monarchinitiative/dosdp/BlankLineTest.scala +++ b/src/test/scala/org/monarchinitiative/dosdp/BlankLineTest.scala @@ -10,7 +10,7 @@ class BlankLineTest extends UnitSpec { val dosdp = Generate.inputDOSDPFrom("src/test/resources/org/monarchinitiative/dosdp/test_blank_lines.yaml") val fillers = Generate.readFillers(new File("src/test/resources/org/monarchinitiative/dosdp/test_blank_lines.tsv"), new TSVFormat {}) - val axioms = Generate.renderPattern(dosdp: DOSDP, OBOPrefixes, fillers: Iterator[Map[String, String]], None, true, true, None, false) + val axioms = Generate.renderPattern(dosdp: DOSDP, OBOPrefixes, fillers, None, true, true, None, false) "Blank lines" should "not cause errors" in { axioms should contain(Class("http://ex.org/1") Annotation(RDFSLabel, "http://example.org/Entity1 thing")) diff --git a/src/test/scala/org/monarchinitiative/dosdp/MissingValuesTest.scala b/src/test/scala/org/monarchinitiative/dosdp/MissingValuesTest.scala new file mode 100644 index 0000000..8995cd8 --- /dev/null +++ b/src/test/scala/org/monarchinitiative/dosdp/MissingValuesTest.scala @@ -0,0 +1,17 @@ +package org.monarchinitiative.dosdp + +import java.io.File + +import com.github.tototoshi.csv.TSVFormat +import org.monarchinitiative.dosdp.cli.Generate + +class MissingValuesTest extends UnitSpec { + + "Missing columns and cell values" should "be handled by dropping outputs" in { + val dosdp = Generate.inputDOSDPFrom("src/test/resources/org/monarchinitiative/dosdp/missing_values_test.yaml") + val fillers = Generate.readFillers(new File("src/test/resources/org/monarchinitiative/dosdp/missing_values_test.tsv"), new TSVFormat {}) + val axioms = Generate.renderPattern(dosdp: DOSDP, OBOPrefixes, fillers, None, true, true, None, false) + // No exceptions should be thrown + } + +} diff --git a/src/test/scala/org/monarchinitiative/dosdp/OverrideTest.scala b/src/test/scala/org/monarchinitiative/dosdp/OverrideTest.scala index 4ef2122..3db7dff 100644 --- a/src/test/scala/org/monarchinitiative/dosdp/OverrideTest.scala +++ b/src/test/scala/org/monarchinitiative/dosdp/OverrideTest.scala @@ -16,7 +16,6 @@ class OverrideTest extends UnitSpec { val fillers = CSVReader.open(new File("src/test/resources/org/monarchinitiative/dosdp/OverrideTest.tsv"), "utf-8")(new TSVFormat {}).iteratorWithHeaders val ontology = OWLManager.createOWLOntologyManager().loadOntology(IRI.create(new File("src/test/resources/org/monarchinitiative/dosdp/OverrideTest.ofn"))) val axioms = Generate.renderPattern(dosdp: DOSDP, OBOPrefixes, fillers: Iterator[Map[String, String]], Some(ontology), true, true, None, false) - axioms.foreach(println) "Overrides" should "be checked" in { axioms should contain(Class("http://ex.org/1") Annotation(RDFSLabel, "Entity 1 thing"))