Skip to content

Commit

Permalink
Allow for XML postlog
Browse files Browse the repository at this point in the history
The XML specification allows `misc` (comments and processing instructions)
to appear after the root element, and both the event parser and the document
builder should support this.

Since a stream can contain concatenated XML documents, a choice has to
be made about which document the misc found between two documents in the
stream belongs to. This implementation treats such misc as the postlog of
the previous document, since this allows events to be emitted early without
knowing what comes next (i.e. whether the stream actually contains only
one document or several documents).
  • Loading branch information
satabin committed Jun 9, 2022
1 parent a805e80 commit bd33f1a
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,13 @@ package object scalaXml {
standalone: Option[Boolean],
doctype: Option[XmlEvent.XmlDoctype],
prolog: List[Misc],
root: Elem): Document = {
root: Elem,
postlog: List[Misc]): Document = {
val document = new Document()
document.version = version
document.encoding = encoding
document.standAlone = standalone
document.children = prolog :+ root
document.children = prolog ++ (root :: postlog)
document.docElem = root.head
document
}
Expand Down
3 changes: 2 additions & 1 deletion xml/src/main/scala/fs2/data/xml/dom/DocumentBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ trait DocumentBuilder[Document] {
standalone: Option[Boolean],
doctype: Option[XmlEvent.XmlDoctype],
prolog: List[Misc],
root: Elem): Document
root: Elem,
postlog: List[Misc]): Document

def makeComment(content: String): Option[Misc]

Expand Down
19 changes: 18 additions & 1 deletion xml/src/main/scala/fs2/data/xml/dom/TreeParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,21 @@ class TreeParser[F[_], Node](implicit F: RaiseThrowable[F], builder: DocumentBui
Pull.raiseError(new XmlTreeException(s"unexpected event '$evt'"))
}

/** Gathers the misc nodes (comments and processing instructions) that follow the root element.
  *
  * Events are peeked one at a time. A comment or processing instruction is handed to the
  * builder and the index is advanced past it; the first event of any other kind ends the
  * loop and is left in place (the index is returned unchanged).
  *
  * @param chunk the chunk of events currently being read
  * @param idx the position of the next event in `chunk`
  * @param rest the stream of events following `chunk`
  * @return the collected misc nodes, together with the updated chunk, index and remaining stream
  */
private def postlog(
    chunk: Chunk[XmlEvent],
    idx: Int,
    rest: Stream[F, XmlEvent]): Pull[F, INothing, (List[builder.Misc], Chunk[XmlEvent], Int, Stream[F, XmlEvent])] =
  (chunk, idx, rest, new ListBuffer[builder.Misc]).tailRecM { case (curChunk, curIdx, curRest, collected) =>
    peek(curChunk, curIdx, curRest).map {
      case (XmlEvent.Comment(text), nextChunk, nextIdx, nextRest) =>
        // makeComment yields an Option, so the builder may choose not to produce a node
        (nextChunk, nextIdx + 1, nextRest, collected ++= builder.makeComment(text)).asLeft
      case (XmlEvent.XmlPI(piTarget, piContent), nextChunk, nextIdx, nextRest) =>
        (nextChunk, nextIdx + 1, nextRest, collected += builder.makePI(piTarget, piContent)).asLeft
      case (_, nextChunk, nextIdx, nextRest) =>
        // anything else is not part of the postlog: stop without moving past it
        (collected.result(), nextChunk, nextIdx, nextRest).asRight
    }
  }

private def document(chunk: Chunk[XmlEvent],
idx: Int,
rest: Stream[F, XmlEvent]): Pull[F, Node, (Chunk[XmlEvent], Int, Stream[F, XmlEvent])] =
Expand All @@ -125,14 +140,16 @@ class TreeParser[F[_], Node](implicit F: RaiseThrowable[F], builder: DocumentBui
for {
(decl, doctype, prolog, chunk, idx, rest) <- prolog(chunk, idx, rest)
(node, chunk, idx, rest) <- element(chunk, idx, rest)
(postlog, chunk, idx, rest) <- postlog(chunk, idx, rest)
(chunk, idx, rest) <- expect(XmlEvent.EndDocument, chunk, idx, rest)
() <- Pull.output1(
builder.makeDocument(decl.map(_.version),
decl.flatMap(_.encoding),
decl.flatMap(_.standalone),
doctype,
prolog,
node))
node,
postlog))
} yield (chunk, idx, rest)
case (evt, _, _, _) => Pull.raiseError(new XmlTreeException(s"unexpected event '$evt'"))
}
Expand Down
62 changes: 51 additions & 11 deletions xml/src/main/scala/fs2/data/xml/internals/EventParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ private[xml] object EventParser {
scanPrologToken2(ctx, false, chunkAcc)
}
case (ctx, chunkAcc, MarkupToken.StartToken(name)) =>
readElement(ctx, false, name, chunkAcc).map(Some(_))
readElement(ctx, false, name, chunkAcc)
case (ctx, chunkAcc, MarkupToken.CommentToken(None)) =>
scanPrologToken1(ctx, false, chunkAcc)
case (ctx, chunkAcc, MarkupToken.CommentToken(Some(comment))) =>
Expand Down Expand Up @@ -789,11 +789,11 @@ private[xml] object EventParser {
scanPrologToken2(ctx, is11, chunkAcc)
}
case Some((ctx, chunkAcc, MarkupToken.StartToken(name))) =>
readElement(ctx, is11, name, chunkAcc).map(Some(_))
readElement(ctx, is11, name, chunkAcc)
case Some((_, chunkAcc, t)) =>
fail("22", s"unexpected markup $t", Some(chunkAcc))
case None =>
Pull.pure(None)
Pull.output1(XmlEvent.EndDocument).as(None)
}

def handleVersion(
Expand Down Expand Up @@ -1000,21 +1000,61 @@ private[xml] object EventParser {
scanPrologToken2(ctx, is11, chunkAcc += XmlEvent.XmlPI(name, body))
}
case Some((ctx, chunkAcc, MarkupToken.StartToken(name))) =>
readElement(ctx, is11, name, chunkAcc).map(Some(_))
readElement(ctx, is11, name, chunkAcc)
case Some((_, chunkAcc, t)) =>
fail("22", s"unexpected markup $t", Some(chunkAcc))
case None =>
Pull.pure(None)
Pull.output1(XmlEvent.EndDocument).as(None)
}

def readElement(ctx: T.Context,
is11: Boolean,
name: QName,
chunkAcc: VectorBuilder[XmlEvent]): Pull[F, XmlEvent, (T.Context, VectorBuilder[XmlEvent])] =
def readElement(
ctx: T.Context,
is11: Boolean,
name: QName,
chunkAcc: VectorBuilder[XmlEvent]): Pull[F, XmlEvent, Option[(T.Context, VectorBuilder[XmlEvent])]] =
completeStartTag(ctx, is11, name, chunkAcc).flatMap {
case (ctx, chunkAcc, startTag) if startTag.isEmpty =>
Pull.pure((ctx, chunkAcc += startTag += XmlEvent.EndTag(name)))
case (ctx, chunkAcc, startTag) => readContent(ctx, is11, name, chunkAcc += startTag)
scanPostlog(ctx, chunkAcc += startTag += XmlEvent.EndTag(name))
case (ctx, chunkAcc, startTag) =>
readContent(ctx, is11, name, chunkAcc += startTag).flatMap { case (ctx, chunkAcc) =>
scanPostlog(ctx, chunkAcc)
}
}

/** Scans the input that follows the end of a root element.
  *
  * Processing instructions and comments found here are appended to `chunkAcc` as part of
  * the current document. Anything that can only start a new document (an XML declaration,
  * a declaration token, a start tag, or a character other than `<`) closes the current
  * document with `EndDocument`, opens a new one with `StartDocument`, and hands over to
  * the matching prolog/element routine. At end of input, `EndDocument` is output and the
  * result is `None`.
  */
def scanPostlog(
ctx: T.Context,
chunkAcc: VectorBuilder[XmlEvent]): Pull[F, XmlEvent, Option[(T.Context, VectorBuilder[XmlEvent])]] =
// presumably `space` skips leading whitespace before peeking — confirm against its definition
space(ctx, chunkAcc).flatMap { case (ctx, chunkAcc) =>
peekChar(ctx, chunkAcc).flatMap {
case Some((ctx, chunkAcc, '<')) =>
readMarkupToken(ctx, chunkAcc)
.flatMap {
// a PI named "xml" (case-insensitive) is treated as the XML declaration of a NEW document
case (ctx, chunkAcc, MarkupToken.PIToken(name)) if name.equalsIgnoreCase("xml") =>
handleXmlDecl(ctx, chunkAcc).flatMap { case (ctx, chunkAcc, (is11, decl)) =>
scanPrologToken1(ctx, is11, chunkAcc += XmlEvent.EndDocument += XmlEvent.StartDocument += decl)
}
// any other PI belongs to the postlog of the current document
case (ctx, chunkAcc, MarkupToken.PIToken(name)) =>
readPIBody(ctx, chunkAcc).flatMap { case (ctx, chunkAcc, body) =>
scanPostlog(ctx, chunkAcc += XmlEvent.XmlPI(name, body))
}
// a declaration token starts a new document
// NOTE(review): EndDocument/StartDocument are appended AFTER handleDecl returns; if handleDecl
// itself appends events to chunkAcc they would precede the document boundary — confirm
case (ctx, chunkAcc, MarkupToken.DeclToken(name)) =>
handleDecl(ctx, name, chunkAcc).flatMap { case (ctx, chunkAcc) =>
scanPrologToken2(ctx, false, chunkAcc += XmlEvent.EndDocument += XmlEvent.StartDocument)
}
// a start tag is the root of a new document; is11 is false since no declaration was read for it
case (ctx, chunkAcc, MarkupToken.StartToken(name)) =>
readElement(ctx, false, name, chunkAcc += XmlEvent.EndDocument += XmlEvent.StartDocument)
// a comment token without content produces no event
case (ctx, chunkAcc, MarkupToken.CommentToken(None)) =>
scanPostlog(ctx, chunkAcc)
// a comment token with content is emitted as part of the current document's postlog
case (ctx, chunkAcc, MarkupToken.CommentToken(Some(comment))) =>
scanPostlog(ctx, chunkAcc += XmlEvent.Comment(comment))
case (_, chunkAcc, t) =>
fail("22", s"unexpected markup $t", Some(chunkAcc))
}
// any character other than '<': close the current document and let the prolog scanner handle it
case Some((ctx, chunkAcc, _)) =>
scanPrologToken1(ctx, false, chunkAcc += XmlEvent.EndDocument += XmlEvent.StartDocument)
// end of input: the current document is complete
case None =>
Pull.output1(XmlEvent.EndDocument).as(None)
}
}

def readContent(ctx: T.Context,
Expand Down
85 changes: 82 additions & 3 deletions xml/src/test/scala/fs2/data/xml/EventParserTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,51 @@ import weaver._

object EventParserTest extends SimpleIOSuite {

test("XML parser should handle all kind of XML elements") {
  // A document exercising a declaration, attributes (one with an entity reference),
  // a comment, a CDATA section, an empty element, and misc after the root element.
  val input = Stream.emits("""<?xml version="1.1" encoding="utf-8"?>
                             |<a att1="value1" att2="&amp; another one">
                             | <!-- a comment -->
                             | <b><![CDATA[Test]]></b>
                             | <b/>
                             |</a>
                             |<?target content?>
                             |<!-- closing comment -->""".stripMargin)

  val rootAttributes = List(
    Attr(QName("att1"), List(XmlEvent.XmlString("value1", false))),
    Attr(QName("att2"), List(XmlEvent.XmlString("", false), XmlEvent.XmlEntityRef("amp"), XmlEvent.XmlString(" another one", false)))
  )

  // The trailing processing instruction and comment are expected before EndDocument.
  val expected = List(
    XmlEvent.StartDocument,
    XmlEvent.XmlDecl("1.1", Some("utf-8"), None),
    XmlEvent.StartTag(QName("a"), rootAttributes, false),
    XmlEvent.XmlString("\n ", false),
    XmlEvent.Comment(" a comment "),
    XmlEvent.XmlString("\n ", false),
    XmlEvent.StartTag(QName("b"), Nil, false),
    XmlEvent.XmlString("Test", true),
    XmlEvent.EndTag(QName("b")),
    XmlEvent.XmlString("\n ", false),
    XmlEvent.StartTag(QName("b"), Nil, true),
    XmlEvent.EndTag(QName("b")),
    XmlEvent.XmlString("\n", false),
    XmlEvent.EndTag(QName("a")),
    XmlEvent.XmlPI("target", "content"),
    XmlEvent.Comment(" closing comment "),
    XmlEvent.EndDocument
  )

  val parsed = input.covary[IO].through(events(true)).compile.toList

  parsed.map(actual => expect(actual == expected))
}

test("XML should generate proper events") {
val input = """<root>
| <a attr="value">
Expand Down Expand Up @@ -92,6 +137,35 @@ object EventParserTest extends SimpleIOSuite {
)))
}

test("Postlog has precedence over prolog when several documents are concatenated") {
  val input = """<a></a><?target content?><?xml version="1.1"?><a></a><!-- comment --><a></a>"""

  // Misc found between two documents is expected in the postlog of the document before it.
  val firstDocument = List(
    XmlEvent.StartDocument,
    XmlEvent.StartTag(QName("a"), Nil, false),
    XmlEvent.EndTag(QName("a")),
    XmlEvent.XmlPI("target", "content"),
    XmlEvent.EndDocument
  )
  val secondDocument = List(
    XmlEvent.StartDocument,
    XmlEvent.XmlDecl("1.1", None, None),
    XmlEvent.StartTag(QName("a"), Nil, false),
    XmlEvent.EndTag(QName("a")),
    XmlEvent.Comment(" comment "),
    XmlEvent.EndDocument
  )
  val thirdDocument = List(
    XmlEvent.StartDocument,
    XmlEvent.StartTag(QName("a"), Nil, false),
    XmlEvent.EndTag(QName("a")),
    XmlEvent.EndDocument
  )
  val expected = firstDocument ++ secondDocument ++ thirdDocument

  val parsed = Stream.emits(input).covary[IO].through(events(true)).compile.toList

  parsed.map(actual => expect(actual == expected))
}

test("Comments should be ignored by default") {
val input = """<!-- some comment -->
|<a>
Expand Down Expand Up @@ -171,13 +245,18 @@ object EventParserTest extends SimpleIOSuite {
.through(fs2.text.utf8.decode)
.flatMap(Stream.emits(_))
.through(events())
.compile
.drain
.attempt
.map(res => expect(res.isRight, s"Failed to parse $path"))
.compile
.toList
.map(expectWellFormed(_))
}
.compile
.foldMonoid
}

/** Checks that a parse result is either empty or properly delimited.
  *
  * A non-empty result is accepted when its first element is `Right(XmlEvent.StartDocument)`
  * and its last element is `Right(XmlEvent.EndDocument)`.
  *
  * @param res the attempted events produced for one input
  * @return the combined expectation
  */
def expectWellFormed(res: List[Either[Throwable, XmlEvent]]): Expectations =
  // `head`/`last` throw on an empty list; the Option variants keep the right-hand side
  // total even when it is evaluated for an empty result.
  expect(res.isEmpty) ||
    expect(res.headOption.contains(Right(XmlEvent.StartDocument))) &&
    expect(res.lastOption.contains(Right(XmlEvent.EndDocument)))

}
8 changes: 5 additions & 3 deletions xml/src/test/scala/fs2/data/xml/dom/EventifierSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ abstract class EventifierSpec[Doc](implicit builder: DocumentBuilder[Doc], event
val input = Stream.emits("""<?xml version="1.1" encoding="utf-8"?>
|<a att1="value1" att2="&amp; another one">
| <!-- a comment -->
| <b>Test</b>
| <b><![CDATA[Test]]></b>
| <b/>
|</a>""".stripMargin)
|</a>
|<?target content?>
|<!-- closing comment -->""".stripMargin)

val evts = input.through(events[Fallible, Char]())
val evts = input.through(events[Fallible, Char](true))

val roundtrip = evts.through(documents).through(eventify)

Expand Down

0 comments on commit bd33f1a

Please sign in to comment.