Skip to content

Commit

Permalink
Decouple XmlParser from lxml
Browse files Browse the repository at this point in the history
Note:
Introduced the XmlHandler interface to bolt together parsers
and content handlers such as SAX.

Default Handler LxmlIterparseHandler :)
  • Loading branch information
tefra committed Aug 29, 2020
1 parent 27055d2 commit 208c48f
Show file tree
Hide file tree
Showing 9 changed files with 258 additions and 202 deletions.
2 changes: 0 additions & 2 deletions tests/codegen/parsers/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from unittest import mock
from unittest import TestCase

from lxml import etree

from xsdata.codegen.parsers.schema import SchemaParser
from xsdata.models.enums import FormType
from xsdata.models.enums import Mode
Expand Down
101 changes: 79 additions & 22 deletions tests/formats/dataclass/parsers/test_nodes.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import List
from typing import Type
from typing import Union
from unittest import mock
from unittest.case import TestCase

from lxml import etree

from tests import fixtures_dir
from tests.fixtures.books import BookForm
from tests.fixtures.books import Books
from xsdata.exceptions import ParserError
from xsdata.exceptions import XmlContextError
from xsdata.formats.bindings import T
from xsdata.formats.dataclass.context import XmlContext
from xsdata.formats.dataclass.models.elements import XmlElement
from xsdata.formats.dataclass.models.elements import XmlMeta
from xsdata.formats.dataclass.models.elements import XmlText
from xsdata.formats.dataclass.models.elements import XmlWildcard
from xsdata.formats.dataclass.models.generics import AnyElement
from xsdata.formats.dataclass.parsers.config import ParserConfig
from xsdata.formats.dataclass.parsers.handlers import XmlHandler
from xsdata.formats.dataclass.parsers.nodes import ElementNode
from xsdata.formats.dataclass.parsers.nodes import ElementParser
from xsdata.formats.dataclass.parsers.nodes import EventParser
from xsdata.formats.dataclass.parsers.nodes import NodeParser
from xsdata.formats.dataclass.parsers.nodes import PrimitiveNode
from xsdata.formats.dataclass.parsers.nodes import SkipNode
from xsdata.formats.dataclass.parsers.nodes import UnionNode
Expand Down Expand Up @@ -450,28 +454,81 @@ def test_bind(self):
self.assertEqual(False, node.bind("foo", None, None, []))


class ElementParserTests(TestCase):
def test_parse_from_tree(self):
path = fixtures_dir.joinpath("books/books.xml")
tree = etree.parse(path.resolve().as_uri())
class NodeParserTests(TestCase):
def test_parse(self):
@dataclass
class TestHandler(XmlHandler):
def process(self, source: Any, clazz: Type[T], config: ParserConfig) -> Any:
return Books()

parser = ElementParser()
actual = parser.parse(tree, Books)
self.assertEqual(2, len(actual.book))
parser = NodeParser(handler=TestHandler)
result = parser.parse([], Books)
self.assertEqual(Books(), result)

# The tree will be cleared
self.assertEqual(0, len(tree.getroot()))
def test_parse_when_result_type_is_wrong(self):
source = [("escape", None)]
with self.assertRaises(ParserError) as cm:
parser = NodeParser()
parser.parse(source, Books)

def test_parse_context_unhandled_event(self):
context = [("escape", None)]
with self.assertRaises(ParserError):
parser = ElementParser()
parser.parse_context(context, Books)
self.assertEqual("Failed to create target class `Books`", str(cm.exception))

def test_start(self):
parser = NodeParser()
queue = []
objects = []

class EventParserTests(TestCase):
def test_parse_unhandled_event(self):
source = [("escape", None)]
with self.assertRaises(ParserError):
parser = EventParser()
parser.parse(source, Books)
attrs = {"k": "v"}
ns_map = {"a": "b"}
expected_node = ElementNode(
position=0,
context=parser.context,
meta=parser.context.build(Books),
config=parser.config,
attrs=attrs,
ns_map=ns_map,
)
parser.start(queue, "{urn:books}books", attrs, ns_map, objects, Books)
self.assertEqual(1, len(queue))
self.assertEqual(expected_node, queue[0])

expected_node = ElementNode(
position=0,
context=parser.context,
meta=parser.context.build(BookForm),
config=parser.config,
attrs={},
ns_map={},
)
parser.start(queue, "book", {}, {}, objects, Books)

self.assertEqual(2, len(queue))
self.assertEqual(expected_node, queue[-1])

@mock.patch.object(PrimitiveNode, "bind", return_value=True)
def test_end(self, mock_assemble):
    """Check ``NodeParser.end`` when the queued node binds successfully.

    ``PrimitiveNode.bind`` is patched to return ``True``; the test then
    expects the queue to be drained, the value of the last entry in
    ``objects`` to be returned, and ``bind`` to have received the
    qname, text, tail and objects list unchanged.
    """
    parser = NodeParser()
    # Pre-seeded (qname, value) pair; the assertions below expect its value back.
    objects = [("q", "result")]
    queue = []
    var = XmlText(name="foo", qname="foo")
    queue.append(PrimitiveNode(var=var, ns_map={}))

    result = parser.end(queue, "author", "foobar", None, objects)
    self.assertEqual("result", result)
    self.assertEqual(0, len(queue))
    self.assertEqual(("q", result), objects[-1])
    mock_assemble.assert_called_once_with("author", "foobar", None, objects)

def test_end_with_no_result(self):
    """Check ``NodeParser.end`` returns ``None`` for a queued ``SkipNode``.

    The queue is still expected to be emptied by the call.
    """
    parser = NodeParser()
    objects = [("q", "result")]
    queue = [SkipNode()]

    result = parser.end(queue, "author", "foobar", None, objects)
    self.assertIsNone(result)
    self.assertEqual(0, len(queue))

def test_namespace_prefix(self):
    """Check ``namespace_prefix`` records the prefix/uri pair in ``namespaces.ns_map``."""
    parser = NodeParser()
    parser.namespace_prefix("bar", "foo")
    self.assertEqual({"bar": "foo"}, parser.namespaces.ns_map)
66 changes: 14 additions & 52 deletions tests/formats/dataclass/parsers/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
from tests import fixtures_dir
from tests.fixtures.books import BookForm
from tests.fixtures.books import Books
from xsdata.exceptions import ParserError
from xsdata.formats.dataclass.models.elements import XmlText
from xsdata.formats.dataclass.parsers.config import ParserConfig
from xsdata.formats.dataclass.parsers.nodes import ElementNode
from xsdata.formats.dataclass.parsers.nodes import PrimitiveNode
from xsdata.formats.dataclass.parsers.nodes import SkipNode
from xsdata.formats.dataclass.parsers.xml import XmlParser
Expand All @@ -22,77 +20,41 @@ class XmlParserTests(TestCase):
def setUp(self):
super().setUp()
self.parser = XmlParser()
self.parser.index = 10
self.parser.objects = [(x, x) for x in "abcde"]

def test_parse_context_raises_exception(self):
with self.assertRaises(ParserError) as cm:
self.parser.parse_context([], Books)

self.assertEqual("Failed to create target class `Books`", str(cm.exception))

def test_add_namespace(self):
self.parser.add_namespace(("foo", "bar"))
self.assertEqual({"foo": "bar"}, self.parser.namespaces.ns_map)

@mock.patch.object(ElementNode, "child")
@mock.patch.object(XmlParser, "emit_event")
def test_start(self, mock_emit_event, mock_next_node):
var = XmlText(name="foo", qname="foo")
primitive_node = PrimitiveNode(var=var, ns_map={})
mock_next_node.return_value = primitive_node
config = ParserConfig()
attrs = {}
ns_map = {}

def test_start(self, mock_emit_event):
attrs = {"a": "b"}
queue = []
expected_root_node = ElementNode(
position=0,
context=self.parser.context,
meta=self.parser.context.build(Books),
config=config,
attrs=attrs,
ns_map=ns_map,
)

self.parser.start(queue, "{urn:books}books", attrs, ns_map, 0, Books)

self.parser.start(queue, "{urn:books}books", attrs, {}, [], Books)
self.assertEqual(1, len(queue))
self.assertEqual(expected_root_node, queue[0])

self.parser.start(queue, "child", attrs, ns_map, 1, Books)
self.assertEqual(2, len(queue))
self.assertEqual(primitive_node, queue[1])

mock_emit_event.assert_has_calls(
[
mock.call(EventType.START, "{urn:books}books", attrs=attrs),
mock.call(EventType.START, "child", attrs={}),
]
mock_emit_event.assert_called_once_with(
EventType.START, "{urn:books}books", attrs=attrs
)

@mock.patch.object(XmlParser, "emit_event")
@mock.patch.object(PrimitiveNode, "bind", return_value=True)
def test_end(self, mock_assemble, mock_emit_event):
objects = [("q", "result")]
def test_end(self, mock_emit_event):
objects = []
queue = []
var = XmlText(name="foo", qname="foo")
var = XmlText(name="foo", qname="foo", types=[bool])
queue.append(PrimitiveNode(var=var, ns_map={}))

result = self.parser.end(queue, "author", "foobar", None, objects)
self.assertEqual("result", result)
result = self.parser.end(queue, "enabled", "true", None, objects)
self.assertTrue(result)
self.assertEqual(0, len(queue))
self.assertEqual(("q", result), objects[-1])
mock_assemble.assert_called_once_with("author", "foobar", None, objects)
mock_emit_event.assert_called_once_with(EventType.END, "author", obj=result)
self.assertEqual(("enabled", True), objects[-1])
mock_emit_event.assert_called_once_with(EventType.END, "enabled", obj=result)

@mock.patch.object(XmlParser, "emit_event")
def test_end_with_no_result(self, mock_emit_event):
objects = [("q", "result")]
objects = []
queue = [SkipNode()]

result = self.parser.end(queue, "author", "foobar", None, objects)
self.assertIsNone(result)
self.assertEqual(0, len(objects))
self.assertEqual(0, len(queue))
self.assertEqual(0, mock_emit_event.call_count)

Expand Down
4 changes: 2 additions & 2 deletions xsdata/codegen/parsers/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ def start(
qname: str,
attrs: Dict,
ns_map: Dict,
position: int,
objects: List[Parsed],
clazz: Type[T],
):
self.index += 1
self.indices.append(self.index)
super().start(queue, qname, attrs, ns_map, position, clazz)
super().start(queue, qname, attrs, ns_map, objects, clazz)

def end(
self,
Expand Down
48 changes: 39 additions & 9 deletions xsdata/formats/bindings.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
import abc
import io
import pathlib
from abc import ABC
from abc import abstractmethod
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Type
from typing import TypeVar

T = TypeVar("T")


class AbstractSerializer(ABC):
@abstractmethod
class AbstractSerializer(abc.ABC):
@abc.abstractmethod
def render(self, obj: object) -> object:
"""Render the given object to the target output format."""


T = TypeVar("T")


class AbstractParser(ABC):
class AbstractParser(abc.ABC):
def from_path(self, path: pathlib.Path, clazz: Type[T]) -> T:
"""Parse the input file path and return the resulting object tree."""
return self.parse(str(path.resolve()), clazz)
Expand All @@ -29,6 +30,35 @@ def from_bytes(self, source: bytes, clazz: Type[T]) -> T:
"""Parse the input bytes array and return the resulting object tree."""
return self.parse(io.BytesIO(source), clazz)

@abstractmethod
@abc.abstractmethod
def parse(self, source: Any, clazz: Type[T]) -> T:
"""Parse the input stream and return the resulting object tree."""


class EventParser(AbstractParser):
    """Abstract parser interface exposing element start/end and namespace hooks.

    Concrete subclasses must implement :meth:`start`, :meth:`end` and
    :meth:`namespace_prefix` (all are declared abstract here).
    """

    @abc.abstractmethod
    def start(
        self,
        queue: List,
        qname: str,
        attrs: Dict,
        ns_map: Dict,
        objects: List,
        clazz: Type[T],
    ):
        """Handle the start of the element named ``qname``.

        :param queue: Working list shared with :meth:`end`
            (presumably the pending parser nodes; confirm in subclasses)
        :param qname: Qualified name of the element
        :param attrs: Attributes of the element
        :param ns_map: Namespace map in effect for the element
        :param objects: List of the objects collected so far
        :param clazz: Target class of the parse
        """

    @abc.abstractmethod
    def end(
        self,
        queue: List,
        qname: str,
        text: Optional[str],
        tail: Optional[str],
        objects: List,
    ) -> Any:
        """Handle the end of the element named ``qname``.

        :param queue: Working list shared with :meth:`start`
        :param qname: Qualified name of the element
        :param text: Text content of the element, if any
        :param tail: Tail content of the element, if any
        :param objects: List of the objects collected so far
        :return: Implementation defined; NOTE(review): appears to be the
            object produced for the element, or None; confirm in subclasses
        """

    @abc.abstractmethod
    def namespace_prefix(self, prefix: Optional[str], uri: str):
        """Add the given namespace in the registry."""
Empty file.
Loading

0 comments on commit 208c48f

Please sign in to comment.