Skip to content

Commit b0c6875

Browse files
committed
use only lxml for XML handling (#863)
1 parent b571118 commit b0c6875

File tree

9 files changed

+58
-112
lines changed

9 files changed

+58
-112
lines changed

.github/workflows/main.yml

-4
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@ jobs:
88
strategy:
99
matrix:
1010
python-version: ['3.7', '3.8', '3.9']
11-
lxml: [true, false]
1211
env:
13-
LXML: ${{ matrix.lxml }}
1412
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1513
COVERALLS_SERVICE_NAME: github
1614
steps:
@@ -28,8 +26,6 @@ jobs:
2826
pip3 install -e .
2927
pip3 install -r requirements.txt
3028
pip3 install -r requirements-dev.txt
31-
echo "LXML => $LXML"
32-
if [ "$LXML" == "true" ]; then pip install lxml; fi
3329
- name: run tests ⚙️
3430
run: python3 -m pytest
3531
- name: run coveralls ⚙️

docs/en/installation.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ Installation
44
Requirements
55
------------
66

7-
OWSLib requires a Python interpreter, as well as `ElementTree <https://docs.python.org/2/library/xml.etree.elementtree.html>`_ or `lxml <http://lxml.de>`_ for XML parsing.
7+
OWSLib requires a Python interpreter, as well as `lxml <https://lxml.de>`_ for XML parsing.
88

99
Install
1010
-------

etc/debian/control

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ Homepage: http://geopython.github.com/OWSLib/
99

1010
Package: python-owslib
1111
Architecture: all
12-
Depends: ${misc:Depends}, debconf, python (>=2.7), python-lxml
12+
Depends: ${misc:Depends}, debconf, python (>=3), python-lxml
1313
Description: OWSLib is a Python package for client programming with Open Geospatial Consortium (OGC) web service (hence OWS) interface standards, and their related content models.

owslib/catalogue/csw2.py

+1-14
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,6 @@ def getrecords(self, qtype=None, keywords=[], typenames='csw:Record', propertyna
226226
else:
227227
# construct request
228228
node0 = self._setrootelement('csw:GetRecords')
229-
if etree.__name__ != 'lxml.etree': # apply nsmap manually
230-
node0.set('xmlns:ows', namespaces['ows'])
231-
node0.set('xmlns:gmd', namespaces['gmd'])
232-
node0.set('xmlns:dif', namespaces['dif'])
233-
node0.set('xmlns:fgdc', namespaces['fgdc'])
234229
node0.set('outputSchema', outputschema)
235230
node0.set('outputFormat', format)
236231
node0.set('version', self.version)
@@ -354,11 +349,6 @@ def getrecords2(self, constraints=[], sortby=None, typenames='csw:Record', esn='
354349
else:
355350
# construct request
356351
node0 = self._setrootelement('csw:GetRecords')
357-
if etree.__name__ != 'lxml.etree': # apply nsmap manually
358-
node0.set('xmlns:ows', namespaces['ows'])
359-
node0.set('xmlns:gmd', namespaces['gmd'])
360-
node0.set('xmlns:dif', namespaces['dif'])
361-
node0.set('xmlns:fgdc', namespaces['fgdc'])
362352
node0.set('outputSchema', outputschema)
363353
node0.set('outputFormat', format)
364354
node0.set('version', self.version)
@@ -622,10 +612,7 @@ def _setidentifierkey(self, el):
622612
return el
623613

624614
def _setrootelement(self, el):
625-
if etree.__name__ == 'lxml.etree': # apply nsmap
626-
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)
627-
else:
628-
return etree.Element(util.nspath_eval(el, namespaces))
615+
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)
629616

630617
def _setconstraint(self, parent, qtype=None, propertyname='csw:AnyText', keywords=[], bbox=None, cql=None,
631618
identifier=None):

owslib/catalogue/csw3.py

+1-9
Original file line numberDiff line numberDiff line change
@@ -242,11 +242,6 @@ def getrecords(self, constraints=[], sortby=None, typenames='csw30:Record', esn=
242242
else:
243243
# construct request
244244
node0 = self._setrootelement('csw30:GetRecords')
245-
if etree.__name__ != 'lxml.etree': # apply nsmap manually
246-
node0.set('xmlns:ows110', namespaces['ows110'])
247-
node0.set('xmlns:gmd', namespaces['gmd'])
248-
node0.set('xmlns:dif', namespaces['dif'])
249-
node0.set('xmlns:fgdc', namespaces['fgdc'])
250245
node0.set('outputSchema', outputschema)
251246
node0.set('outputFormat', format)
252247
node0.set('version', self.version)
@@ -516,10 +511,7 @@ def _setidentifierkey(self, el):
516511
return el
517512

518513
def _setrootelement(self, el):
519-
if etree.__name__ == 'lxml.etree': # apply nsmap
520-
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)
521-
else:
522-
return etree.Element(util.nspath_eval(el, namespaces))
514+
return etree.Element(util.nspath_eval(el, namespaces), nsmap=namespaces)
523515

524516
def _setconstraint(self, parent, qtype=None, propertyname='csw30:AnyText', keywords=[], bbox=None, cql=None,
525517
identifier=None):

owslib/etree.py

+14-18
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,33 @@
44
# Contact email: sgillies@frii.com
55
# =============================================================================
66

7+
8+
from lxml import etree
9+
from lxml.etree import ParseError
10+
ElementType = etree._Element
11+
712
from owslib.namespaces import Namespaces
813

914

10-
def patch_well_known_namespaces(etree_module):
11-
"""Monkey patches the etree module to add some well-known namespaces."""
15+
def patch_well_known_namespaces():
16+
"""Monkey patches lxml.etree to add some well-known namespaces."""
1217

1318
ns = Namespaces()
1419

1520
try:
16-
register_namespace = etree_module.register_namespace
21+
register_namespace = etree.register_namespace
1722
except AttributeError:
18-
etree_module._namespace_map
23+
etree._namespace_map
1924

2025
def register_namespace(prefix, uri):
21-
etree_module._namespace_map[uri] = prefix
26+
etree._namespace_map[uri] = prefix
2227

2328
for k, v in list(ns.get_namespaces().items()):
2429
register_namespace(k, v)
2530

31+
etree.set_default_parser(
32+
parser=etree.XMLParser(resolve_entities=False)
33+
)
2634

27-
# try to find lxml or elementtree
28-
try:
29-
from lxml import etree
30-
from lxml.etree import ParseError
31-
ElementType = etree._Element
32-
except ImportError:
33-
import xml.etree.ElementTree as etree
34-
ElementType = etree.Element
35-
try:
36-
from xml.etree.ElementTree import ParseError
37-
except ImportError:
38-
from xml.parsers.expat import ExpatError as ParseError
3935

40-
patch_well_known_namespaces(etree)
36+
patch_well_known_namespaces()

owslib/feature/schema.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from urllib.parse import urlencode, parse_qsl
1414
from owslib.etree import etree
1515
from owslib.namespaces import Namespaces
16-
from owslib.util import which_etree, findall, Authentication, openURL
16+
from owslib.util import findall, Authentication, openURL
1717

1818
MYNS = Namespaces()
1919
XS_NAMESPACE = MYNS.get_namespace("xs")

owslib/util.py

+36-62
Original file line numberDiff line numberDiff line change
@@ -277,11 +277,8 @@ def nspath_eval(xpath, namespaces):
277277

278278
def cleanup_namespaces(element):
279279
""" Remove unused namespaces from an element """
280-
if etree.__name__ == 'lxml.etree':
281-
etree.cleanup_namespaces(element)
282-
return element
283-
else:
284-
return etree.fromstring(etree.tostring(element))
280+
etree.cleanup_namespaces(element)
281+
return element
285282

286283

287284
def add_namespaces(root, ns_keys):
@@ -292,35 +289,34 @@ def add_namespaces(root, ns_keys):
292289

293290
ns_keys = [(x, namespaces.get_namespace(x)) for x in ns_keys]
294291

295-
if etree.__name__ != 'lxml.etree':
296-
# We can just add more namespaces when not using lxml.
297-
# We can't re-add an existing namespaces. Get a list of current
298-
# namespaces in use
299-
existing_namespaces = set()
300-
for elem in root.iter():
301-
if elem.tag[0] == "{":
302-
uri, tag = elem.tag[1:].split("}")
303-
existing_namespaces.add(namespaces.get_namespace_from_url(uri))
304-
for key, link in ns_keys:
305-
if link is not None and key not in existing_namespaces:
306-
root.set("xmlns:%s" % key, link)
307-
return root
308-
else:
309-
# lxml does not support setting xmlns attributes
310-
# Update the elements nsmap with new namespaces
311-
new_map = root.nsmap
312-
for key, link in ns_keys:
313-
if link is not None:
314-
new_map[key] = link
315-
# Recreate the root element with updated nsmap
316-
new_root = etree.Element(root.tag, nsmap=new_map)
317-
# Carry over attributes
318-
for a, v in list(root.items()):
319-
new_root.set(a, v)
320-
# Carry over children
321-
for child in root:
322-
new_root.append(deepcopy(child))
323-
return new_root
292+
# lxml does not support setting xmlns attributes
293+
# Update the element's nsmap with new namespaces
294+
new_map = root.nsmap
295+
for key, link in ns_keys:
296+
if link is not None:
297+
new_map[key] = link
298+
# Recreate the root element with updated nsmap
299+
new_root = etree.Element(root.tag, nsmap=new_map)
300+
# Carry over attributes
301+
for a, v in list(root.items()):
302+
new_root.set(a, v)
303+
# Carry over children
304+
for child in root:
305+
new_root.append(deepcopy(child))
306+
return new_root
307+
308+
# We can just add more namespaces when not using lxml.
309+
# We can't re-add an existing namespace. Get a list of current
310+
# namespaces in use
311+
existing_namespaces = set()
312+
for elem in root.iter():
313+
if elem.tag[0] == "{":
314+
uri, tag = elem.tag[1:].split("}")
315+
existing_namespaces.add(namespaces.get_namespace_from_url(uri))
316+
for key, link in ns_keys:
317+
if link is not None and key not in existing_namespaces:
318+
root.set("xmlns:%s" % key, link)
319+
return root
324320

325321

326322
def getXMLInteger(elem, tag):
@@ -519,21 +515,14 @@ def element_to_string(element, encoding=None, xml_declaration=False):
519515
if encoding is None:
520516
encoding = "ISO-8859-1"
521517

522-
if etree.__name__ == 'lxml.etree':
523-
if xml_declaration:
524-
if encoding in ['unicode', 'utf-8']:
525-
output = '<?xml version="1.0" encoding="utf-8" standalone="no"?>\n{}'.format(
526-
etree.tostring(element, encoding='unicode'))
527-
else:
528-
output = etree.tostring(element, encoding=encoding, xml_declaration=True)
518+
if xml_declaration:
519+
if encoding in ['unicode', 'utf-8']:
520+
output = '<?xml version="1.0" encoding="utf-8" standalone="no"?>\n{}'.format(
521+
etree.tostring(element, encoding='unicode'))
529522
else:
530-
output = etree.tostring(element)
523+
output = etree.tostring(element, encoding=encoding, xml_declaration=True)
531524
else:
532-
if xml_declaration:
533-
output = '<?xml version="1.0" encoding="{}" standalone="no"?>\n{}'.format(
534-
encoding, etree.tostring(element, encoding=encoding))
535-
else:
536-
output = etree.tostring(element)
525+
output = etree.tostring(element)
537526

538527
return output
539528

@@ -777,21 +766,6 @@ def bind_url(url):
777766
log.addHandler(NullHandler())
778767

779768

780-
def which_etree():
781-
"""decipher which etree library is being used by OWSLib"""
782-
783-
which_etree = None
784-
785-
if 'lxml' in etree.__file__:
786-
which_etree = 'lxml.etree'
787-
elif 'xml/etree' in etree.__file__:
788-
which_etree = 'xml.etree'
789-
elif 'elementree' in etree.__file__:
790-
which_etree = 'elementtree.ElementTree'
791-
792-
return which_etree
793-
794-
795769
def findall(root, xpath, attribute_name=None, attribute_value=None):
796770
"""Find elements recursively from given root element based on
797771
xpath and possibly given attribute

requirements.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
dataclasses; python_version < '3.7'
2+
lxml
13
python-dateutil>=1.5
24
pytz
3-
requests>=1.0
45
pyyaml
5-
dataclasses; python_version < '3.7'
6+
requests>=1.0

0 commit comments

Comments
 (0)