Skip to content

Commit

Permalink
Fix fn:unparsed-text* functions and update release info
Browse files Browse the repository at this point in the history
  - Use codecs.getreader() for checking resource data stream
  • Loading branch information
brunato committed Feb 10, 2024
1 parent a1879e2 commit ffd19af
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 36 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
CHANGELOG
*********

`v4.2.1`_ (2024-02-10)
======================
* Fix dynamic context initialization with an lxml non-root element (issue #71)
* Fix XP30+ function fn:function-lookup
* Fix XP30+ fn:unparsed-text, fn:unparsed-text-lines and fn:unparsed-text-available

`v4.2.0`_ (2024-02-03)
======================
* Drop support for Python 3.7
Expand Down Expand Up @@ -443,3 +449,4 @@ CHANGELOG
.. _v4.1.4: https://github.com/sissaschool/elementpath/compare/v4.1.3...v4.1.4
.. _v4.1.5: https://github.com/sissaschool/elementpath/compare/v4.1.4...v4.1.5
.. _v4.2.0: https://github.com/sissaschool/elementpath/compare/v4.1.5...v4.2.0
.. _v4.2.1: https://github.com/sissaschool/elementpath/compare/v4.2.0...v4.2.1
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ has its own implementation of XPath 1.0.
Installation and usage
======================

You can install the package with *pip* in a Python 3.7+ environment::
You can install the package with *pip* in a Python 3.8+ environment::

pip install elementpath

Expand Down
75 changes: 44 additions & 31 deletions elementpath/xpath30/_xpath30_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1268,33 +1268,29 @@ def evaluate_unparsed_text_functions(self, context=None):
raise self.error('FOUT1190') from None

if context is not None and uri in context.text_resources:
obj = context.text_resources[uri]
text = context.text_resources[uri]
else:
try:
with urlopen(uri) as rp:
obj = rp.read()
except (ValueError, URLError) as err:
message = str(err)
if 'No such file' in message or \
'unknown url type' in message or \
'HTTP Error 404' in message or \
'failure in name resolution' in message:
raise self.error('FOUT1170', message) from None
raise self.error('FOUT1190') from None
else:
if context is not None:
context.text_resources[uri] = obj
stream_reader = codecs.getreader(encoding)(rp)
text = stream_reader.read()
except URLError as err:
raise self.error('FOUT1170', err) from None
except ValueError as err:
if len(self) > 1:
raise self.error('FOUT1190', err) from None

try:
text = codecs.decode(obj, encoding)
except UnicodeDecodeError:
if len(self) > 1:
raise self.error('FOUT1190') from None
try:
with urlopen(uri) as rp:
stream_reader = codecs.getreader('UTF-16')(rp)
text = stream_reader.read()
except URLError as err:
raise self.error('FOUT1170', err) from None
except ValueError as err:
raise self.error('FOUT1190', err) from None

try:
text = codecs.decode(obj, 'UTF-16')
except UnicodeDecodeError:
raise self.error('FOUT1190') from None
if context is not None:
context.text_resources[uri] = text

if not all(is_xml_codepoint(ord(s)) for s in text):
raise self.error('FOUT1190')
Expand Down Expand Up @@ -1330,22 +1326,39 @@ def evaluate_unparsed_text_available_function(self, context=None):

try:
uri = self.get_absolute_uri(href)
except ValueError:
return False

try:
codecs.lookup(encoding)
with urlopen(uri) as rp:
obj = rp.read()
except (ValueError, URLError, LookupError):
except LookupError:
return False

try:
return all(is_xml_codepoint(ord(s)) for s in codecs.decode(obj, encoding))
except UnicodeDecodeError:
with urlopen(uri) as rp:
stream_reader = codecs.getreader(encoding)(rp)
for line in stream_reader:
if any(not is_xml_codepoint(ord(s)) for s in line):
return False
except URLError:
return False
except ValueError:
if len(self) > 1:
return False
else:
return True

try:
return all(is_xml_codepoint(ord(s)) for s in codecs.decode(obj, 'UTF-16'))
except UnicodeDecodeError:
return False
# Fallback auto-detection with utf-16
try:
with urlopen(uri) as rp:
stream_reader = codecs.getreader('UTF-16')(rp)
for line in stream_reader:
if any(not is_xml_codepoint(ord(s)) for s in line):
return False
except (ValueError, URLError):
return False
else:
return True


@method(function('environment-variable', nargs=1,
Expand Down
4 changes: 2 additions & 2 deletions publiccode.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2'
name: elementpath
url: 'https://github.com/sissaschool/elementpath'
landingURL: 'https://github.com/sissaschool/elementpath'
releaseDate: '2024-02-03'
softwareVersion: v4.2.0
releaseDate: '2024-02-10'
softwareVersion: v4.2.1
developmentStatus: stable
platforms:
- linux
Expand Down
2 changes: 0 additions & 2 deletions tests/execute_w3c_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@
# For XP30+
'fn-root__K-NodeRootFunc-2', # includes a XPath 3.0 fn:generate-id()
'fn-codepoints-to-string__cbcl-codepoints-to-string-021', # Too long ...
'fn-unparsed-text__fn-unparsed-text-038', # Typo in filename
'fn-unparsed-text-lines__fn-unparsed-text-lines-038', # Typo in filename
'fn-serialize__serialize-xml-015b', # Do not raise, attribute is good
'fn-parse-xml-fragment__parse-xml-fragment-022-st', # conflict with parse-xml-fragment-022
'fn-for-each-pair__fn-for-each-pair-017', # Requires PI and comments parsing
Expand Down

0 comments on commit ffd19af

Please sign in to comment.