-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3176 from frostming/non-ascii-toml
Fix non-ASCII support for prettytoml
- Loading branch information
Showing
5 changed files
with
201 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Handle non-ASCII characters correctly in TOML. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
132 changes: 132 additions & 0 deletions
132
tasks/vendoring/patches/patched/prettytoml-unicode.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
diff --git a/pipenv/patched/prettytoml/tokens/py2toml.py b/pipenv/patched/prettytoml/tokens/py2toml.py | ||
index 8299195..2decd02 100644 | ||
--- a/pipenv/patched/prettytoml/tokens/py2toml.py | ||
+++ b/pipenv/patched/prettytoml/tokens/py2toml.py | ||
@@ -2,6 +2,7 @@ | ||
""" | ||
A converter of python values to TOML Token instances. | ||
""" | ||
+from __future__ import unicode_literals | ||
import codecs | ||
import datetime | ||
import six | ||
@@ -81,10 +82,30 @@ def create_string_token(text, bare_string_allowed=False, multiline_strings_allow | ||
|
||
|
||
def _escape_single_line_quoted_string(text): | ||
- if six.PY2: | ||
- return text.encode('unicode-escape').encode('string-escape').replace('"', '\\"').replace("\\'", "'") | ||
- else: | ||
- return codecs.encode(text, 'unicode-escape').decode().replace('"', '\\"') | ||
+ text = text.decode('utf-8') if isinstance(text, six.binary_type) else text | ||
+ start = 0 | ||
+ i = 0 | ||
+ res = [] | ||
+ _escapes = {'\n': '\\n', '\r': '\\r', '\\': '\\\\', '\t': '\\t', | ||
+ '\b': '\\b', '\f': '\\f', '"': '\\"'} | ||
+ | ||
+ def flush(): | ||
+ if start < i: | ||
+ res.append(text[start:i]) | ||
+ return i + 1 | ||
+ | ||
+ while i < len(text): | ||
+ c = text[i] | ||
+ if c in _escapes: | ||
+ start = flush() | ||
+ res.append(_escapes[c]) | ||
+ elif ord(c) < 0x20: | ||
+ start = flush() | ||
+ res.append('\\u%04x' % ord(c)) | ||
+ i += 1 | ||
+ | ||
+ flush() | ||
+ return ''.join(res) | ||
|
||
|
||
def _create_multiline_string_token(text): | ||
diff --git a/pipenv/patched/prettytoml/tokens/toml2py.py b/pipenv/patched/prettytoml/tokens/toml2py.py | ||
index 2bf9c1c..5680443 100644 | ||
--- a/pipenv/patched/prettytoml/tokens/toml2py.py | ||
+++ b/pipenv/patched/prettytoml/tokens/toml2py.py | ||
@@ -1,3 +1,4 @@ | ||
+from __future__ import unicode_literals | ||
import re | ||
import string | ||
import iso8601 | ||
@@ -39,42 +40,40 @@ def _unescape_str(text): | ||
""" | ||
Unescapes a string according the TOML spec. Raises BadEscapeCharacter when appropriate. | ||
""" | ||
- | ||
- # Detect bad escape jobs | ||
- bad_escape_regexp = re.compile(r'([^\\]|^)\\[^btnfr"\\uU]') | ||
- if bad_escape_regexp.findall(text): | ||
- raise BadEscapeCharacter | ||
- | ||
- # Do the unescaping | ||
- if six.PY2: | ||
- return _unicode_escaped_string(text).decode('string-escape').decode('unicode-escape') | ||
- else: | ||
- return codecs.decode(_unicode_escaped_string(text), 'unicode-escape') | ||
- | ||
- | ||
-def _unicode_escaped_string(text): | ||
- """ | ||
- Escapes all unicode characters in the given string | ||
- """ | ||
- | ||
- if six.PY2: | ||
- text = unicode(text) | ||
- | ||
- def is_unicode(c): | ||
- return c.lower() not in string.ascii_letters + string.whitespace + string.punctuation + string.digits | ||
- | ||
- def escape_unicode_char(x): | ||
- if six.PY2: | ||
- return x.encode('unicode-escape') | ||
+ text = text.decode('utf-8') if isinstance(text, six.binary_type) else text | ||
+ tokens = [] | ||
+ i = 0 | ||
+ basicstr_re = re.compile(r'[^"\\\000-\037]*') | ||
+ unicode_re = re.compile(r'[uU]((?<=u)[a-fA-F0-9]{4}|(?<=U)[a-fA-F0-9]{8})') | ||
+ escapes = { | ||
+ 'b': '\b', | ||
+ 't': '\t', | ||
+ 'n': '\n', | ||
+ 'f': '\f', | ||
+ 'r': '\r', | ||
+ '\\': '\\', | ||
+ '"': '"', | ||
+ '/': '/', | ||
+ "'": "'" | ||
+ } | ||
+ while True: | ||
+ m = basicstr_re.match(text, i) | ||
+ i = m.end() | ||
+ tokens.append(m.group()) | ||
+ if i == len(text) or text[i] != '\\': | ||
+ break | ||
else: | ||
- return codecs.encode(x, 'unicode-escape') | ||
- | ||
- if any(is_unicode(c) for c in text): | ||
- homogeneous_chars = tuple(escape_unicode_char(c) if is_unicode(c) else c.encode() for c in text) | ||
- homogeneous_bytes = functools.reduce(operator.add, homogeneous_chars) | ||
- return homogeneous_bytes.decode() | ||
- else: | ||
- return text | ||
+ i += 1 | ||
+ if unicode_re.match(text, i): | ||
+ m = unicode_re.match(text, i) | ||
+ i = m.end() | ||
+ tokens.append(six.unichr(int(m.group(1), 16))) | ||
+ else: | ||
+ if text[i] not in escapes: | ||
+ raise BadEscapeCharacter | ||
+ tokens.append(escapes[text[i]]) | ||
+ i += 1 | ||
+ return ''.join(tokens) | ||
|
||
|
||
def _to_string(token): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters