|
| 1 | +# defusedxml |
| 2 | +# |
| 3 | +# Copyright (c) 2013-2020 by Christian Heimes <christian@python.org> |
| 4 | +# Licensed to PSF under a Contributor Agreement. |
| 5 | +# See https://www.python.org/psf/license for licensing details. |
| 6 | +"""Defused xml.etree.ElementTree facade |
| 7 | +""" |
| 8 | +from __future__ import print_function, absolute_import |
| 9 | + |
| 10 | +import sys |
| 11 | +import warnings |
| 12 | +from xml.etree.ElementTree import ParseError |
| 13 | +from xml.etree.ElementTree import TreeBuilder as _TreeBuilder |
| 14 | +from xml.etree.ElementTree import parse as _parse |
| 15 | +from xml.etree.ElementTree import tostring |
| 16 | + |
| 17 | +import importlib |
| 18 | + |
| 19 | + |
| 20 | +from .common import DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden |
| 21 | + |
| 22 | +__origin__ = "xml.etree.ElementTree" |
| 23 | + |
| 24 | + |
def _get_py3_cls():
    """Return the pure Python ``XMLParser`` class and ``iterparse`` function.

    Python 3.3 hides the pure Python code but defusedxml requires it.
    A fresh copy of ``xml.etree.ElementTree`` is imported while the
    ``_elementtree`` module is blocked in ``sys.modules``. Afterwards
    ``sys.modules`` and the ``ElementTree`` attribute of the ``xml.etree``
    package are put back into the state they had before the call.

    The code is based on test.support.import_fresh_module().

    Returns:
        A ``(XMLParser, iterparse)`` tuple taken from the freshly imported
        module.
    """
    pymodname = "xml.etree.ElementTree"
    cmodname = "_elementtree"

    pymod = sys.modules.pop(pymodname, None)
    cmod = sys.modules.pop(cmodname, None)

    # A ``None`` entry in sys.modules makes ``import _elementtree`` fail
    # while the fresh copy of the Python module is being imported.
    sys.modules[cmodname] = None
    try:
        pure_pymod = importlib.import_module(pymodname)
    finally:
        # Restore the original module.  When it had not been imported
        # before, drop the entry instead of storing ``None``: a ``None``
        # value would make every later import of the module fail.
        if pymod is not None:
            sys.modules[pymodname] = pymod
        else:
            sys.modules.pop(pymodname, None)
        if cmod is not None:
            sys.modules[cmodname] = cmod
        else:
            sys.modules.pop(cmodname, None)
        # restore attribute on original package
        etree_pkg = sys.modules["xml.etree"]
        if pymod is not None:
            etree_pkg.ElementTree = pymod
        elif hasattr(etree_pkg, "ElementTree"):
            del etree_pkg.ElementTree

    _XMLParser = pure_pymod.XMLParser
    _iterparse = pure_pymod.iterparse
    # patch pure module to use ParseError from C extension
    pure_pymod.ParseError = ParseError

    return _XMLParser, _iterparse
| 59 | + |
| 60 | + |
# XMLParser class and iterparse function taken from the freshly imported
# copy of xml.etree.ElementTree (see _get_py3_cls).
_XMLParser, _iterparse = _get_py3_cls()

# Unique default for the deprecated ``html`` argument of DefusedXMLParser;
# lets the constructor tell "not passed" apart from any real value.
_sentinel = object()
| 64 | + |
| 65 | + |
class DefusedXMLParser(_XMLParser):
    """XMLParser subclass that rejects selected XML constructs.

    Depending on the ``forbid_*`` flags, handlers are installed on the
    underlying parser object (``self.parser``) that raise as soon as the
    corresponding construct is encountered:

    * ``forbid_dtd`` -- a doctype declaration raises ``DTDForbidden``.
    * ``forbid_entities`` -- an entity declaration (parsed or unparsed)
      raises ``EntitiesForbidden``.
    * ``forbid_external`` -- an external entity reference raises
      ``ExternalReferenceForbidden``.
    """

    def __init__(
        self,
        html=_sentinel,
        target=None,
        encoding=None,
        forbid_dtd=False,
        forbid_entities=True,
        forbid_external=True,
    ):
        """Create the parser and install the requested protections.

        Args:
            html: Deprecated and unsupported.  Any true value raises
                ``TypeError``; any other explicitly passed value only
                triggers a ``DeprecationWarning``.
            target: Forwarded to the base class constructor.
            encoding: Forwarded to the base class constructor.
            forbid_dtd: Raise ``DTDForbidden`` on a doctype declaration.
            forbid_entities: Raise ``EntitiesForbidden`` on an entity
                declaration.
            forbid_external: Raise ``ExternalReferenceForbidden`` on an
                external entity reference.

        Raises:
            TypeError: If ``html`` is passed with a true value.
        """
        super().__init__(target=target, encoding=encoding)
        if html is not _sentinel:
            # the 'html' argument has been deprecated and ignored in all
            # supported versions of Python. Python 3.8 finally removed it.
            if html:
                raise TypeError("'html=True' is no longer supported.")
            else:
                warnings.warn(
                    "'html' keyword argument is no longer supported. Pass "
                    "in arguments as keyword arguments.",
                    category=DeprecationWarning,
                    # attribute the warning to the code that constructed
                    # the parser instead of to this module
                    stacklevel=2,
                )

        self.forbid_dtd = forbid_dtd
        self.forbid_entities = forbid_entities
        self.forbid_external = forbid_external
        parser = self.parser
        if self.forbid_dtd:
            parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl
        if self.forbid_entities:
            parser.EntityDeclHandler = self.defused_entity_decl
            parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl
        if self.forbid_external:
            parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler

    def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Doctype handler: always raise ``DTDForbidden``."""
        raise DTDForbidden(name, sysid, pubid)

    def defused_entity_decl(
        self, name, is_parameter_entity, value, base, sysid, pubid, notation_name
    ):
        """Entity declaration handler: always raise ``EntitiesForbidden``."""
        raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)

    def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Unparsed entity handler: always raise ``EntitiesForbidden``."""
        # expat 1.2
        raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name)  # pragma: no cover

    def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
        """External entity handler: always raise ``ExternalReferenceForbidden``."""
        raise ExternalReferenceForbidden(context, base, sysid, pubid)
| 115 | + |
| 116 | + |
# Aliases: every name below is bound to the DefusedXMLParser class.
# XMLParse is a typo, keep it for backwards compatibility
XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser
| 120 | + |
| 121 | + |
def parse(source, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """Parse *source* via ``xml.etree.ElementTree.parse``.

    When no *parser* is supplied, a ``DefusedXMLParser`` with a fresh
    ``TreeBuilder`` target is created from the ``forbid_*`` flags.  When a
    *parser* is supplied it is used as is and the ``forbid_*`` flags are
    ignored.
    """
    if parser is not None:
        return _parse(source, parser)

    protections = {
        "forbid_dtd": forbid_dtd,
        "forbid_entities": forbid_entities,
        "forbid_external": forbid_external,
    }
    defused = DefusedXMLParser(target=_TreeBuilder(), **protections)
    return _parse(source, defused)
| 131 | + |
| 132 | + |
def iterparse(
    source,
    events=None,
    parser=None,
    forbid_dtd=False,
    forbid_entities=True,
    forbid_external=True,
):
    """Call the pure Python ``iterparse`` on *source*.

    A caller-supplied *parser* is passed through untouched and the
    ``forbid_*`` flags are then ignored.  Otherwise a ``DefusedXMLParser``
    with a fresh ``TreeBuilder`` target is built from those flags.
    """
    active_parser = parser
    if active_parser is None:
        builder = _TreeBuilder()
        active_parser = DefusedXMLParser(
            target=builder,
            forbid_dtd=forbid_dtd,
            forbid_entities=forbid_entities,
            forbid_external=forbid_external,
        )
    return _iterparse(source, events, active_parser)
| 149 | + |
| 150 | + |
def fromstring(text, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """Feed *text* to a new ``DefusedXMLParser`` and return its result.

    The parser is created with a fresh ``TreeBuilder`` target and the given
    ``forbid_*`` flags; the return value is whatever the parser's
    ``close()`` returns.
    """
    protections = {
        "forbid_dtd": forbid_dtd,
        "forbid_entities": forbid_entities,
        "forbid_external": forbid_external,
    }
    defused = DefusedXMLParser(target=_TreeBuilder(), **protections)
    defused.feed(text)
    result = defused.close()
    return result
| 160 | + |
| 161 | + |
# ``XML`` is an alias of ``fromstring``.
XML = fromstring
| 163 | + |
| 164 | + |
def fromstringlist(sequence, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """Feed every item of *sequence* to a new ``DefusedXMLParser``.

    The items are fed in iteration order to a parser created with a fresh
    ``TreeBuilder`` target and the given ``forbid_*`` flags; the return
    value is whatever the parser's ``close()`` returns.
    """
    builder = _TreeBuilder()
    defused = DefusedXMLParser(
        target=builder,
        forbid_dtd=forbid_dtd,
        forbid_entities=forbid_entities,
        forbid_external=forbid_external,
    )
    for fragment in sequence:
        defused.feed(fragment)
    result = defused.close()
    return result
| 175 | + |
| 176 | + |
# Names exported by ``from <this module> import *``.
__all__ = [
    "ParseError",
    "XML",
    "XMLParse",
    "XMLParser",
    "XMLTreeBuilder",
    "fromstring",
    "fromstringlist",
    "iterparse",
    "parse",
    "tostring",
]
0 commit comments