Skip to content

Commit

Permalink
smscalls: allow large XML files as input
Browse files Browse the repository at this point in the history
Once XML files grow past a certain size
(about 220MB in my case), the parser throws
an error because the tree is too large (IIRC, this
limit exists for security reasons).

We could look at using iterparse in the future
to parse the file without loading it all at once, but this
change fixes the problem for me.
  • Loading branch information
purarue authored and karlicoss committed Dec 28, 2024
1 parent d8c53bd commit f1d23c5
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions my/smscalls.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,12 @@ def from_me(self) -> bool:
# The '(Unknown)' is just what my android phone does, not sure if there are others
UNKNOWN: set[str] = {'(Unknown)'}

def _parse_xml(xml: Path) -> Any:
    """Parse the XML file at ``xml`` and return the resulting tree.

    The parser is created with ``huge_tree=True`` so that very large
    exports are accepted instead of being rejected for having a tree
    that is too large.
    """
    # Build the parser separately so the non-default option is easy to spot.
    big_tree_parser = etree.XMLParser(huge_tree=True)
    return etree.parse(str(xml), parser=big_tree_parser)


def _extract_calls(path: Path) -> Iterator[Res[Call]]:
tr = etree.parse(str(path))
tr = _parse_xml(path)
for cxml in tr.findall('call'):
dt = cxml.get('date')
dt_readable = cxml.get('readable_date')
Expand Down Expand Up @@ -133,7 +136,7 @@ def messages() -> Iterator[Res[Message]]:


def _extract_messages(path: Path) -> Iterator[Res[Message]]:
tr = etree.parse(str(path))
tr = _parse_xml(path)
for mxml in tr.findall('sms'):
dt = mxml.get('date')
dt_readable = mxml.get('readable_date')
Expand Down Expand Up @@ -225,8 +228,7 @@ def _resolve_null_str(value: str | None) -> str | None:


def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
tr = etree.parse(str(path))

tr = _parse_xml(path)
for mxml in tr.findall('mms'):
dt = mxml.get('date')
dt_readable = mxml.get('readable_date')
Expand Down Expand Up @@ -271,10 +273,7 @@ def _extract_mms(path: Path) -> Iterator[Res[MMS]]:
#
# This seems pretty useless, so we should try and skip it, and just return the
# text/images/data
#
# man, attrib is some internal cpython ._Attrib type which can't
# be typed by any sort of mappingproxy. maybe a protocol could work..?
part_data: dict[str, Any] = part.attrib # type: ignore
part_data: dict[str, Any] = part.attrib
seq: str | None = part_data.get('seq')
if seq == '-1':
continue
Expand Down

0 comments on commit f1d23c5

Please sign in to comment.