Skip to content

Commit 133bac0

Browse files
committed
fix: handle arbitrary newline terminators
1 parent 3b88ca2 commit 133bac0

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

src/picklescan/scanner.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,6 @@ def _http_get(url) -> bytes:
168168

169169

170170
def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]]:
171-
172171
globals = set()
173172

174173
memo = {}
@@ -179,7 +178,17 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
179178
try:
180179
ops = list(pickletools.genops(data))
181180
except Exception as e:
182-
raise GenOpsError(str(e))
181+
# XXX: pickle will happily load files that contain arbitrarily placed newlines, whereas pickletools raises an error in such cases.
182+
# The code below skips these newlines so that the opcodes can still be parsed.
183+
err = str(e)
184+
if "opcode b'\\n' unknown" not in err:
185+
raise GenOpsError(err)
186+
else:
187+
pos = int(err.split(",")[0].replace("at position ", ""))
188+
data.seek(-(pos + 1), 1)
189+
ops = list(pickletools.genops(data.read(pos)))
190+
data.seek(1, 1)
191+
183192
last_byte = data.read(1)
184193
data.seek(-1, 1)
185194

@@ -288,7 +297,6 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
288297

289298

290299
def scan_numpy(data: IO[bytes], file_id) -> ScanResult:
291-
292300
# Delay import to avoid dependency on NumPy
293301
import numpy as np
294302

0 commit comments

Comments
 (0)