Commit f5edabf

fix: handle arbitrary newline terminators
1 parent 12a1353 commit f5edabf

1 file changed: src/picklescan/scanner.py (+11 lines, -3 lines)
@@ -167,7 +167,6 @@ def _http_get(url) -> bytes:
 
 
 def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]]:
-
     globals = set()
 
     memo = {}
@@ -178,7 +177,17 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
         try:
             ops = list(pickletools.genops(data))
         except Exception as e:
-            raise GenOpsError(str(e))
+            # XXX: pickle will happily load files that contain arbitrarily placed new lines whereas pickletools errors in such cases.
+            # below is code to circumvent or skip these newlines while succeeding at parsing the opcodes.
+            err = str(e)
+            if "opcode b'\\n' unknown" not in err:
+                raise GenOpsError(err)
+            else:
+                pos = int(err.split(",")[0].replace("at position ", ""))
+                data.seek(-(pos + 1), 1)
+                ops = list(pickletools.genops(data.read(pos)))
+                data.seek(1, 1)
+
         last_byte = data.read(1)
         data.seek(-1, 1)
 
@@ -288,7 +297,6 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
 
 
 def scan_numpy(data: IO[bytes], file_id) -> ScanResult:
-
     # Delay import to avoid dependency on NumPy
     import numpy as np
 