@@ -50,8 +50,9 @@ def merge(self, sr: "ScanResult"):
50
50
51
51
52
52
class GenOpsError (Exception ):
53
- def __init__ (self , msg : str ):
53
+ def __init__ (self , msg : str , globals : Optional [ Set [ Tuple [ str , str ]]] ):
54
54
self .msg = msg
55
+ self .globals = globals
55
56
super ().__init__ ()
56
57
57
58
def __str__ (self ) -> str :
@@ -167,7 +168,6 @@ def _http_get(url) -> bytes:
167
168
168
169
169
170
def _list_globals (data : IO [bytes ], multiple_pickles = True ) -> Set [Tuple [str , str ]]:
170
-
171
171
globals = set ()
172
172
173
173
memo = {}
@@ -178,7 +178,11 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
178
178
try :
179
179
ops = list (pickletools .genops (data ))
180
180
except Exception as e :
181
- raise GenOpsError (str (e ))
181
+ # XXX: given we can have multiple pickles in a file, we may have already successfully extracted globals from a valid pickle.
182
+ # Thus return the already found globals in the error & let the caller decide what to do.
183
+ globals_opt = globals if len (globals ) > 0 else None
184
+ raise GenOpsError (str (e ), globals_opt )
185
+
182
186
last_byte = data .read (1 )
183
187
data .seek (- 1 , 1 )
184
188
@@ -232,18 +236,12 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
232
236
return globals
233
237
234
238
235
- def scan_pickle_bytes (data : IO [bytes ], file_id , multiple_pickles = True ) -> ScanResult :
236
- """Disassemble a Pickle stream and report issues"""
237
-
239
+ def _build_scan_result_from_raw_globals (
240
+ raw_globals : Set [Tuple [str , str ]],
241
+ file_id ,
242
+ scan_err = False ,
243
+ ) -> ScanResult :
238
244
globals = []
239
- try :
240
- raw_globals = _list_globals (data , multiple_pickles )
241
- except GenOpsError as e :
242
- _log .error (f"ERROR: parsing pickle in { file_id } : { e } " )
243
- return ScanResult (globals , scan_err = True )
244
-
245
- _log .debug ("Global imports in %s: %s" , file_id , raw_globals )
246
-
247
245
issues_count = 0
248
246
for rg in raw_globals :
249
247
g = Global (rg [0 ], rg [1 ], SafetyLevel .Dangerous )
@@ -269,7 +267,26 @@ def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanRe
269
267
g .safety = SafetyLevel .Suspicious
270
268
globals .append (g )
271
269
272
- return ScanResult (globals , 1 , issues_count , 1 if issues_count > 0 else 0 , False )
270
+ return ScanResult (globals , 1 , issues_count , 1 if issues_count > 0 else 0 , scan_err )
271
+
272
+
273
def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanResult:
    """Disassemble a Pickle stream and report issues.

    Args:
        data: Binary stream holding the pickle data; passed to
            ``_list_globals`` unchanged.
        file_id: Identifier of the scanned input, used in log messages and
            forwarded to ``_build_scan_result_from_raw_globals``.
        multiple_pickles: Forwarded to ``_list_globals`` unchanged.

    Returns:
        The ``ScanResult`` built from the globals that were found. If
        parsing fails, the result has ``scan_err=True``: it is built from
        the globals carried by the ``GenOpsError`` when there are any, and
        from an empty list otherwise.
    """
    try:
        raw_globals = _list_globals(data, multiple_pickles)
    except GenOpsError as e:
        # Lazy %-formatting, consistent with the debug call below.
        _log.error("ERROR: parsing pickle in %s: %s", file_id, e)
        if e.globals is not None:
            # Parsing failed part-way: still report what was found, but
            # mark the result as coming from an errored scan.
            return _build_scan_result_from_raw_globals(
                e.globals, file_id, scan_err=True
            )
        return ScanResult([], scan_err=True)

    _log.debug("Global imports in %s: %s", file_id, raw_globals)

    return _build_scan_result_from_raw_globals(raw_globals, file_id)
273
290
274
291
275
292
def scan_zip_bytes (data : IO [bytes ], file_id ) -> ScanResult :
@@ -288,7 +305,6 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
288
305
289
306
290
307
def scan_numpy (data : IO [bytes ], file_id ) -> ScanResult :
291
-
292
308
# Delay import to avoid dependency on NumPy
293
309
import numpy as np
294
310
0 commit comments