@@ -50,8 +50,9 @@ def merge(self, sr: "ScanResult"):
 
 
 class GenOpsError(Exception):
-    def __init__(self, msg: str):
+    def __init__(self, msg: str, globals: Optional[Set[Tuple[str, str]]]):
         self.msg = msg
+        self.globals = globals
         super().__init__()
 
     def __str__(self) -> str:
@@ -177,16 +178,10 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]]:
         try:
             ops = list(pickletools.genops(data))
         except Exception as e:
-            # XXX: pickle will happily load files that contain arbitrarily placed new lines whereas pickletools errors in such cases.
-            # below is code to circumvent or skip these newlines while succeeding at parsing the opcodes.
-            err = str(e)
-            if "opcode b'\\n' unknown" not in err:
-                raise GenOpsError(err)
-            else:
-                pos = int(err.split(",")[0].replace("at position ", ""))
-                data.seek(-(pos + 1), 1)
-                ops = list(pickletools.genops(data.read(pos)))
-                data.seek(1, 1)
+            # XXX: given we can have multiple pickles in a file, we may have already successfully extracted globals from a valid pickle.
+            # Thus we return the already-found globals in the error and let the caller decide what to do.
+            globals_opt = globals if len(globals) > 0 else None
+            raise GenOpsError(str(e), globals_opt)
 
         last_byte = data.read(1)
         data.seek(-1, 1)
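A minimal standalone sketch (not part of this diff) of the failure mode the new comment describes: pickletools.genops stops at each STOP opcode, so when a file holds several pickles the first call can succeed and a later call can fail on a corrupt tail, which is exactly when the globals collected so far are worth preserving. The payload below is invented for illustration.

import io
import pickle
import pickletools

# One valid pickle followed by bytes that are not a valid pickle stream.
stream = io.BytesIO(pickle.dumps({"weights": [1, 2, 3]}) + b"\x01corrupt tail")

# genops stops at the first STOP opcode, so this call succeeds.
first = list(pickletools.genops(stream))
print(first[-1][0].name)  # STOP

# The next call starts at the corrupt tail and raises something like
# "at position ..., opcode b'\x01' unknown".
try:
    list(pickletools.genops(stream))
except ValueError as e:
    print("second pickle failed:", e)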
@@ -241,18 +236,12 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]]:
     return globals
 
 
-def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanResult:
-    """Disassemble a Pickle stream and report issues"""
-
+def _build_scan_result_from_raw_globals(
+    raw_globals: Set[Tuple[str, str]],
+    file_id,
+    scan_err=False,
+) -> ScanResult:
     globals = []
-    try:
-        raw_globals = _list_globals(data, multiple_pickles)
-    except GenOpsError as e:
-        _log.error(f"ERROR: parsing pickle in {file_id}: {e}")
-        return ScanResult(globals, scan_err=True)
-
-    _log.debug("Global imports in %s: %s", file_id, raw_globals)
-
     issues_count = 0
     for rg in raw_globals:
         g = Global(rg[0], rg[1], SafetyLevel.Dangerous)
@@ -278,7 +267,26 @@ def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanResult:
             g.safety = SafetyLevel.Suspicious
         globals.append(g)
 
-    return ScanResult(globals, 1, issues_count, 1 if issues_count > 0 else 0, False)
+    return ScanResult(globals, 1, issues_count, 1 if issues_count > 0 else 0, scan_err)
+
+
+def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanResult:
+    """Disassemble a Pickle stream and report issues"""
+
+    try:
+        raw_globals = _list_globals(data, multiple_pickles)
+    except GenOpsError as e:
+        _log.error(f"ERROR: parsing pickle in {file_id}: {e}")
+        if e.globals is not None:
+            return _build_scan_result_from_raw_globals(
+                e.globals, file_id, scan_err=True
+            )
+        else:
+            return ScanResult([], scan_err=True)
+
+    _log.debug("Global imports in %s: %s", file_id, raw_globals)
+
+    return _build_scan_result_from_raw_globals(raw_globals, file_id)
 
 
 def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
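For context, a hypothetical usage sketch (not part of the patch) of the new partial-result behaviour, assuming the functions live in picklescan.scanner and that ScanResult exposes globals and scan_err as used above; the payload and file name are invented. A stream whose first pickle parses cleanly but whose tail is corrupt now reports the imports found so far together with scan_err=True, instead of returning an empty result.

import collections
import io
import pickle

from picklescan.scanner import scan_pickle_bytes  # import path assumed

# The first pickle references collections.OrderedDict (a global import);
# the trailing bytes are junk that pickletools cannot parse.
payload = pickle.dumps(collections.OrderedDict()) + b"\x01corrupt tail"
result = scan_pickle_bytes(io.BytesIO(payload), "example.pkl")

print(result.scan_err)  # True: the corrupt tail could not be parsed
print(result.globals)   # still lists the import found in the first, valid pickle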