Skip to content

Commit f34d091

Browse files
Bump version to 0.0.22 and fix tests (#37)
Co-authored-by: Matthieu Maitre <mmaitre@microsoft.com>
1 parent 93764d6 commit f34d091

File tree

3 files changed

+53
-36
lines changed

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = picklescan
3-
version = 0.0.21
3+
version = 0.0.22
44
author = Matthieu Maitre
55
author_email = mmaitre314@users.noreply.github.com
66
description = Security scanner detecting Python Pickle files performing suspicious actions

src/picklescan/scanner.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,11 @@ def __str__(self) -> str:
130130
"bdb": "*",
131131
"pdb": "*",
132132
"asyncio": "*",
133-
"pydoc": "pipepager", # pydoc.pipepager('help','echo pwned')
133+
"pydoc": "pipepager", # pydoc.pipepager('help','echo pwned')
134134
"venv": "*",
135-
"torch.serialization": "load", # pickle could be used to load a different file
136-
"functools": "partial", # functools.partial(os.system, "echo pwned")
137-
"torch._inductor.codecache": "compile_file", # compile_file('', '', ['sh', '-c','$(echo pwned)'])
135+
"torch.serialization": "load", # pickle could be used to load a different file
136+
"functools": "partial", # functools.partial(os.system, "echo pwned")
137+
"torch._inductor.codecache": "compile_file", # compile_file('', '', ['sh', '-c','$(echo pwned)'])
138138
"pip": "*",
139139
}
140140

@@ -159,7 +159,14 @@ def __str__(self) -> str:
159159
_pytorch_file_extensions = {".bin", ".pt", ".pth", ".ckpt"}
160160
_pickle_file_extensions = {".pkl", ".pickle", ".joblib", ".dat", ".data"}
161161
_zip_file_extensions = {".zip", ".npz", ".7z"}
162-
_pickle_magic_bytes = {b"\x80\x00", b"\x80\x01", b"\x80\x02", b"\x80\x03", b"\x80\x04", b"\x80\x05"}
162+
_pickle_magic_bytes = {
163+
b"\x80\x00",
164+
b"\x80\x01",
165+
b"\x80\x02",
166+
b"\x80\x03",
167+
b"\x80\x04",
168+
b"\x80\x05",
169+
}
163170

164171

165172
def _is_7z_file(f: IO[bytes]) -> bool:
@@ -355,6 +362,7 @@ def scan_7z_bytes(data: IO[bytes], file_id) -> ScanResult:
355362

356363
return result
357364

365+
358366
def get_magic_bytes_from_zipfile(zip: zipfile.ZipFile, num_bytes=8):
359367
magic_bytes = {}
360368
for file_info in zip.infolist():
@@ -374,11 +382,15 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
374382
for file_name in file_names:
375383
magic_number = magic_bytes.get(file_name, b"")
376384
file_ext = os.path.splitext(file_name)[1]
377-
if file_ext in _pickle_file_extensions or any(magic_number.startswith(mn) for mn in _pickle_magic_bytes):
385+
if file_ext in _pickle_file_extensions or any(
386+
magic_number.startswith(mn) for mn in _pickle_magic_bytes
387+
):
378388
_log.debug("Scanning file %s in zip archive %s", file_name, file_id)
379389
with zip.open(file_name, "r") as file:
380390
result.merge(scan_pickle_bytes(file, f"{file_id}:{file_name}"))
381-
elif file_ext in _numpy_file_extensions or magic_number.startswith(b"\x93NUMPY"):
391+
elif file_ext in _numpy_file_extensions or magic_number.startswith(
392+
b"\x93NUMPY"
393+
):
382394
_log.debug("Scanning file %s in zip archive %s", file_name, file_id)
383395
with zip.open(file_name, "r") as file:
384396
result.merge(scan_numpy(file, f"{file_id}:{file_name}"))

tests/test_scanner.py

+33-28
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
import os
77
import pathlib
88
import pickle
9+
import pip
910
import py7zr
11+
import pydoc
1012
import pytest
1113
import requests
1214
import runpy
1315
import socket
1416
import subprocess
1517
import sys
16-
from unittest import TestCase
18+
import venv
1719
import zipfile
18-
import pip
20+
from functools import partial
21+
from unittest import TestCase
1922

2023
from picklescan.cli import main
2124
from picklescan.scanner import (
@@ -33,11 +36,14 @@
3336
scan_numpy,
3437
scan_pytorch,
3538
)
36-
import torch
37-
import torch._inductor.codecache as codecache
38-
import pydoc
39-
import venv
40-
from functools import partial
39+
40+
try:
41+
import torch
42+
import torch._inductor.codecache as codecache
43+
except ImportError:
44+
# If PyTorch test files need to be regenerated, run 'pip install torch==2.6.0' first
45+
torch = None
46+
codecache = None
4147

4248
_root_path = os.path.dirname(__file__)
4349

@@ -105,20 +111,16 @@ def __reduce__(self):
105111
'import os\nos.system("whoami")',
106112
)
107113

114+
108115
class Malicious17:
109116
def __reduce__(self):
110-
return codecache.compile_file, (
111-
'', '', [
112-
'sh',
113-
'-c','$(echo "pwned")'
114-
]
115-
)
117+
return codecache.compile_file, ("", "", ["sh", "-c", '$(echo "pwned")'])
118+
116119

117120
class Malicious18:
118121
def __reduce__(self):
119-
return pydoc.pipepager, (
120-
'', 'echo "pwned"'
121-
)
122+
return pydoc.pipepager, ("", 'echo "pwned"')
123+
122124

123125
class Malicious19:
124126
def __init__(self, path, **kwargs):
@@ -128,11 +130,10 @@ def __init__(self, path, **kwargs):
128130
def __reduce__(self):
129131
return partial(torch.load, self.path, **self.kwargs), ()
130132

133+
131134
class Malicious20:
132135
def __reduce__(self):
133-
return venv.create, (
134-
'venv', False, False, True, False, "$(echo pwned)"
135-
)
136+
return venv.create, ("venv", False, False, True, False, "$(echo pwned)")
136137

137138

138139
class Malicious16:
@@ -469,8 +470,13 @@ def initialize_pickle_files():
469470
initialize_pickle_file(f"{_root_path}/data/malicious18.pkl", Malicious18(), 4)
470471

471472
# This exploit serializes kwargs and passes them into a torch.load call
472-
initialize_pickle_file(f"{_root_path}/data/malicious19.pkl",
473-
Malicious19("some_other_model.bin", pickle_file='config.json', weights_only=False), 4)
473+
initialize_pickle_file(
474+
f"{_root_path}/data/malicious19.pkl",
475+
Malicious19(
476+
"some_other_model.bin", pickle_file="config.json", weights_only=False
477+
),
478+
4,
479+
)
474480

475481
initialize_pickle_file(f"{_root_path}/data/malicious20.pkl", Malicious20(), 4)
476482
initialize_7z_file(
@@ -486,7 +492,7 @@ def initialize_pickle_files():
486492

487493
initialize_zip_file(
488494
f"{_root_path}/data/malicious1_wrong_ext.zip",
489-
"data.txt", # Pickle file with a non-standard extension
495+
"data.txt", # Pickle file with a non-standard extension
490496
pickle.dumps(Malicious1(), protocol=4),
491497
)
492498

@@ -640,9 +646,7 @@ def test_scan_file_path():
640646
compare_scan_results(
641647
scan_file_path(f"{_root_path}/data/malicious1.zip"), malicious1
642648
)
643-
compare_scan_results(
644-
scan_file_path(f"{_root_path}/data/malicious1.7z"), malicious1
645-
)
649+
compare_scan_results(scan_file_path(f"{_root_path}/data/malicious1.7z"), malicious1)
646650
compare_scan_results(
647651
scan_file_path(f"{_root_path}/data/malicious1_wrong_ext.zip"), malicious1
648652
)
@@ -830,10 +834,11 @@ def test_scan_directory_path():
830834
Global("torch.serialization", "load", SafetyLevel.Dangerous),
831835
Global("functools", "partial", SafetyLevel.Dangerous),
832836
Global("pip", "main", SafetyLevel.Dangerous),
837+
Global("builtins", "eval", SafetyLevel.Dangerous),
833838
],
834-
scanned_files=37,
835-
issues_count=38,
836-
infected_files=31,
839+
scanned_files=38,
840+
issues_count=39,
841+
infected_files=33,
837842
scan_err=True,
838843
)
839844
compare_scan_results(scan_directory_path(f"{_root_path}/data/"), sr)

0 commit comments

Comments
 (0)