Skip to content

Commit baf03fa

Browse files
authored
Merge commit from fork
1 parent 13bb359 commit baf03fa

File tree

3 files changed

+29
-5
lines changed

3 files changed

+29
-5
lines changed

src/picklescan/scanner.py

+13 -2
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,7 @@ def __str__(self) -> str:
154154
_pytorch_file_extensions = {".bin", ".pt", ".pth", ".ckpt"}
155155
_pickle_file_extensions = {".pkl", ".pickle", ".joblib", ".dat", ".data"}
156156
_zip_file_extensions = {".zip", ".npz", ".7z"}
157+
_pickle_magic_bytes = {b"\x80\x00", b"\x80\x01", b"\x80\x02", b"\x80\x03", b"\x80\x04", b"\x80\x05"}
157158

158159

159160
def _is_7z_file(f: IO[bytes]) -> bool:
@@ -349,20 +350,30 @@ def scan_7z_bytes(data: IO[bytes], file_id) -> ScanResult:
349350

350351
return result
351352

353+
def get_magic_bytes_from_zipfile(zip: zipfile.ZipFile, num_bytes: int = 8) -> dict:
    """Read the leading bytes of every member of an open zip archive.

    The returned mapping lets a caller recognise a member's format from its
    content signature rather than from its file extension alone.

    Args:
        zip: Archive already opened for reading. The parameter name shadows
            the ``zip`` builtin; it is kept unchanged so existing keyword
            callers keep working.
        num_bytes: Maximum number of bytes to read from the start of each
            member. A member shorter than this yields fewer bytes.

    Returns:
        A dict mapping each member's file name to the bytes read from the
        start of that member.

    Any exception raised by ``zip.open`` or ``read`` is propagated on
    purpose: this feeds a security scan, so an unreadable member must not be
    silently treated as "no signature".
    """
    magic_bytes = {}
    for file_info in zip.infolist():
        # Open by name (not by ZipInfo) so the sniffed bytes come from the
        # same lookup path scan_zip_bytes uses when it re-opens the member.
        with zip.open(file_info.filename) as member:
            magic_bytes[file_info.filename] = member.read(num_bytes)

    return magic_bytes
360+
352361

353362
def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
354363
result = ScanResult([])
355364

356365
with zipfile.ZipFile(data, "r") as zip:
366+
magic_bytes = get_magic_bytes_from_zipfile(zip)
357367
file_names = zip.namelist()
358368
_log.debug("Files in zip archive %s: %s", file_id, file_names)
359369
for file_name in file_names:
370+
magic_number = magic_bytes.get(file_name, b"")
360371
file_ext = os.path.splitext(file_name)[1]
361-
if file_ext in _pickle_file_extensions:
372+
if file_ext in _pickle_file_extensions or any(magic_number.startswith(mn) for mn in _pickle_magic_bytes):
362373
_log.debug("Scanning file %s in zip archive %s", file_name, file_id)
363374
with zip.open(file_name, "r") as file:
364375
result.merge(scan_pickle_bytes(file, f"{file_id}:{file_name}"))
365-
elif file_ext in _numpy_file_extensions:
376+
elif file_ext in _numpy_file_extensions or magic_number.startswith(b"\x93NUMPY"):
366377
_log.debug("Scanning file %s in zip archive %s", file_name, file_id)
367378
with zip.open(file_name, "r") as file:
368379
result.merge(scan_numpy(file, f"{file_id}:{file_name}"))

tests/data/malicious1_wrong_ext.zip

165 Bytes
Binary file not shown.

tests/test_scanner.py

+16 -3
Original file line number | Diff line number | Diff line change
@@ -443,6 +443,12 @@ def initialize_pickle_files():
443443
pickle.dumps(Malicious1(), protocol=4),
444444
)
445445

446+
initialize_zip_file(
447+
f"{_root_path}/data/malicious1_wrong_ext.zip",
448+
"data.txt", # Pickle file with a non-standard extension
449+
pickle.dumps(Malicious1(), protocol=4),
450+
)
451+
446452
# Fake PyTorch file (PNG file format) simulating https://huggingface.co/RectalWorm/loras_new/blob/main/Owl_Mage_no_background.pt
447453
initialize_data_file(f"{_root_path}/data/bad_pytorch.pt", b"\211PNG\r\n\032\n")
448454

@@ -593,6 +599,12 @@ def test_scan_file_path():
593599
compare_scan_results(
594600
scan_file_path(f"{_root_path}/data/malicious1.zip"), malicious1
595601
)
602+
compare_scan_results(
603+
scan_file_path(f"{_root_path}/data/malicious1.7z"), malicious1
604+
)
605+
compare_scan_results(
606+
scan_file_path(f"{_root_path}/data/malicious1_wrong_ext.zip"), malicious1
607+
)
596608

597609
malicious2 = ScanResult([Global("posix", "system", SafetyLevel.Dangerous)], 1, 1, 1)
598610
compare_scan_results(
@@ -772,10 +784,11 @@ def test_scan_directory_path():
772784
Global("builtins", "exec", SafetyLevel.Dangerous),
773785
Global("builtins", "eval", SafetyLevel.Dangerous),
774786
Global("pip", "main", SafetyLevel.Dangerous),
787+
Global("builtins", "eval", SafetyLevel.Dangerous),
775788
],
776-
scanned_files=33,
777-
issues_count=33,
778-
infected_files=28,
789+
scanned_files=34,
790+
issues_count=34,
791+
infected_files=29,
779792
scan_err=True,
780793
)
781794
compare_scan_results(scan_directory_path(f"{_root_path}/data/"), sr)

0 commit comments

Comments
 (0)