Skip to content

Commit 908bb31

Browse files
committed
FEAT: ZIP codec (decode only so far)
It extract ZIP data into memory. Example: ``` >> load %/x/test.aar Decode ZIP data ( 2919 bytes ) [ZIP] Decompressing: AndroidManifest.xml bytes: 504 -> 832 [ZIP] Decompressing: R.txt bytes: 1535 -> 7180 [ZIP] Decompressing: classes.jar bytes: 120 -> 233 [ZIP] Decompressing: aidl/ [ZIP] Decompressing: assets/ [ZIP] Decompressing: jni/ [ZIP] Decompressing: res/ [ZIP] Decompressing: libs/ == [ %AndroidManifest.xml [4-Apr-2018/18:52:10 662885694 #{ 3C3F786D6C2076657273696F6E3D22312E302220656E636F64696E673D227574 662D38223F3E0A3C212D2D0A20436F7079726967687420284329203230313420 54686520416E64726F6964204F70656E20536F757263652050726F6A6563740A 0A20202020204C6963656E73656420756E646572207468652041706163686520 4C6963656E73652C2056657273696F6E20322E30202874686520224C6963656E 736522293B0A2020202020796F75206D6179206E6F7420757365207468697320 66696C652065786365707420696E20636F6D706C69616E636... ``` It's possible to use the ZIP codec directly and extract only specified files: ``` >> zip-decode: :system/codecs/zip/decode >> zip-decode/only %/x/test.aar [%classes.jar] Decode ZIP data ( 2919 bytes ) [ZIP] Decompressing: classes.jar bytes: 120 -> 233 == [ %classes.jar [4-Apr-2018/18:53:56 646121705 #{ 504B03040A000008000000002100F7D7D29907000000070000002F0015004D45 54412D494E462F636F6D2E616E64726F69642E737570706F72745F737570706F 72742D76342E76657273696F6E55540D0007000000000000000000000000FECA 000032372E312E310A504B01020A000A000008000000002100F7D7D299070000 00070000002F000D0000000000000000000000000000004D4554412D494E462F 636F6D2E616E64726F69642E737570706F72745F737570706F72742D76342E76 657273696F6E555405000700000000FECA0000504B050600000000010... ```
1 parent 67c8ea5 commit 908bb31

File tree

6 files changed

+274
-0
lines changed

6 files changed

+274
-0
lines changed

src/mezz/codec-zip.r

+256
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
REBOL [
2+
title: "REBOL 3 codec for ZIP files"
3+
name: 'codec-zip
4+
author: rights: "Oldes"
5+
version: 0.0.1
6+
specification: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
7+
history: [15-Mar-2019 "Oldes" {Initial version of the ZIP decoder}]
8+
]
9+
10+
register-codec [
11+
name: 'ZIP
12+
title: "ZIP File Format"
13+
suffixes: [%.zip %.aar %.jar %.apk %.zipx %.appx]
14+
15+
decode: wrap [
16+
;- privates
17+
buffer: none ; shared binary used for decompression
18+
name: none
19+
method: none
20+
crc:
21+
cmp-size:
22+
unc-size: 0
23+
24+
log-info: func[msg][
25+
if block? msg [msg: reform msg]
26+
print rejoin [" ^[[1;33m[ZIP] ^[[36m" msg "^[[0m"]
27+
]
28+
log-more: func[msg][
29+
if block? msg [msg: reform msg]
30+
print rejoin [" ^[[33m[ZIP] ^[[0;36m" msg "^[[0m"]
31+
]
32+
log-debug: func[msg][
33+
if block? msg [msg: reform msg]
34+
print rejoin [" ^[[33m[ZIP] ^[[0;32m" msg "^[[0m"]
35+
]
36+
37+
decompress*: func [
38+
data [binary! ]
39+
validate [logic!]
40+
/local output crc2
41+
][
42+
if verbose > 0 [
43+
log-info [
44+
"Decompressing: ^[[33m" name
45+
" ^[[0mbytes:^[[33m" cmp-size "^[[0m->^[[33m" unc-size
46+
]
47+
]
48+
switch/default method [
49+
8 [ ;- deflate
50+
;@@ TODO: decompress should be able to deflate even without the header bytes!
51+
unless buffer [ buffer: make binary! unc-size ]
52+
insert buffer #{789C} ;ZLIB header
53+
append buffer copy/part data cmp-size
54+
output: decompress/zlib/size buffer unc-size
55+
clear buffer
56+
]
57+
14 [ ;- LZMA
58+
output: decompress/lzma/part/size skip data 4 cmp-size unc-size
59+
]
60+
0 [ ;- store
61+
output: copy/part data cmp-size
62+
]
63+
][
64+
print [" [ZIP] ^[[1;35mUnsupported compression method:^[[0;35m" method "^[[0m"]
65+
]
66+
if all [
67+
validate
68+
output
69+
crc <> crc2: checksum/method output 'crc32
70+
][
71+
print [" [ZIP] ^[[1;35mCRC check failed!" crc "<>" crc2 "for file:^[[0;35m" name "^[[0m"]
72+
]
73+
output
74+
]
75+
76+
;- decode
77+
function/extern [
78+
{Decompress all content of the ZIP file}
79+
zip-data [binary! file! url!]
80+
/validate "Check if decompressed data has valid CRC"
81+
/only "Extract only specified files if found in the achive"
82+
files [block!] "Block with file names to extract"
83+
return: [block!] "Result is in format: [NAME [MODIFIED CRC DATA] ...]"
84+
] [
85+
unless binary? zip-data [
86+
zip-data: read zip-data
87+
]
88+
if verbose > 0 [
89+
print ["^[[1;32mDecode ZIP data^[[m (^[[1m" length? zip-data "^[[mbytes )"]
90+
]
91+
bin: binary zip-data
92+
93+
data-pos: 0
94+
95+
if only [ files-to-extract: length? files ]
96+
97+
result: make block! 32
98+
99+
while [pos: find bin/buffer #{504b}][
100+
pos: index? pos
101+
binary/read bin [AT :pos type: UI32LE]
102+
switch/default type [
103+
134695760 [ ;#{08074B50}
104+
if verbose > 1 [log-more "Data Descriptor"]
105+
binary/read bin [
106+
crc: SI32LE
107+
cmp-size: UI32LE ; compressed size
108+
unc-size: UI32LE ; uncompressed size
109+
]
110+
111+
if all [only not find files name][
112+
if verbose > 1 [log-debug "not extracting"]
113+
continue
114+
]
115+
116+
either all [name data-pos > 0] [
117+
data: decompress* at zip-data :data-pos any [validate validate-crc?]
118+
repend result [name reduce [modified crc data]]
119+
][
120+
if verbose > 0 [log-info ["Decompressing: ^[[33m" name]]
121+
repend result [name none]
122+
]
123+
124+
if only [
125+
-- files-to-extract
126+
if files-to-extract = 0 [break]
127+
]
128+
]
129+
67324752 [ ;#{04034B50}
130+
if verbose > 1 [log-more "Local file header"]
131+
header: binary/read bin [
132+
UI16LE ; version
133+
flags: BITSET16 ; flags
134+
method: UI16LE ; compression
135+
modified: MSDOS-DATETIME ; last modified
136+
crc: SI32LE ; crc-32
137+
cmp-size: UI32LE ; compressed size
138+
unc-size: UI32LE ; uncompressed size
139+
len-name: UI16LE
140+
len-extr: UI16LE
141+
name: BYTES :len-name
142+
extr: BYTES :len-extr
143+
data-pos: INDEX
144+
]
145+
if verbose > 2 [log-debug mold header]
146+
name: to file! name
147+
if all [
148+
flags/12 ; bit 3
149+
crc = 0
150+
cmp-size = 0
151+
unc-size = 0
152+
][
153+
; The correct values are put in the data descriptor
154+
; immediately following the compressed data.
155+
if verbose > 1 [log-debug "waiting for Data Descriptor"]
156+
continue
157+
]
158+
159+
if all [only not find files name][
160+
if verbose > 1 [log-debug "not extracting"]
161+
continue
162+
]
163+
164+
either all [unc-size > 0] [
165+
data: decompress* bin/buffer any [validate validate-crc?]
166+
repend result [name reduce [modified crc data]]
167+
][
168+
if verbose > 0 [log-info ["Decompressing: ^[[33m" name]]
169+
repend result [name none]
170+
]
171+
if only [
172+
-- files-to-extract
173+
if files-to-extract = 0 [break]
174+
]
175+
]
176+
33639248 [ ;#{02014B50}
177+
if verbose > 1 [log-more "Central directory structure"]
178+
cheader: binary/read bin [
179+
UI16LE ; version made by
180+
UI16LE ; version needed to extract
181+
BITSET16 ; general purpose bit flag
182+
UI16LE ; compression method
183+
modified: MSDOS-DATETIME ; last modified
184+
SI32LE ; crc-32
185+
UI32LE ; compressed size
186+
UI32LE ; uncompressed size
187+
len-name: UI16LE ; file name length
188+
len-extr: UI16LE ; extra field length
189+
len-comm: UI16LE ; file comment length
190+
disk-num: UI16LE ; disk number start
191+
att-int: UI16LE ; internal file attributes
192+
att-ext: UI32LE ; external file attributes
193+
offset: UI32LE ; relative offset of local header
194+
name: BYTES :len-name
195+
extr: BYTES :len-extr
196+
comm: BYTES :len-comm
197+
]
198+
if verbose > 2 [log-debug mold cheader]
199+
unless empty? comm [log-info ["Comment: ^[[33m" to-string comm "^[[0m" mold to file! name]]
200+
]
201+
101010256 [ ;#{06054B50}
202+
if verbose > 1 [log-more "End of central directory record"]
203+
data: binary/read bin [
204+
UI16LE ; number of this disk
205+
UI16LE ; number of the disk with the start of the central directory
206+
UI16LE ; total number of entries in the central directory on this disk
207+
UI16LE ; total number of entries in the central directory
208+
UI32LE ; size of the central directory
209+
UI32LE ; offset of start of central directory with respect to the starting disk number
210+
len: UI16LE ; .ZIP file comment length
211+
BYTES :len ; .ZIP file comment
212+
]
213+
if verbose > 2 [log-debug mold data]
214+
]
215+
101075792 [ ;#{06064b50}
216+
if verbose > 1 [log-more "Zip64 end of central directory record"]
217+
data: binary/read bin [
218+
UI64LE ; directory record
219+
UI16LE ; version made by
220+
UI16LE ; version needed to extract
221+
UI32LE ; number of this disk
222+
UI32LE ; number of the disk with the start of the central directory
223+
UI64LE ; total number of entries in the central directory on this disk
224+
UI64LE ; total number of entries in the central directory
225+
UI64LE ; size of the central directory
226+
UI64LE ; offset of start of central directory with respect to the starting disk number
227+
;@@BYTES ?? ; zip64 extensible data sector (variable size)
228+
]
229+
if verbose > 2 [log-debug mold data]
230+
]
231+
][
232+
if verbose > 1 [log-more ["Unknown ZIP signature:" mold skip to-binary type 4]]
233+
]
234+
]
235+
buffer: none ; cleanup
236+
new-line/skip result true 4
237+
result
238+
][
239+
; external `decode` variables
240+
name method crc cmp-size unc-size
241+
]
242+
]
243+
244+
;encode: function [data [binary!]][ ]
245+
246+
identify: function [data [binary!]][
247+
all [
248+
4 < length? data
249+
#"P" = data/1
250+
#"K" = data/2
251+
]
252+
]
253+
validate-crc?: true
254+
verbose: 1
255+
level: 9
256+
]

src/tests/units/codecs-test.r3

+18
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,22 @@ if find system/codecs 'swf [
7878
system/codecs/swf/verbose: 0
7979
]
8080

81+
if find system/codecs 'zip [
82+
system/codecs/zip/verbose: 3
83+
===start-group=== "ZIP codec"
84+
85+
--test-- "Load ZIP file"
86+
--assert block? load %units/files/test-lzma.zip
87+
--assert block? load %units/files/test-stored.zip
88+
--assert block? load %units/files/test-deflate.zip
89+
90+
--test-- "Decode ZIP using the codec directly"
91+
--assert block? data: system/codecs/zip/decode/only %units/files/test.aar [%classes.jar]
92+
--assert data/2/2 = 646121705
93+
--assert block? system/codecs/zip/decode data/2/3
94+
95+
===end-group===
96+
system/codecs/zip/verbose: 1
97+
]
98+
8199
~~~end-file~~~
358 Bytes
Binary file not shown.

src/tests/units/files/test-lzma.zip

623 Bytes
Binary file not shown.

src/tests/units/files/test-stored.zip

419 Bytes
Binary file not shown.

src/tests/units/files/test.aar

2.85 KB
Binary file not shown.

0 commit comments

Comments
 (0)