1
+ REBOL [
2
+ title: "REBOL 3 codec for ZIP files"
3
+ name: 'codec-zip
4
+ author: rights: "Oldes"
5
+ version: 0.0.1
6
+ specification: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
7
+ history: [15-Mar-2019 "Oldes" {Initial version of the ZIP decoder} ]
8
+ ]
9
+
10
+ register-codec [
11
+ name: 'ZIP
12
+ title: "ZIP File Format"
13
+ suffixes: [%.zip %.aar %.jar %.apk %.zipx %.appx ]
14
+
15
+ decode: wrap [
16
+ ;- privates
17
+ buffer: none ; shared binary used for decompression
18
+ name: none
19
+ method: none
20
+ crc:
21
+ cmp-size:
22
+ unc-size: 0
23
+
24
+ log-info : func [ msg] [
25
+ if block? msg [msg: reform msg]
26
+ print rejoin [" ^[ [1;33m[ZIP] ^[ [36m" msg "^[ [0m" ]
27
+ ]
28
+ log-more : func [ msg] [
29
+ if block? msg [msg: reform msg]
30
+ print rejoin [" ^[ [33m[ZIP] ^[ [0;36m" msg "^[ [0m" ]
31
+ ]
32
+ log-debug : func [ msg] [
33
+ if block? msg [msg: reform msg]
34
+ print rejoin [" ^[ [33m[ZIP] ^[ [0;32m" msg "^[ [0m" ]
35
+ ]
36
+
37
+ decompress* : func [
38
+ data [binary! ]
39
+ validate [logic! ]
40
+ /local output crc2
41
+ ] [
42
+ if verbose > 0 [
43
+ log-info [
44
+ "Decompressing: ^[ [33m" name
45
+ " ^[ [0mbytes:^[ [33m" cmp-size "^[ [0m->^[ [33m" unc-size
46
+ ]
47
+ ]
48
+ switch /default method [
49
+ 8 [ ;- deflate
50
+ ;@@ TODO: decompress should be able to deflate even without the header bytes!
51
+ unless buffer [ buffer: make binary! unc-size ]
52
+ insert buffer #{ 789C } ;ZLIB header
53
+ append buffer copy/part data cmp-size
54
+ output: decompress /zlib/size buffer unc-size
55
+ clear buffer
56
+ ]
57
+ 14 [ ;- LZMA
58
+ output: decompress /lzma/part/size skip data 4 cmp-size unc-size
59
+ ]
60
+ 0 [ ;- store
61
+ output: copy/part data cmp-size
62
+ ]
63
+ ][
64
+ print [" [ZIP] ^[ [1;35mUnsupported compression method:^[ [0;35m" method "^[ [0m" ]
65
+ ]
66
+ if all [
67
+ validate
68
+ output
69
+ crc <> crc2: checksum /method output 'crc32
70
+ ][
71
+ print [" [ZIP] ^[ [1;35mCRC check failed!" crc "<>" crc2 "for file:^[ [0;35m" name "^[ [0m" ]
72
+ ]
73
+ output
74
+ ]
75
+
76
+ ;- decode
77
+ function/extern [
78
+ {Decompress all content of the ZIP file}
79
+ zip-data [binary! file! url! ]
80
+ /validate "Check if decompressed data has valid CRC"
81
+ /only "Extract only specified files if found in the achive"
82
+ files [block! ] "Block with file names to extract"
83
+ return: [block! ] "Result is in format: [NAME [MODIFIED CRC DATA] ...]"
84
+ ] [
85
+ unless binary? zip-data [
86
+ zip-data: read zip-data
87
+ ]
88
+ if verbose > 0 [
89
+ print ["^[ [1;32mDecode ZIP data^[ [m (^[ [1m" length? zip-data "^[ [mbytes )" ]
90
+ ]
91
+ bin: binary zip-data
92
+
93
+ data-pos: 0
94
+
95
+ if only [ files-to-extract: length? files ]
96
+
97
+ result: make block! 32
98
+
99
+ while [pos: find bin/buffer #{ 504b } ][
100
+ pos: index? pos
101
+ binary/read bin [AT :pos type: UI32LE]
102
+ switch /default type [
103
+ 134695760 [ ;#{08074B50}
104
+ if verbose > 1 [log-more "Data Descriptor" ]
105
+ binary/read bin [
106
+ crc: SI32LE
107
+ cmp-size: UI32LE ; compressed size
108
+ unc-size: UI32LE ; uncompressed size
109
+ ]
110
+
111
+ if all [only not find files name][
112
+ if verbose > 1 [log-debug "not extracting" ]
113
+ continue
114
+ ]
115
+
116
+ either all [name data-pos > 0 ] [
117
+ data: decompress* at zip-data :data-pos any [validate validate-crc?]
118
+ repend result [name reduce [modified crc data]]
119
+ ][
120
+ if verbose > 0 [log-info ["Decompressing: ^[ [33m" name]]
121
+ repend result [name none]
122
+ ]
123
+
124
+ if only [
125
+ -- files-to-extract
126
+ if files-to-extract = 0 [break]
127
+ ]
128
+ ]
129
+ 67324752 [ ;#{04034B50}
130
+ if verbose > 1 [log-more "Local file header" ]
131
+ header: binary/read bin [
132
+ UI16LE ; version
133
+ flags: BITSET16 ; flags
134
+ method: UI16LE ; compression
135
+ modified: MSDOS-DATETIME ; last modified
136
+ crc: SI32LE ; crc-32
137
+ cmp-size: UI32LE ; compressed size
138
+ unc-size: UI32LE ; uncompressed size
139
+ len-name: UI16LE
140
+ len-extr: UI16LE
141
+ name: BYTES :len-name
142
+ extr: BYTES :len-extr
143
+ data-pos: INDEX
144
+ ]
145
+ if verbose > 2 [log-debug mold header]
146
+ name: to file! name
147
+ if all [
148
+ flags/12 ; bit 3
149
+ crc = 0
150
+ cmp-size = 0
151
+ unc-size = 0
152
+ ][
153
+ ; The correct values are put in the data descriptor
154
+ ; immediately following the compressed data.
155
+ if verbose > 1 [log-debug "waiting for Data Descriptor" ]
156
+ continue
157
+ ]
158
+
159
+ if all [only not find files name][
160
+ if verbose > 1 [log-debug "not extracting" ]
161
+ continue
162
+ ]
163
+
164
+ either all [unc-size > 0 ] [
165
+ data: decompress* bin/buffer any [validate validate-crc?]
166
+ repend result [name reduce [modified crc data]]
167
+ ][
168
+ if verbose > 0 [log-info ["Decompressing: ^[ [33m" name]]
169
+ repend result [name none]
170
+ ]
171
+ if only [
172
+ -- files-to-extract
173
+ if files-to-extract = 0 [break]
174
+ ]
175
+ ]
176
+ 33639248 [ ;#{02014B50}
177
+ if verbose > 1 [log-more "Central directory structure" ]
178
+ cheader: binary/read bin [
179
+ UI16LE ; version made by
180
+ UI16LE ; version needed to extract
181
+ BITSET16 ; general purpose bit flag
182
+ UI16LE ; compression method
183
+ modified: MSDOS-DATETIME ; last modified
184
+ SI32LE ; crc-32
185
+ UI32LE ; compressed size
186
+ UI32LE ; uncompressed size
187
+ len-name: UI16LE ; file name length
188
+ len-extr: UI16LE ; extra field length
189
+ len-comm: UI16LE ; file comment length
190
+ disk-num: UI16LE ; disk number start
191
+ att-int: UI16LE ; internal file attributes
192
+ att-ext: UI32LE ; external file attributes
193
+ offset: UI32LE ; relative offset of local header
194
+ name: BYTES :len-name
195
+ extr: BYTES :len-extr
196
+ comm: BYTES :len-comm
197
+ ]
198
+ if verbose > 2 [log-debug mold cheader]
199
+ unless empty? comm [log-info ["Comment: ^[ [33m" to-string comm "^[ [0m" mold to file! name]]
200
+ ]
201
+ 101010256 [ ;#{06054B50}
202
+ if verbose > 1 [log-more "End of central directory record" ]
203
+ data: binary/read bin [
204
+ UI16LE ; number of this disk
205
+ UI16LE ; number of the disk with the start of the central directory
206
+ UI16LE ; total number of entries in the central directory on this disk
207
+ UI16LE ; total number of entries in the central directory
208
+ UI32LE ; size of the central directory
209
+ UI32LE ; offset of start of central directory with respect to the starting disk number
210
+ len: UI16LE ; .ZIP file comment length
211
+ BYTES :len ; .ZIP file comment
212
+ ]
213
+ if verbose > 2 [log-debug mold data]
214
+ ]
215
+ 101075792 [ ;#{06064b50}
216
+ if verbose > 1 [log-more "Zip64 end of central directory record" ]
217
+ data: binary/read bin [
218
+ UI64LE ; directory record
219
+ UI16LE ; version made by
220
+ UI16LE ; version needed to extract
221
+ UI32LE ; number of this disk
222
+ UI32LE ; number of the disk with the start of the central directory
223
+ UI64LE ; total number of entries in the central directory on this disk
224
+ UI64LE ; total number of entries in the central directory
225
+ UI64LE ; size of the central directory
226
+ UI64LE ; offset of start of central directory with respect to the starting disk number
227
+ ;@@BYTES ?? ; zip64 extensible data sector (variable size)
228
+ ]
229
+ if verbose > 2 [log-debug mold data]
230
+ ]
231
+ ][
232
+ if verbose > 1 [log-more ["Unknown ZIP signature:" mold skip to-binary type 4 ]]
233
+ ]
234
+ ]
235
+ buffer: none ; cleanup
236
+ new-line /skip result true 4
237
+ result
238
+ ][
239
+ ; external `decode` variables
240
+ name method crc cmp-size unc-size
241
+ ]
242
+ ]
243
+
244
+ ;encode: function [data [binary!]][ ]
245
+
246
+ identify : function [ data [binary! ]] [
247
+ all [
248
+ 4 < length? data
249
+ #"P" = data/1
250
+ #"K" = data/2
251
+ ]
252
+ ]
253
+ validate-crc?: true
254
+ verbose: 1
255
+ level: 9
256
+ ]
0 commit comments