Skip to content

Commit 14bc42f

Browse files
committed
FEAT: PDF encoder input validation
1 parent cb0521a commit 14bc42f

File tree

2 files changed

+124
-22
lines changed

2 files changed

+124
-22
lines changed

src/mezz/codec-pdf.reb

+75-21
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ rl_newline: [CRLF | LF | CR]
3131
ch_number: #[bitset! #{000000000000FFC0}] ;charset "0123456789"
3232
ch_delimiter: #[bitset! #{0000000004C1000A0000001400000014}] ;charset "()<>[]{}/%"
3333
ch_str-valid: #[bitset! [not bits #{00EC000000C0000000000008}]] ;complement charset "^/^M^-^H^L()\"
34-
sp: #[bitset! #{0040000080}] ;charset " ^-"
34+
ch_sp: #[bitset! #{0040000080}] ;charset " ^-"
3535
ch_newline: #[bitset! #{0024}] ;charset CRLF
3636
ch_spnl: #[bitset! #{0064000080}] ;charset " ^-^/^L^M"
3737
ch_hex: #[bitset! #{000000000000FFC07FFFFFE07FFFFFE0}] ;charset [#"0" - #"9" #"a" - #"z" #"A" - #"Z"]
@@ -122,12 +122,12 @@ rl_string: [
122122
(value: to string! value)
123123
]
124124
rl_ref-id: [
125-
copy n1 some ch_number some sp
125+
copy n1 some ch_number some ch_sp
126126
copy n2 some ch_number
127127
(ref-id: as-pair n1: load n1 n2: load n2) ; ?? ref-id)
128128
]
129129
rl_reference: [
130-
rl_ref-id some sp #"R" (value: ref-id)
130+
rl_ref-id some ch_sp #"R" (value: ref-id)
131131
]
132132

133133
*stack: copy []
@@ -201,7 +201,7 @@ rl_array: [
201201

202202
rl_obj: [
203203
rl_ref-id (obj-id: ref-id );? obj-id)
204-
some sp
204+
some ch_sp
205205
"obj"
206206
any ch_spnl
207207
rl_value
@@ -266,7 +266,7 @@ rl_import-object: [
266266
]
267267

268268
rl_pdf: [
269-
"%PDF-" copy value some ch_not-newline rl_newline
269+
"%PDF-" copy value [some ch_number #"." some ch_number] any ch_sp rl_newline
270270
(pdf/version: to string! value)
271271
any ch_spnl
272272
opt rl_comment
@@ -399,8 +399,10 @@ emit-val: func[val][
399399
emit-obj: func[obj][
400400
out: insert out "<<"
401401
foreach [key val] obj [
402-
out: insert insert out "/" form key
403-
emit-val val
402+
unless none? val [
403+
out: insert insert out "/" form key
404+
emit-val val
405+
]
404406
]
405407
out: insert out ">>"
406408
]
@@ -417,8 +419,14 @@ get-xref-count: function[xrefs n][
417419
to integer! n
418420
]
419421

420-
emit-stream: func[obj][
421-
unless obj/spec/Filter [
422+
emit-stream: func[obj [object!]][
423+
unless find obj 'spec [
424+
extend obj 'spec #(Length: 0)
425+
]
426+
unless any [
427+
obj/spec/Filter
428+
300 > length? obj/data ; don't use compression on tiny strings
429+
][
422430
obj/spec/Filter: 'FlateDecode
423431
obj/data: compress/zlib obj/data
424432
]
@@ -427,6 +435,8 @@ emit-stream: func[obj][
427435
out: insert insert insert out "stream^M^/" obj/data "^M^/endstream"
428436
]
429437

438+
rebol-version-str: rejoin ["Rebol/" system/product " Version " system/version]
439+
430440

431441
remove-metadata: function [pdf [object!]][
432442
if all [
@@ -598,39 +608,82 @@ register-codec [
598608
system/options/log/pdf > 0
599609
map? info: try [pdf/objects/(pdf/trailer/info)]
600610
][
601-
sys/log/info 'PDF ["Author: ^[[m" info/Author]
602-
sys/log/info 'PDF ["Title: ^[[m" info/Title]
603-
sys/log/info 'PDF ["Created: ^[[m" info/CreationDate]
604-
sys/log/info 'PDF ["Modified:^[[m" info/ModDate]
605-
sys/log/info 'PDF ["Producer:^[[m" info/Producer]
606-
sys/log/info 'PDF ["Creator: ^[[m" info/Creator]
611+
if info/Author [sys/log/info 'PDF ["Author: ^[[m" info/Author]]
612+
if info/Title [sys/log/info 'PDF ["Title: ^[[m" info/Title]]
613+
if info/CreationDate [sys/log/info 'PDF ["Created: ^[[m" info/CreationDate]]
614+
if info/ModDate [sys/log/info 'PDF ["Modified:^[[m" info/ModDate]]
615+
if info/Producer [sys/log/info 'PDF ["Producer:^[[m" info/Producer]]
616+
if info/Creator [sys/log/info 'PDF ["Creator: ^[[m" info/Creator]]
607617
]
608618
also pdf pdf: none ; return result and release the internal value
609619
]
610620
encode: func [
611621
pdf [object!]
612-
/local xref xref-pos i n last-obj-id
622+
/local xref xref-pos i n last-obj-id version trailer objects info root
613623
][
614624
;@@ TODO!
615625
;-- This is just a very simple encoder with non-linearized output!
616626
;-- It does no input validity checks, so the user is responsible for providing
617627
;-- a well-formed PDF input object.
618628

629+
;- validate minimal requirements...
630+
objects: select pdf 'objects
631+
unless any [map? objects block? objects][
632+
sys/log/error 'PDF "Missing valid objects list!"
633+
return none
634+
]
635+
trailer: select pdf 'trailer
636+
unless trailer [
637+
extend pdf 'trailer trailer: #(Info: #[none] Root: #[none])
638+
]
639+
unless root: trailer/Root [
640+
sys/log/debug 'PDF "Trying to locate `Catalog` in PDF objects."
641+
foreach [ref obj] pdf/objects [
642+
if all [map? obj obj/Type = 'Catalog][
643+
trailer/Root: ref
644+
break
645+
]
646+
]
647+
]
648+
unless root: trailer/Root [
649+
sys/log/error 'PDF "Missing required `Catalog` object!"
650+
return none
651+
]
652+
if info: pick pdf/objects trailer/Info [
653+
unless info/CreationDate [info/CreationDate: now]
654+
if any [not info/Creator info/Creator = "Rebol"] [ info/Creator: rebol-version-str ]
655+
if any [not info/Producer info/Producer = "Rebol"] [ info/Producer: rebol-version-str ]
656+
657+
658+
info/ModDate: now
659+
]
660+
661+
unless version: select pdf 'version [ version: @1.3 ]
662+
if decimal? version [version: form version]
663+
unless parse version [some ch_number #"." some ch_number end][
664+
sys/log/error 'PDF ["Invalid PDF version:" mold version]
665+
return none
666+
]
667+
668+
;- File header..
619669
out: make binary! any [select pdf 'file-size 60000]
620-
out: insert out ajoin ["%PDF-" pdf/version "^M%"]
621-
out: insert out #{E2E3CFD3}
622-
out: insert out "^M^/"
670+
out: insert out ajoin ["%PDF-" version "^M%"]
671+
out: insert out #{E2E3CFD30D0A} ;= %âãÏÓ
623672
xref: copy []
673+
674+
675+
;- File body
624676
foreach [ref obj] pdf/objects [
625677
append xref reduce [ref -1 + index? out]
626678
out: insert insert out form-ref ref " obj^M"
627679
emit-val obj
628680

629681
out: insert out "^Mendobj^M"
630682
]
683+
684+
;- Cross-Reference Table
631685
xref-pos: out
632686
sort/skip xref 2
633-
;? xref
634687
i: 0
635688
n: get-xref-count xref i
636689
out: insert out ajoin [
@@ -656,8 +709,9 @@ register-codec [
656709
++ i
657710
]
658711
]
712+
;- File Trailer
659713
out: insert out "trailer^M^/"
660-
emit-val pdf/trailer
714+
emit-val trailer
661715
out: insert out ajoin ["^M^/startxref^M^/" -1 + index? xref-pos "^M^/%%EOF^M^/"]
662716

663717
head out

src/tests/units/codecs-test-pdf.r3

+49-1
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,58 @@ if find codecs 'pdf [
2323
--assert object? try [p1: load join %units/files/ file]
2424
--assert not error? try [save %tmp.pdf p1]
2525
--assert object? try [p2: load %tmp.pdf]
26-
--assert (keys-of p1/objects) = (keys-of p2/objects)
26+
--assert attempt [(keys-of p1/objects) = (keys-of p2/objects)]
2727
try [delete %tmp.pdf]
2828
]
2929

30+
--test-- "Save empty PDF"
31+
empty-pdf: object [
32+
trailer: #(
33+
Info: 1x0
34+
Root: 2x0
35+
)
36+
objects: #(
37+
;- Document Information Dictionary
38+
1x0 #(
39+
Producer: "Rebol"
40+
)
41+
;- Document Catalog
42+
2x0 #(
43+
Type: Catalog
44+
Pages: 3x0
45+
)
46+
;- Root of the document's page tree
47+
3x0 #(
48+
Type: Pages
49+
Kids: [5x0]
50+
Count: 1
51+
)
52+
;- Procedure Sets
53+
4x0 [
54+
PDF ; Painting and graphics state
55+
Text ; Text
56+
]
57+
;- First page
58+
5x0 #(
59+
Type: Page
60+
Parent: 3x0
61+
MediaBox: [0 0 612 792]
62+
Contents: 6x0
63+
Resources: #(
64+
ProcSet: 4x0
65+
)
66+
)
67+
;- Empty content
68+
6x0 #[object! [
69+
data: ""
70+
]]
71+
)
72+
]
73+
--assert not error? try [save %tmp-empty.pdf p1: empty-pdf]
74+
--assert object? try [p2: load %tmp-empty.pdf]
75+
--assert attempt [(keys-of p1/objects) = (keys-of p2/objects)]
76+
try [delete %tmp-empty.pdf]
77+
3078
===end-group===
3179
]
3280

0 commit comments

Comments
 (0)