Skip to content

Commit f5016c1

Browse files
committed
CHANGE: JSON codec code cleanup
1 parent 91aa512 commit f5016c1

File tree

1 file changed

+75
-101
lines changed

1 file changed

+75
-101
lines changed

src/mezz/codec-json.reb

+75-101
Original file line numberDiff line numberDiff line change
@@ -57,33 +57,6 @@ Rebol [
5757
;-----------------------------------------------------------
5858
;-- Generic support funcs
5959

60-
BOM: [
61-
UTF-8 #{EFBBBF}
62-
UTF-16-BE #{FEFF}
63-
UTF-16-LE #{FFFE}
64-
UTF-32-BE #{0000FEFF}
65-
UTF-32-LE #{FFFE0000}
66-
]
67-
68-
BOM-UTF-16?: func [data [string! binary!]][
69-
any [find/match/tail data BOM/UTF-16-BE find/match/tail data BOM/UTF-16-LE]
70-
]
71-
72-
BOM-UTF-32?: func [data [string! binary!]][
73-
any [find/match/tail data BOM/UTF-32-BE find/match/tail data BOM/UTF-32-LE]
74-
]
75-
76-
77-
; MOLD adds quotes string!, but not all any-string! values.
78-
enquote: func [str [string!] "(modified)"][append insert str {"} {"}]
79-
80-
high-surrogate?: func [codepoint [integer!]][
81-
all [codepoint >= 55296 codepoint <= 56319] ;D800h DBFFh
82-
]
83-
84-
low-surrogate?: func [codepoint [integer!]][
85-
all [codepoint >= 56320 codepoint <= 57343] ;DC00h DFFFh
86-
]
8760

8861
translit: func [
8962
"Transliterate sub-strings in a string"
@@ -106,8 +79,8 @@ translit: func [
10679

10780
;TBD: I think this can be improved. --Gregg
10881

109-
json-to-red-escape-table: [
110-
; JSON Red
82+
json-to-rebol-escape-table: [
83+
; JSON Rebol
11184
{\"} "^""
11285
{\\} "\"
11386
{\/} "/"
@@ -117,46 +90,46 @@ json-to-red-escape-table: [
11790
{\r} "^M"
11891
{\t} "^-"
11992
]
120-
red-to-json-escape-table: reverse copy json-to-red-escape-table
93+
rebol-to-json-escape-table: reverse copy json-to-rebol-escape-table
12194

122-
json-esc-ch: charset {"t\/nrbf} ; Backslash escaped JSON chars
123-
json-escaped: [#"\" json-esc-ch] ; Backslash escape rule
124-
red-esc-ch: charset {^"^-\/^/^M^H^L} ; Red chars requiring JSON backslash escapes
95+
rebol-esc-ch: charset {^"^-\/^/^M^H^L} ; Rebol chars requiring JSON backslash escapes
96+
json-esc-ch: charset {"t\/nrbf} ; Backslash escaped JSON chars
97+
json-escaped: [#"\" json-esc-ch] ; Backslash escape rule
12598

12699
decode-backslash-escapes: func [string [string!] "(modified)"][
127-
translit string json-escaped json-to-red-escape-table
100+
translit string json-escaped json-to-rebol-escape-table
128101
]
129102

130103
encode-backslash-escapes: func [string [string!] "(modified)"][
131-
translit string red-esc-ch red-to-json-escape-table
104+
translit string rebol-esc-ch rebol-to-json-escape-table
132105
]
133106

134-
ctrl-char: charset [#"^@" - #"^_"] ; Control chars 0-31
135107
;-----------------------------------------------------------
136-
;-- JSON decoder
108+
;-- JSON decoder
137109
;-----------------------------------------------------------
138110

139111
;# Basic rules
140-
ws: charset " ^-^/^M" ; Whitespace
112+
hex-char: system/catalog/bitsets/hex-digits
113+
digit: system/catalog/bitsets/numeric
114+
ws: system/catalog/bitsets/whitespace
141115
ws*: [any ws]
142116
ws+: [some ws]
143-
sep: [ws* #"," ws*] ; JSON value separator
144-
digit: charset "0123456789"
145-
non-zero-digit: charset "123456789"
146-
hex-char: charset "0123456789ABCDEFabcdef"
147-
chars: charset [not {\"} #"^@" - #"^_"] ; Unescaped chars (NOT creates a virtual bitset)
117+
sep: [ws* #"," ws*] ; JSON value separator
118+
non-zero-digit: #[bitset! #{0000000000007FC0}] ;= charset "123456789"
119+
; Unescaped chars (NOT creates a virtual bitset)
120+
chars: #[bitset! [not bits #{FFFFFFFF2000000000000008}]] ;=charset [not {\"} #"^@"-#"^_"]
148121

149-
; chars allowed in Red word! values - note that we don't allow < and > at all even though they are somewhat valid in word!
122+
; chars allowed in Rebol word! values - note that we don't allow < and > at all even though they are somewhat valid in word!
150123
not-word-char: charset {/\^^,[](){}"#%$@:;^/^(00A0) ^-^M<>}
151-
word-1st: complement append union not-word-char digit #"'"
124+
word-1st: complement append union not-word-char digit #"'"
152125
word-char: complement not-word-char
153126

154127
;-----------------------------------------------------------
155-
;-- JSON value rules
128+
;-- JSON value rules
156129
;-----------------------------------------------------------
157130

158131
;-----------------------------------------------------------
159-
;-- Number
132+
;-- Number
160133
sign: [#"-"]
161134
; Integers can't have leading zeros, but zero by itself is valid.
162135
int: [[non-zero-digit any digit] | digit]
@@ -166,12 +139,12 @@ number: [opt sign int opt frac opt exp]
166139
numeric-literal: :number
167140

168141
;-----------------------------------------------------------
169-
;-- String
142+
;-- String
170143
string-literal: [
171144
#"^"" copy _str [
172145
any [some chars | #"\" [#"u" 4 hex-char | json-esc-ch]]
173146
] #"^"" (
174-
if not empty? _str: any [_str copy ""] [
147+
if not empty? _str: any [_str copy ""][
175148
;!! If we reverse the decode-backslash-escapes and replace-unicode-escapes
176149
;!! calls, the string gets munged (extra U+ chars). Need to investigate.
177150
decode-backslash-escapes _str ; _str is modified
@@ -186,7 +159,7 @@ decode-unicode-char: func [
186159
ch [string!] "4 hex digits"
187160
][
188161
buf: {#"^^(0000)"} ; Don't COPY buffer, reuse it
189-
if not parse ch [4 hex-char] [return none] ; Validate input data
162+
if not parse ch [4 hex-char][return none] ; Validate input data
190163
attempt [load head change at buf 5 ch] ; Replace 0000 section in buf
191164
]
192165

@@ -209,7 +182,7 @@ replace-unicode-escapes: func [
209182
;mod-str: json-ctx/replace-unicode-escapes decode-backslash-escapes copy str
210183

211184
;-----------------------------------------------------------
212-
;-- Object
185+
;-- Object
213186
json-object: [
214187
; Emit a new block to our output target, and push it on our
215188
; working stack, to handle nested structures. Emit returns
@@ -244,7 +217,7 @@ property: [
244217
json-name: [ws* string-literal ws* #":"]
245218

246219
;-----------------------------------------------------------
247-
;-- List
220+
;-- List
248221
array-list: [json-value any [sep json-value]]
249222
json-array: [
250223
; Emit a new block to our output target, and push it on our
@@ -258,7 +231,7 @@ json-array: [
258231
]
259232

260233
;-----------------------------------------------------------
261-
;-- Any JSON Value (top level JSON parse rule)
234+
;-- Any JSON Value (top level JSON parse rule)
262235
json-value: [
263236
ws*
264237
[
@@ -275,7 +248,7 @@ json-value: [
275248
]
276249

277250
;-----------------------------------------------------------
278-
;-- Decoder data structures
251+
;-- Decoder data structures
279252

280253
; The stack is used to handle nested structures (objects and lists)
281254
stack: copy []
@@ -294,26 +267,23 @@ mark: none ; Current parse position
294267
emit: func [value][_res: insert/only _res value]
295268

296269
;-----------------------------------------------------------
297-
;-- Main decoder func
270+
;-- Main decoder func
298271

299272
load-json: func [
300-
"Convert a JSON string to Red data"
273+
"Convert a JSON string to Rebol data"
301274
input [string!] "The JSON string"
302-
] [
275+
][
303276
_out: _res: copy [] ; These point to the same position to start with
304277
mark: input
305-
either parse/case input json-value [pick _out 1] [
278+
either parse/case input json-value [pick _out 1][
306279
make error! form reduce [
307-
"Invalid json string. Near:"
308-
either tail? mark ["<end of input>"] [mold copy/part mark 40]
280+
"Invalid JSON string. Near:"
281+
either tail? mark ["<end of input>"][mold copy/part mark 40]
309282
]
310283
]
311284
]
312285

313286

314-
315-
316-
317287
;----------------------------------------------------------------
318288
;@@ to-json
319289

@@ -330,40 +300,43 @@ escapes: #[map! [
330300
#"^-" "\t"
331301
]]
332302

333-
init-state: func [ind ascii?] [
303+
init-state: func [ind ascii?][
334304
indent: ind
335305
indent-level: 0
336306
; 34 is double quote "
337307
; 92 is backslash \
338308
normal-chars: either ascii? [
339-
charset [32 33 35 - 91 93 - 127]
340-
] [
341-
complement charset [0 - 31 34 92]
309+
#[bitset! #{00000000DFFFFFFFFFFFFFF7FFFFFFFF}]
310+
;= charset [32 33 35 - 91 93 - 127]
311+
][
312+
#[bitset! [not bits #{FFFFFFFF2000000000000008}]]
313+
;= complement charset [0 - 31 34 92]
342314
]
343315
]
344316

345-
emit-indent: func [output level] [
317+
emit-indent: func [output level][
346318
indent-level: indent-level + level
347319
append/dup output indent indent-level
348320
]
349321

350-
emit-key-value: function [output sep map key] [
322+
emit-key-value: function [output sep map key][
351323
value: select/case map :key
352-
if any-word? :key [key: form key]
324+
if any-word? :key [key: form key]
353325
unless string? :key [key: mold :key]
354-
red-to-json-value output key
355-
append output sep
356-
red-to-json-value output :value
326+
rebol-to-json-value output :key
327+
append output :sep
328+
rebol-to-json-value output :value
357329
]
358330

359-
red-to-json-value: function [output value] [
331+
rebol-to-json-value: function [output value][
360332
special-char: none
361333
switch/default type?/word :value [
362-
none! [append output "null"]
363-
logic! [append output pick ["true" "false"] value]
364-
integer! decimal! [append output value]
365-
percent! [append output to decimal! value]
366-
string! [
334+
none! [append output "null"]
335+
logic! [append output pick ["true" "false"] value]
336+
integer!
337+
decimal! [append output value]
338+
percent! [append output to decimal! value]
339+
string! [
367340
append output #"^""
368341
parse value [
369342
any [
@@ -372,7 +345,7 @@ red-to-json-value: function [output value] [
372345
set special-char skip (
373346
either escape: select escapes special-char [
374347
append output escape
375-
] [
348+
][
376349
insert insert tail output "\u" to-hex/size to integer! special-char 4
377350
]
378351
)
@@ -383,36 +356,37 @@ red-to-json-value: function [output value] [
383356
block! [
384357
either empty? value [
385358
append output "[]"
386-
] [
359+
][
387360
either indent [
388361
append output "[^/"
389362
emit-indent output +1
390-
red-to-json-value output first value
363+
rebol-to-json-value output first value
391364
foreach v next value [
392365
append output ",^/"
393366
append/dup output indent indent-level
394-
red-to-json-value output :v
367+
rebol-to-json-value output :v
395368
]
396369
append output #"^/"
397370
emit-indent output -1
398-
] [
371+
][
399372
append output #"["
400-
red-to-json-value output first value
373+
rebol-to-json-value output first value
401374
foreach v next value [
402375
append output #","
403-
red-to-json-value output :v
376+
rebol-to-json-value output :v
404377
]
405378
]
406379
append output #"]"
407380
]
408381
]
409-
map! object! [
382+
map!
383+
object! [
410384
keys: words-of value
411385
either empty? keys [
412386
append output "{}"
413-
] [
387+
][
414388
either indent [
415-
append output "{^/" ; }
389+
append output "{^/"
416390
emit-indent output +1
417391
emit-key-value output ": " value first keys
418392
foreach k next keys [
@@ -422,8 +396,8 @@ red-to-json-value: function [output value] [
422396
]
423397
append output #"^/"
424398
emit-indent output -1
425-
] [
426-
append output #"{" ; }
399+
][
400+
append output #"{"
427401
emit-key-value output #":" value first keys
428402
foreach k next keys [
429403
append output #","
@@ -433,25 +407,25 @@ red-to-json-value: function [output value] [
433407
append output #"}"
434408
]
435409
]
436-
] [
437-
red-to-json-value output either any-block? :value [
410+
][
411+
rebol-to-json-value output either any-block? :value [
438412
to block! :value
439-
] [
440-
either any-string? :value [form value] [mold :value]
413+
][
414+
either any-string? :value [form value][mold :value]
441415
]
442416
]
443417
output
444418
]
445419

446420
to-json: function [
447-
"Convert Red data to a JSON string"
421+
"Convert Rebol data to a JSON string"
448422
data
449423
/pretty indent [string!] "Pretty format the output, using given indentation"
450424
/ascii "Force ASCII output (instead of UTF-8)"
451-
] [
425+
][
452426
result: make string! 4000
453427
init-state indent ascii
454-
red-to-json-value result data
428+
rebol-to-json-value result data
455429
]
456430

457431

@@ -462,11 +436,11 @@ register-codec [
462436
title: "JavaScript Object Notation"
463437
suffixes: [%.json]
464438

465-
encode: func [data [any-type!]] [
439+
encode: func [data [any-type!]][
466440
to-json data
467441
]
468-
decode: func [text [string! binary! file!]] [
469-
if file? text [text: read text]
442+
decode: func [text [string! binary! file!]][
443+
if file? text [text: read text]
470444
if binary? text [text: to string! text]
471445
load-json text
472446
]

0 commit comments

Comments
 (0)