Skip to content

Commit 02a3979

Browse files
committed
FEAT: a few optimizations after quick review of the original parse-xml code
1 parent 6ff6d7d commit 02a3979

File tree

1 file changed

+33
-116
lines changed

1 file changed

+33
-116
lines changed

src/mezz/codec-xml.r

+33-116
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ register-codec [
255255
][
256256
]
257257
characters: func [
258-
characters [string! none!]
258+
characters [string! char! none!]
259259
][
260260
]
261261
pi: func [
@@ -338,7 +338,7 @@ register-codec [
338338
print remold ['end-elem ns-uri local-name q-name]
339339
]
340340
characters: func [
341-
characters [string! none!]
341+
characters [string! char! none!]
342342
][
343343
print remold ['characters characters]
344344
]
@@ -387,36 +387,38 @@ register-codec [
387387
;
388388
; Seed the document
389389
;
390-
xml-block: reduce copy/deep ['document [version none
391-
encoding none
392-
standalone none
393-
doctype none
394-
pubid none
395-
sysid none
396-
subset none
397-
]
398-
none
399-
]
390+
xml-block: reduce copy/deep [
391+
'document [
392+
version none
393+
encoding none
394+
standalone none
395+
doctype none
396+
pubid none
397+
sysid none
398+
subset none
399+
]
400+
none
401+
]
400402
]
401403
xml-decl: func [
402404
version-info [string! none!]
403405
encoding [string! none!]
404406
standalone [string! none!]
405407
][
406-
change next (find xml-block/2 'version) version-info
407-
change next (find xml-block/2 'encoding) encoding
408-
change next (find xml-block/2 'standalone) standalone
408+
xml-block/2/version: version-info
409+
xml-block/2/encoding: encoding
410+
xml-block/2/standalone: standalone
409411
]
410412
document-type: func [
411413
document-type [string!]
412414
public-id [string! none!]
413415
system-id [string! none!]
414416
internal-subset [string! none!]
415417
][
416-
change next (find xml-block/2 'doctype) document-type
417-
change next (find xml-block/2 'pubid) public-id
418-
change next (find xml-block/2 'sysid) system-id
419-
change next (find xml-block/2 'subset) internal-subset
418+
xml-block/2/doctype: document-type
419+
xml-block/2/pubid: public-id
420+
xml-block/2/sysid: system-id
421+
xml-block/2/subset: internal-subset
420422
]
421423
start-element: func [
422424
ns-uri [string! none!]
@@ -442,7 +444,7 @@ register-codec [
442444
]
443445
]
444446
characters: func [
445-
characters [string! none!]
447+
characters [string! char! none!]
446448
][
447449
;
448450
; Accumulate more character data
@@ -495,48 +497,9 @@ register-codec [
495497
; processing. It should only be used with a parser that has been
496498
; set to namespace-aware true.
497499
;
498-
ns-block-handler: make xml-parse-handler [
499-
xml-doc: copy []
500-
xml-block: copy []
501-
xml-content: copy ""
500+
ns-block-handler: make block-handler [
502501
nsinfo-stack: copy []
503502

504-
start-document: func [
505-
][
506-
;
507-
; Seed the document
508-
;
509-
xml-block: reduce copy/deep ['document [version none
510-
encoding none
511-
standalone none
512-
doctype none
513-
pubid none
514-
sysid none
515-
subset none
516-
]
517-
none
518-
]
519-
]
520-
xml-decl: func [
521-
version-info [string! none!]
522-
encoding [string! none!]
523-
standalone [string! none!]
524-
][
525-
change next (find xml-block/2 'version) version-info
526-
change next (find xml-block/2 'encoding) encoding
527-
change next (find xml-block/2 'standalone) standalone
528-
]
529-
document-type: func [
530-
document-type [string!]
531-
public-id [string! none!]
532-
system-id [string! none!]
533-
internal-subset [string! none!]
534-
][
535-
change next (find xml-block/2 'doctype) document-type
536-
change next (find xml-block/2 'pubid) public-id
537-
change next (find xml-block/2 'sysid) system-id
538-
change next (find xml-block/2 'subset) internal-subset
539-
]
540503
start-element: func [
541504
ns-uri [string! none!]
542505
local-name [string! none!]
@@ -564,41 +527,7 @@ register-codec [
564527
xml-block/2: copy attr-list
565528
]
566529
]
567-
characters: func [
568-
characters [string! none!]
569-
][
570-
;
571-
; Accumulate more character data
572-
;
573-
if not none? characters [
574-
append xml-content characters
575-
]
576-
]
577-
end-element: func [
578-
ns-uri [string! none!]
579-
local-name [string! none!]
580-
q-name [string!]
581-
][
582-
;
583-
; Is there any pending content to add before
584-
; we terminate this element?
585-
;
586-
if not empty? xml-content [
587-
add-child copy xml-content
588-
clear head xml-content
589-
]
590-
;
591-
; Basic well-formedness check
592-
;
593-
; while [q-name <> first xml-block] [
594-
; if empty? xml-doc [
595-
; print ["End tag error:" q-name]
596-
; halt
597-
; ]
598-
; pop-xml-block
599-
; ]
600-
pop-xml-block
601-
]
530+
602531
start-prefix-mapping: func [
603532
ns-prefix-uri-pairs [block!]
604533
][
@@ -609,18 +538,6 @@ register-codec [
609538
][
610539
remove nsinfo-stack
611540
]
612-
add-child: func [child] [
613-
if none? third xml-block [xml-block/3: make block! 1]
614-
insert/only tail third xml-block child
615-
child
616-
]
617-
pop-xml-block: func [] [
618-
xml-block: last xml-doc
619-
remove back tail xml-doc
620-
]
621-
get-parse-result: func [] [
622-
xml-block
623-
]
624541
]
625542

626543

@@ -1078,8 +995,8 @@ register-codec [
1078995
;
1079996
convert-character-entity: func [{
1080997
Accepts the name reference portion of an entity
1081-
reference and attempts to return a string containing
1082-
the actual character referenced by the entity.
998+
reference and attempts to return the actual character
999+
referenced by the entity.
10831000
If the conversion is not successful, the value of
10841001
none is returned.
10851002
For example, for the ampersand character this function
@@ -1089,18 +1006,18 @@ register-codec [
10891006
entity-ref [string!]
10901007
][
10911008
switch/default entity-ref [
1092-
"lt" [ return "<" ]
1093-
"gt" [ return ">" ]
1094-
"amp" [ return "&" ]
1095-
"quot" [ return "^"" ]
1096-
"apos" [ return "'" ]
1009+
"lt" [ return #"<" ]
1010+
"gt" [ return #">" ]
1011+
"amp" [ return #"&" ]
1012+
"quot" [ return #"^"" ]
1013+
"apos" [ return #"'" ]
10971014
][
10981015
either (first entity-ref) = #"#" [
10991016
either (second entity-ref) = #"x" [
1100-
to-string to-char to-integer to-issue
1017+
to char! to integer! to issue!
11011018
skip entity-ref 2
11021019
][
1103-
to-string to-char to-integer
1020+
to char! to integer!
11041021
skip entity-ref 1
11051022
]
11061023
][

0 commit comments

Comments
 (0)