Skip to content

Commit

Permalink
encode: escape non-ascii characters during encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
AnnaShaleva committed Sep 15, 2021
1 parent aceee4d commit a668059
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 4 deletions.
2 changes: 1 addition & 1 deletion decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ var badUTF8 = []struct {
{"\xff", `"\ufffd"`},
{"\xff\xff", `"\ufffd\ufffd"`},
{"a\xffb", `"a\ufffdb"`},
{"\xe6\x97\xa5\xe6\x9c\xac\xff\xaa\x9e", `"日本\ufffd\ufffd\ufffd"`},
{"\xe6\x97\xa5\xe6\x9c\xac\xff\xaa\x9e", `"\u65E5\u672C\ufffd\ufffd\ufffd"`},
}

func TestMarshalBadUTF8(t *testing.T) {
Expand Down
21 changes: 21 additions & 0 deletions encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"sync"
"sync/atomic"
"unicode"
"unicode/utf16"
"unicode/utf8"
// new in golang 1.9
"golang.org/x/sync/syncmap"
Expand Down Expand Up @@ -966,7 +967,17 @@ func (e *encodeState) string(s string, escapeHTML bool) int {
start = i
continue
}
if start < i {
e.WriteString(s[start:i])
}
if c < 0x10000 {
e.WriteString(fmt.Sprintf(`\u%04X`, c))
} else {
r1, r2 := utf16.EncodeRune(c)
e.WriteString(fmt.Sprintf(`\u%04X\u%04X`, r1, r2))
}
i += size
start = i
}
if start < len(s) {
e.WriteString(s[start:])
Expand Down Expand Up @@ -1043,7 +1054,17 @@ func (e *encodeState) stringBytes(s []byte, escapeHTML bool) int {
start = i
continue
}
if start < i {
e.Write(s[start:i])
}
if c < 0x10000 {
e.WriteString(fmt.Sprintf(`\u%04X`, c))
} else {
r1, r2 := utf16.EncodeRune(c)
e.WriteString(fmt.Sprintf(`\u%04X\u%04X`, r1, r2))
}
i += size
start = i
}
if start < len(s) {
e.Write(s[start:])
Expand Down
13 changes: 13 additions & 0 deletions encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,19 @@ func (CText) MarshalText() ([]byte, error) {
return []byte(`"<&>"`), nil
}


// TestMarshaler_NeoGo_PR2174 checks that Marshal escapes the non-ASCII runes
// of a string as uppercase-hex \uXXXX sequences while leaving the ASCII
// characters ("IOU", "NEP-17") untouched. The input mixes ASCII with CJK
// text and full-width punctuation; judging by the test name it is the case
// from NeoGo PR 2174.
func TestMarshaler_NeoGo_PR2174(t *testing.T) {
	source := "IOU(欠条币):一种支持负数的NEP-17(非严格意义上的)资产,合约无存储区,账户由区块链浏览器统计"
	b, err := Marshal(source)
	if err != nil {
		// The marshalled value is source; report it under that name.
		t.Fatalf("Marshal(source): %v", err)
	}
	// Expected output: the input wrapped in JSON quotes, with each
	// non-ASCII rune replaced by its \uXXXX escape.
	want := `"` + `IOU\uFF08\u6B20\u6761\u5E01\uFF09\uFF1A\u4E00\u79CD\u652F\u6301\u8D1F\u6570\u7684NEP-17\uFF08\u975E\u4E25\u683C\u610F\u4E49\u4E0A\u7684\uFF09\u8D44\u4EA7\uFF0C\u5408\u7EA6\u65E0\u5B58\u50A8\u533A\uFF0C\u8D26\u6237\u7531\u533A\u5757\u94FE\u6D4F\u89C8\u5668\u7EDF\u8BA1` + `"`
	if got := string(b); got != want {
		t.Errorf("Marshal(source) = %#q, want %#q", got, want)
	}
}

func TestMarshalerEscaping(t *testing.T) {
var c C
want := `"\u003c\u0026\u003e"`
Expand Down
6 changes: 3 additions & 3 deletions stream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ null
true
false
["a","b","c"]
{"ß":"long s","K":"Kelvin"}
{"\u00DF":"long s","\u212A":"Kelvin"}
3.14
`

Expand Down Expand Up @@ -71,8 +71,8 @@ false
>."c"
>]
{
>."ß": "long s",
>."K": "Kelvin"
>."\u00DF": "long s",
>."\u212A": "Kelvin"
>}
3.14
`
Expand Down

0 comments on commit a668059

Please sign in to comment.