Skip to content

Commit bccb0aa

Browse files
authored
protoprint: try to preserve printable unicode code points in output (#529)
Don't be so eager to resort to octal escapes. This makes the output readable for languages that use code points outside the 7-bit ASCII range. It also preserves use of other printable unicode symbols, including emoji.
1 parent 8dab444 commit bccb0aa

20 files changed

+294
-213
lines changed

desc/protoparse/test-source-info.txt

+4-4
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ desc_test_comments.proto:43:77
185185
some detached comments
186186

187187
Leading detached comment [1]:
188-
some detached comments
188+
some detached comments with unicode 这个是值
189189

190190
Leading detached comment [2]:
191191
Another field comment
@@ -381,7 +381,7 @@ desc_test_comments.proto:55:51
381381
desc_test_comments.proto:55:9
382382
desc_test_comments.proto:69:10
383383
Leading comments:
384-
Group comment
384+
Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
385385

386386
Trailing comments:
387387
trailer for Extras
@@ -829,12 +829,12 @@ desc_test_comments.proto:103:18
829829

830830
> message_type[0] > oneof_decl[1] > options:
831831
desc_test_comments.proto:105:17
832-
desc_test_comments.proto:105:57
832+
desc_test_comments.proto:105:89
833833

834834

835835
> message_type[0] > oneof_decl[1] > options > oofubar[0]:
836836
desc_test_comments.proto:105:17
837-
desc_test_comments.proto:105:57
837+
desc_test_comments.proto:105:89
838838
Leading comments:
839839
whoops?
840840

desc/protoprint/print.go

+64-6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"reflect"
1111
"sort"
1212
"strings"
13+
"unicode"
14+
"unicode/utf8"
1315

1416
"github.com/golang/protobuf/proto"
1517
"github.com/golang/protobuf/protoc-gen-go/descriptor"
@@ -1656,7 +1658,7 @@ func (p *Printer) printOption(name string, optVal interface{}, w *writer, indent
16561658
case string:
16571659
fmt.Fprintf(w, "%s", quotedString(optVal))
16581660
case []byte:
1659-
fmt.Fprintf(w, "%s", quotedString(string(optVal)))
1661+
fmt.Fprintf(w, "%s", quotedBytes(string(optVal)))
16601662
case bool:
16611663
fmt.Fprintf(w, "%v", optVal)
16621664
case ident:
@@ -1978,12 +1980,12 @@ func optionsAsElementAddrs(optionsTag int32, order int, opts map[int32][]option)
19781980
return optAddrs
19791981
}
19801982

1981-
// quotedString implements the text format for string literals for protocol
1982-
// buffers. This form is also acceptable for string literals in option values
1983-
// by the protocol buffer compiler, protoc.
1984-
func quotedString(s string) string {
1983+
// quotedBytes implements the text format for string literals for protocol
1984+
// buffers. Since the underlying data is a bytes field, this encodes all
1985+
// bytes outside the 7-bit ASCII printable range. To preserve unicode strings
1986+
// without byte escapes, use quotedString.
1987+
func quotedBytes(s string) string {
19851988
var b bytes.Buffer
1986-
// use WriteByte here to get any needed indent
19871989
b.WriteByte('"')
19881990
// Loop over the bytes, not the runes.
19891991
for i := 0; i < len(s); i++ {
@@ -2014,6 +2016,62 @@ func quotedString(s string) string {
20142016
return b.String()
20152017
}
20162018

2019+
// quotedString implements the text format for string literals for protocol
// buffers. This form is also acceptable for string literals in option values
// by the protocol buffer compiler, protoc.
//
// Printable unicode code points (including those outside the 7-bit ASCII
// range) are written as-is. Double quotes, backslashes, newlines, carriage
// returns, and tabs use their backslash escapes. Bytes that are not valid
// UTF8 are written as octal byte escapes, and valid but unprintable code
// points are written as octal (<= 0x7F), \u (<= 0xFFFF), or \U escapes.
func quotedString(s string) string {
	var b bytes.Buffer
	b.WriteByte('"')
	// Loop over the runes, not the bytes, so that printable multi-byte
	// code points can be preserved in the output.
	for len(s) > 0 {
		r, n := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError && n == 1 {
			// Invalid UTF8! Use an octal byte escape to encode the bad byte.
			fmt.Fprintf(&b, "\\%03o", s[0])
			s = s[1:]
			continue
		}

		// Divergence from C++: we don't escape apostrophes.
		// There's no need to escape them, and the C++ parser
		// copes with a naked apostrophe.
		switch r {
		case '\n':
			b.WriteString("\\n")
		case '\r':
			b.WriteString("\\r")
		case '\t':
			b.WriteString("\\t")
		case '"':
			// Must emit both the backslash and the quote; emitting only the
			// backslash would drop the quote and corrupt the literal.
			b.WriteString("\\\"")
		case '\\':
			b.WriteString("\\\\")
		default:
			switch {
			case unicode.IsPrint(r):
				b.WriteRune(r)
			// If it's not printable, use the narrowest escape that fits.
			case r > 0xffff:
				fmt.Fprintf(&b, "\\U%08X", r)
			case r > 0x7F:
				fmt.Fprintf(&b, "\\u%04X", r)
			default:
				fmt.Fprintf(&b, "\\%03o", byte(r))
			}
		}

		s = s[n:]
	}

	b.WriteByte('"')

	return b.String()
}
2074+
20172075
type elementAddr struct {
20182076
elementType int32
20192077
elementIndex int

desc/protoprint/print_test.go

+13
Original file line numberDiff line numberDiff line change
@@ -257,3 +257,16 @@ func checkContents(t *testing.T, actualContents string, goldenFileName string) {
257257

258258
testutil.Eq(t, string(b), actualContents, "wrong file contents for %s", goldenFileName)
259259
}
260+
261+
func TestQuoteString(t *testing.T) {
262+
// other tests have examples of encountering invalid UTF8 and printable unicode
263+
// so this is just for testing how unprintable valid unicode characters are rendered
264+
s := quotedString("\x04")
265+
testutil.Eq(t, "\"\\004\"", s)
266+
s = quotedString("\x7F")
267+
testutil.Eq(t, "\"\\177\"", s)
268+
s = quotedString("\u2028")
269+
testutil.Eq(t, "\"\\u2028\"", s)
270+
s = quotedString("\U0010FFFF")
271+
testutil.Eq(t, "\"\\U0010FFFF\"", s)
272+
}

desc/protoprint/testfiles/desc_test_comments-compact.proto

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ message Request {
1919
// lead mfubar
2020
option (testprotos.mfubar) = true; // trailing mfubar
2121
// some detached comments
22-
// some detached comments
22+
// some detached comments with unicode 这个是值
2323
// Another field comment
2424
// label comment
2525
optional string name = 2 [default = "fubar"];
2626
extensions 100 to 200;
2727
extensions 201 to 250 [(testprotos.exfubarb) = "\000\001\002\003\004\005\006\007", (testprotos.exfubar) = "splat!"];
2828
reserved 10 to 20, 30 to 50;
2929
reserved "foo", "bar", "baz";
30-
// Group comment
30+
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
3131
optional group Extras = 3 {
3232
// trailer for Extras
3333
// this is a custom option
@@ -65,7 +65,7 @@ message Request {
6565
// can be these or those
6666
oneof xyz {
6767
// whoops?
68-
option (testprotos.oofubar) = "whoops!";
68+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
6969
string these = 6;
7070
int32 those = 7;
7171
}

desc/protoprint/testfiles/desc_test_comments-custom-sort.proto

+3-3
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ message Request {
108108
string these = 6;
109109

110110
// whoops?
111-
option (testprotos.oofubar) = "whoops!";
111+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
112112
}
113113

114114
// can be this or that
@@ -125,7 +125,7 @@ message Request {
125125

126126
// some detached comments
127127

128-
// some detached comments
128+
// some detached comments with unicode 这个是值
129129

130130
// Another field comment
131131

@@ -140,7 +140,7 @@ message Request {
140140
json_name = "|foo|"
141141
]; // field trailer #1...
142142

143-
// Group comment
143+
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
144144
optional group Extras = 3 {
145145
// trailer for Extras
146146

desc/protoprint/testfiles/desc_test_comments-default.proto

+3-3
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ message Request {
3737

3838
// some detached comments
3939

40-
// some detached comments
40+
// some detached comments with unicode 这个是值
4141

4242
// Another field comment
4343

@@ -55,7 +55,7 @@ message Request {
5555

5656
reserved "foo", "bar", "baz";
5757

58-
// Group comment
58+
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
5959
optional group Extras = 3 {
6060
// trailer for Extras
6161

@@ -123,7 +123,7 @@ message Request {
123123
// can be these or those
124124
oneof xyz {
125125
// whoops?
126-
option (testprotos.oofubar) = "whoops!";
126+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
127127

128128
string these = 6;
129129

desc/protoprint/testfiles/desc_test_comments-multiline-style-comments.proto

+3-3
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ message Request {
3939

4040
/* some detached comments */
4141

42-
/* some detached comments */
42+
/* some detached comments with unicode 这个是值 */
4343

4444
/* Another field comment */
4545

@@ -57,7 +57,7 @@ message Request {
5757

5858
reserved "foo", "bar", "baz";
5959

60-
/* Group comment */
60+
/* Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐 */
6161
optional group Extras = 3 {
6262
/* trailer for Extras */
6363

@@ -125,7 +125,7 @@ message Request {
125125
/* can be these or those */
126126
oneof xyz {
127127
/* whoops? */
128-
option (testprotos.oofubar) = "whoops!";
128+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
129129

130130
string these = 6;
131131

desc/protoprint/testfiles/desc_test_comments-no-trailing-comments.proto

+3-3
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ message Request {
3737

3838
// some detached comments
3939

40-
// some detached comments
40+
// some detached comments with unicode 这个是值
4141

4242
// Another field comment
4343

@@ -55,7 +55,7 @@ message Request {
5555

5656
reserved "foo", "bar", "baz";
5757

58-
// Group comment
58+
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
5959
optional group Extras = 3 {
6060
// this is a custom option
6161
option (testprotos.mfubar) = false;
@@ -117,7 +117,7 @@ message Request {
117117
// can be these or those
118118
oneof xyz {
119119
// whoops?
120-
option (testprotos.oofubar) = "whoops!";
120+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
121121

122122
string these = 6;
123123

desc/protoprint/testfiles/desc_test_comments-only-doc-comments.proto

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ message Request {
3636

3737
reserved "foo", "bar", "baz";
3838

39-
// Group comment
39+
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
4040
optional group Extras = 3 {
4141
option (testprotos.mfubar) = false;
4242

@@ -95,7 +95,7 @@ message Request {
9595

9696
// can be these or those
9797
oneof xyz {
98-
option (testprotos.oofubar) = "whoops!";
98+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
9999

100100
string these = 6;
101101

desc/protoprint/testfiles/desc_test_comments-sorted-AND-multiline-style-comments.proto

+3-3
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,14 @@ message Request {
4242

4343
/* some detached comments */
4444

45-
/* some detached comments */
45+
/* some detached comments with unicode 这个是值 */
4646

4747
/* Another field comment */
4848

4949
/* label comment */
5050
optional string name = 2 [default = "fubar"];
5151

52-
/* Group comment */
52+
/* Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐 */
5353
optional group Extras = 3 {
5454
/* trailer for Extras */
5555

@@ -78,7 +78,7 @@ message Request {
7878
/* can be these or those */
7979
oneof xyz {
8080
/* whoops? */
81-
option (testprotos.oofubar) = "whoops!";
81+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
8282

8383
string these = 6;
8484

desc/protoprint/testfiles/desc_test_comments-sorted.proto

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ message Request {
3232
// label comment
3333
optional string name = 2 [default = "fubar"];
3434

35-
// Group comment
35+
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
3636
optional group Extras = 3 {
3737
// trailer for Extras
3838

@@ -61,7 +61,7 @@ message Request {
6161
// can be these or those
6262
oneof xyz {
6363
// whoops?
64-
option (testprotos.oofubar) = "whoops!";
64+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
6565

6666
string these = 6;
6767

desc/protoprint/testfiles/desc_test_comments-trailing-on-next-line.proto

+3-3
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ message Request {
4141

4242
// some detached comments
4343

44-
// some detached comments
44+
// some detached comments with unicode 这个是值
4545

4646
// Another field comment
4747

@@ -59,7 +59,7 @@ message Request {
5959

6060
reserved "foo", "bar", "baz";
6161

62-
// Group comment
62+
// Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
6363
optional group Extras = 3 {
6464
// trailer for Extras
6565

@@ -128,7 +128,7 @@ message Request {
128128
// can be these or those
129129
oneof xyz {
130130
// whoops?
131-
option (testprotos.oofubar) = "whoops!";
131+
option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
132132

133133
string these = 6;
134134

0 commit comments

Comments
 (0)