jhump
diff --git a/‎desc/protoparse/test-source-info.txt
+4-4 b/‎desc/protoparse/test-source-info.txt
+4-4
diff --git a/‎desc/protoprint/print.go
+64-6 b/‎desc/protoprint/print.go
+64-6
diff --git a/‎desc/protoprint/print_test.go
+13 b/‎desc/protoprint/print_test.go
+13
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-compact.proto
+3-3 b/‎desc/protoprint/testfiles/desc_test_comments-compact.proto
+3-3
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-custom-sort.proto
+3-3 b/‎desc/protoprint/testfiles/desc_test_comments-custom-sort.proto
+3-3
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-default.proto
+3-3 b/‎desc/protoprint/testfiles/desc_test_comments-default.proto
+3-3
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-multiline-style-comments.proto
+3-3 b/‎desc/protoprint/testfiles/desc_test_comments-multiline-style-comments.proto
+3-3
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-no-trailing-comments.proto
+3-3 b/‎desc/protoprint/testfiles/desc_test_comments-no-trailing-comments.proto
+3-3
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-only-doc-comments.proto
+2-2 b/‎desc/protoprint/testfiles/desc_test_comments-only-doc-comments.proto
+2-2
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-sorted-AND-multiline-style-comments.proto
+3-3 b/‎desc/protoprint/testfiles/desc_test_comments-sorted-AND-multiline-style-comments.proto
+3-3
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-sorted.proto
+2-2 b/‎desc/protoprint/testfiles/desc_test_comments-sorted.proto
+2-2
diff --git a/‎desc/protoprint/testfiles/desc_test_comments-trailing-on-next-line.proto
+3-3 b/‎desc/protoprint/testfiles/desc_test_comments-trailing-on-next-line.proto
+3-3
@@ -185,7 +185,7 @@ desc_test_comments.proto:43:77
  some detached comments
 
     Leading detached comment [1]:
- some detached comments
+ some detached comments with unicode 这个是值
 
     Leading detached comment [2]:
  Another field comment
@@ -381,7 +381,7 @@ desc_test_comments.proto:55:51
 desc_test_comments.proto:55:9
 desc_test_comments.proto:69:10
     Leading comments:
- Group comment
+ Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
 
     Trailing comments:
  trailer for Extras
@@ -829,12 +829,12 @@ desc_test_comments.proto:103:18
 
  > message_type[0] > oneof_decl[1] > options:
 desc_test_comments.proto:105:17
-desc_test_comments.proto:105:57
+desc_test_comments.proto:105:89
 
 
  > message_type[0] > oneof_decl[1] > options > oofubar[0]:
 desc_test_comments.proto:105:17
-desc_test_comments.proto:105:57
+desc_test_comments.proto:105:89
     Leading comments:
  whoops?
 
 
@@ -10,6 +10,8 @@ import (
 	"reflect"
 	"sort"
 	"strings"
+	"unicode"
+	"unicode/utf8"
 
 	"github.com/golang/protobuf/proto"
 	"github.com/golang/protobuf/protoc-gen-go/descriptor"
@@ -1656,7 +1658,7 @@ func (p *Printer) printOption(name string, optVal interface{}, w *writer, indent
 	case string:
 		fmt.Fprintf(w, "%s", quotedString(optVal))
 	case []byte:
-		fmt.Fprintf(w, "%s", quotedString(string(optVal)))
+		fmt.Fprintf(w, "%s", quotedBytes(string(optVal)))
 	case bool:
 		fmt.Fprintf(w, "%v", optVal)
 	case ident:
@@ -1978,12 +1980,12 @@ func optionsAsElementAddrs(optionsTag int32, order int, opts map[int32][]option)
 	return optAddrs
 }
 
-// quotedString implements the text format for string literals for protocol
-// buffers. This form is also acceptable for string literals in option values
-// by the protocol buffer compiler, protoc.
-func quotedString(s string) string {
+// quotedBytes implements the text format for string literals for protocol
+// buffers. Since the underlying data is a bytes field, this encodes all
+// bytes outside the 7-bit ASCII printable range. To preserve unicode strings
+// without byte escapes, use quotedString.
+func quotedBytes(s string) string {
 	var b bytes.Buffer
-	// use WriteByte here to get any needed indent
 	b.WriteByte('"')
 	// Loop over the bytes, not the runes.
 	for i := 0; i < len(s); i++ {
@@ -2014,6 +2016,62 @@ func quotedString(s string) string {
 	return b.String()
 }
 
+// quotedString implements the text format for string literals for protocol
+// buffers. This form is also acceptable for string literals in option values
+// by the protocol buffer compiler, protoc.
+func quotedString(s string) string {
+	var b bytes.Buffer
+	b.WriteByte('"')
+	// Loop over the bytes, not the runes.
+	for {
+		r, n := utf8.DecodeRuneInString(s)
+		if n == 0 {
+			break // end of string
+		}
+		if r == utf8.RuneError && n == 1 {
+			// Invalid UTF8! Use an octal byte escape to encode the bad byte.
+			fmt.Fprintf(&b, "\\%03o", s[0])
+			s = s[1:]
+			continue
+		}
+
+		// Divergence from C++: we don't escape apostrophes.
+		// There's no need to escape them, and the C++ parser
+		// copes with a naked apostrophe.
+		switch r {
+		case '\n':
+			b.WriteString("\\n")
+		case '\r':
+			b.WriteString("\\r")
+		case '\t':
+			b.WriteString("\\t")
+		case '"':
+			b.WriteString("\\")
+		case '\\':
+			b.WriteString("\\\\")
+		default:
+			if unicode.IsPrint(r) {
+				b.WriteRune(r)
+			} else {
+				// if it's not printable, use a unicode escape
+				if r > 0xffff {
+					fmt.Fprintf(&b, "\\U%08X", r)
+				} else if r > 0x7F {
+					fmt.Fprintf(&b, "\\u%04X", r)
+				} else {
+					fmt.Fprintf(&b, "\\%03o", byte(r))
+				}
+			}
+		}
+
+		s = s[n:]
+	}
+
+	b.WriteByte('"')
+
+	return b.String()
+}
+
 type elementAddr struct {
 	elementType  int32
 	elementIndex int
 
@@ -257,3 +257,16 @@ func checkContents(t *testing.T, actualContents string, goldenFileName string) {
 
 	testutil.Eq(t, string(b), actualContents, "wrong file contents for %s", goldenFileName)
 }
+
+func TestQuoteString(t *testing.T) {
+	// other tests have examples of encountering invalid UTF8 and printable unicode
+	// so this is just for testing how unprintable valid unicode characters are rendered
+	s := quotedString("\x04")
+	testutil.Eq(t, "\"\\004\"", s)
+	s = quotedString("\x7F")
+	testutil.Eq(t, "\"\\177\"", s)
+	s = quotedString("\u2028")
+	testutil.Eq(t, "\"\\u2028\"", s)
+	s = quotedString("\U0010FFFF")
+	testutil.Eq(t, "\"\\U0010FFFF\"", s)
+}
@@ -19,15 +19,15 @@ message Request {
   // lead mfubar
   option (testprotos.mfubar) = true; // trailing mfubar
   // some detached comments
-  // some detached comments
+  // some detached comments with unicode 这个是值
   // Another field comment
   // label comment
   optional string name = 2 [default = "fubar"];
   extensions 100 to 200;
   extensions 201 to 250 [(testprotos.exfubarb) = "\000\001\002\003\004\005\006\007", (testprotos.exfubar) = "splat!"];
   reserved 10 to 20, 30 to 50;
   reserved "foo", "bar", "baz";
-  // Group comment
+  // Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
   optional group Extras = 3 {
     // trailer for Extras
     // this is a custom option
@@ -65,7 +65,7 @@ message Request {
   // can be these or those
   oneof xyz {
     // whoops?
-    option (testprotos.oofubar) = "whoops!";
+    option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
     string these = 6;
     int32 those = 7;
   }
 
@@ -108,7 +108,7 @@ message Request {
     string these = 6;
 
     // whoops?
-    option (testprotos.oofubar) = "whoops!";
+    option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
   }
 
   // can be this or that
@@ -125,7 +125,7 @@ message Request {
 
   // some detached comments
 
-  // some detached comments
+  // some detached comments with unicode 这个是值
 
   // Another field comment
 
@@ -140,7 +140,7 @@ message Request {
     json_name = "|foo|"
   ]; // field trailer #1...
 
-  // Group comment
+  // Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
   optional group Extras = 3 {
     // trailer for Extras
 
 
@@ -37,7 +37,7 @@ message Request {
 
   // some detached comments
 
-  // some detached comments
+  // some detached comments with unicode 这个是值
 
   // Another field comment
 
@@ -55,7 +55,7 @@ message Request {
 
   reserved "foo", "bar", "baz";
 
-  // Group comment
+  // Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
   optional group Extras = 3 {
     // trailer for Extras
 
@@ -123,7 +123,7 @@ message Request {
   // can be these or those
   oneof xyz {
     // whoops?
-    option (testprotos.oofubar) = "whoops!";
+    option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
 
     string these = 6;
 
 
@@ -39,7 +39,7 @@ message Request {
 
 	/* some detached comments */
 
-	/* some detached comments */
+	/* some detached comments with unicode 这个是值 */
 
 	/* Another field comment */
 
@@ -57,7 +57,7 @@ message Request {
 
 	reserved "foo", "bar", "baz";
 
-	/* Group comment */
+	/* Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐 */
 	optional group Extras = 3 {
 		/* trailer for Extras */
 
@@ -125,7 +125,7 @@ message Request {
 	/* can be these or those */
 	oneof xyz {
 		/* whoops? */
-		option (testprotos.oofubar) = "whoops!";
+		option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
 
 		string these = 6;
 
 
@@ -37,7 +37,7 @@ message Request {
 
   // some detached comments
 
-  // some detached comments
+  // some detached comments with unicode 这个是值
 
   // Another field comment
 
@@ -55,7 +55,7 @@ message Request {
 
   reserved "foo", "bar", "baz";
 
-  // Group comment
+  // Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
   optional group Extras = 3 {
     // this is a custom option
     option (testprotos.mfubar) = false;
@@ -117,7 +117,7 @@ message Request {
   // can be these or those
   oneof xyz {
     // whoops?
-    option (testprotos.oofubar) = "whoops!";
+    option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
 
     string these = 6;
 
 
@@ -36,7 +36,7 @@ message Request {
 
   reserved "foo", "bar", "baz";
 
-  // Group comment
+  // Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
   optional group Extras = 3 {
     option (testprotos.mfubar) = false;
 
@@ -95,7 +95,7 @@ message Request {
 
   // can be these or those
   oneof xyz {
-    option (testprotos.oofubar) = "whoops!";
+    option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
 
     string these = 6;
 
 
@@ -42,14 +42,14 @@ message Request {
 
   /* some detached comments */
 
-  /* some detached comments */
+  /* some detached comments with unicode 这个是值 */
 
   /* Another field comment */
 
   /* label comment */
   optional string name = 2 [default = "fubar"];
 
-  /* Group comment */
+  /* Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐 */
   optional group Extras = 3 {
     /* trailer for Extras */
 
@@ -78,7 +78,7 @@ message Request {
   /* can be these or those */
   oneof xyz {
     /* whoops? */
-    option (testprotos.oofubar) = "whoops!";
+    option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
 
     string these = 6;
 
 
@@ -32,7 +32,7 @@ message Request {
    // label comment
    optional string name = 2 [default = "fubar"];
 
-   // Group comment
+   // Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
    optional group Extras = 3 {
       // trailer for Extras
 
@@ -61,7 +61,7 @@ message Request {
    // can be these or those
    oneof xyz {
       // whoops?
-      option (testprotos.oofubar) = "whoops!";
+      option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
 
       string these = 6;
 
 
@@ -41,7 +41,7 @@ message Request {
 
   // some detached comments
 
-  // some detached comments
+  // some detached comments with unicode 这个是值
 
   // Another field comment
 
@@ -59,7 +59,7 @@ message Request {
 
   reserved "foo", "bar", "baz";
 
-  // Group comment
+  // Group comment with emoji 😀 😍 👻 ❤ 💯 💥 🐶 🦂 🥑 🍻 🌍 🚕 🪐
   optional group Extras = 3 {
     // trailer for Extras
 
@@ -128,7 +128,7 @@ message Request {
   // can be these or those
   oneof xyz {
     // whoops?
-    option (testprotos.oofubar) = "whoops!";
+    option (testprotos.oofubar) = "whoops, this has invalid UTF8! \274\377";
 
     string these = 6;