Skip to content

Commit 0a9b9d9

Browse files
vslashgbaylesj
andauthored
Fix a parser bug where tokens are misidentified as commas. (#1502)
* Fix a parser bug where tokens are misidentified as commas. In the old and new readers, when parsing an object, a comment followed by any non-`}` token is treated as a comma. The new unit test required changing the runjsontests.py flag regime so that failure tests could be run with default settings. * Honor allowComments==false mode. Much of the comment handling in the parsers is bespoke, and does not honor this flag. By unfiying it under a common API, the parser is simplified and strict mode is now more correctly strict. Note that allowComments mode does not allow for comments in arbitrary locations; they are allowed only in certain positions. Rectifying this is a bigger effort, since collectComments mode requires storing the comments somewhere, and it's not immediately clear where in the DOM all such comments should live. --------- Co-authored-by: Jordan Bayles <bayles.jordan@gmail.com>
1 parent c3a9866 commit 0a9b9d9

8 files changed

+49
-55
lines changed

include/json/reader.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ class JSON_API Reader {
190190
using Errors = std::deque<ErrorInfo>;
191191

192192
bool readToken(Token& token);
193+
bool readTokenSkippingComments(Token& token);
193194
void skipSpaces();
194195
bool match(const Char* pattern, int patternLength);
195196
bool readComment();
@@ -221,7 +222,6 @@ class JSON_API Reader {
221222
int& column) const;
222223
String getLocationLineAndColumn(Location location) const;
223224
void addComment(Location begin, Location end, CommentPlacement placement);
224-
void skipCommentTokens(Token& token);
225225

226226
static bool containsNewLine(Location begin, Location end);
227227
static String normalizeEOL(Location begin, Location end);

src/jsontestrunner/main.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -240,11 +240,14 @@ static int parseCommandLine(int argc, const char* argv[], Options* opts) {
240240
return printUsage(argv);
241241
}
242242
int index = 1;
243-
if (Json::String(argv[index]) == "--json-checker") {
244-
opts->features = Json::Features::strictMode();
243+
if (Json::String(argv[index]) == "--parse-only") {
245244
opts->parseOnly = true;
246245
++index;
247246
}
247+
if (Json::String(argv[index]) == "--strict") {
248+
opts->features = Json::Features::strictMode();
249+
++index;
250+
}
248251
if (Json::String(argv[index]) == "--json-config") {
249252
printConfig();
250253
return 3;

src/lib_json/json_reader.cpp

+25-49
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
129129

130130
bool successful = readValue();
131131
Token token;
132-
skipCommentTokens(token);
132+
readTokenSkippingComments(token);
133133
if (collectComments_ && !commentsBefore_.empty())
134134
root.setComment(commentsBefore_, commentAfter);
135135
if (features_.strictRoot_) {
@@ -157,7 +157,7 @@ bool Reader::readValue() {
157157
throwRuntimeError("Exceeded stackLimit in readValue().");
158158

159159
Token token;
160-
skipCommentTokens(token);
160+
readTokenSkippingComments(token);
161161
bool successful = true;
162162

163163
if (collectComments_ && !commentsBefore_.empty()) {
@@ -225,14 +225,14 @@ bool Reader::readValue() {
225225
return successful;
226226
}
227227

228-
void Reader::skipCommentTokens(Token& token) {
228+
bool Reader::readTokenSkippingComments(Token& token) {
229+
bool success = readToken(token);
229230
if (features_.allowComments_) {
230-
do {
231-
readToken(token);
232-
} while (token.type_ == tokenComment);
233-
} else {
234-
readToken(token);
231+
while (success && token.type_ == tokenComment) {
232+
success = readToken(token);
233+
}
235234
}
235+
return success;
236236
}
237237

238238
bool Reader::readToken(Token& token) {
@@ -446,12 +446,7 @@ bool Reader::readObject(Token& token) {
446446
Value init(objectValue);
447447
currentValue().swapPayload(init);
448448
currentValue().setOffsetStart(token.start_ - begin_);
449-
while (readToken(tokenName)) {
450-
bool initialTokenOk = true;
451-
while (tokenName.type_ == tokenComment && initialTokenOk)
452-
initialTokenOk = readToken(tokenName);
453-
if (!initialTokenOk)
454-
break;
449+
while (readTokenSkippingComments(tokenName)) {
455450
if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
456451
return true;
457452
name.clear();
@@ -480,15 +475,11 @@ bool Reader::readObject(Token& token) {
480475
return recoverFromError(tokenObjectEnd);
481476

482477
Token comma;
483-
if (!readToken(comma) ||
484-
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
485-
comma.type_ != tokenComment)) {
478+
if (!readTokenSkippingComments(comma) ||
479+
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
486480
return addErrorAndRecover("Missing ',' or '}' in object declaration",
487481
comma, tokenObjectEnd);
488482
}
489-
bool finalizeTokenOk = true;
490-
while (comma.type_ == tokenComment && finalizeTokenOk)
491-
finalizeTokenOk = readToken(comma);
492483
if (comma.type_ == tokenObjectEnd)
493484
return true;
494485
}
@@ -518,10 +509,7 @@ bool Reader::readArray(Token& token) {
518509

519510
Token currentToken;
520511
// Accept Comment after last item in the array.
521-
ok = readToken(currentToken);
522-
while (currentToken.type_ == tokenComment && ok) {
523-
ok = readToken(currentToken);
524-
}
512+
ok = readTokenSkippingComments(currentToken);
525513
bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
526514
currentToken.type_ != tokenArrayEnd);
527515
if (!ok || badTokenType) {
@@ -943,6 +931,7 @@ class OurReader {
943931
using Errors = std::deque<ErrorInfo>;
944932

945933
bool readToken(Token& token);
934+
bool readTokenSkippingComments(Token& token);
946935
void skipSpaces();
947936
void skipBom(bool skipBom);
948937
bool match(const Char* pattern, int patternLength);
@@ -976,7 +965,6 @@ class OurReader {
976965
int& column) const;
977966
String getLocationLineAndColumn(Location location) const;
978967
void addComment(Location begin, Location end, CommentPlacement placement);
979-
void skipCommentTokens(Token& token);
980968

981969
static String normalizeEOL(Location begin, Location end);
982970
static bool containsNewLine(Location begin, Location end);
@@ -1030,7 +1018,7 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
10301018
bool successful = readValue();
10311019
nodes_.pop();
10321020
Token token;
1033-
skipCommentTokens(token);
1021+
readTokenSkippingComments(token);
10341022
if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
10351023
addError("Extra non-whitespace after JSON value.", token);
10361024
return false;
@@ -1058,7 +1046,7 @@ bool OurReader::readValue() {
10581046
if (nodes_.size() > features_.stackLimit_)
10591047
throwRuntimeError("Exceeded stackLimit in readValue().");
10601048
Token token;
1061-
skipCommentTokens(token);
1049+
readTokenSkippingComments(token);
10621050
bool successful = true;
10631051

10641052
if (collectComments_ && !commentsBefore_.empty()) {
@@ -1145,14 +1133,14 @@ bool OurReader::readValue() {
11451133
return successful;
11461134
}
11471135

1148-
void OurReader::skipCommentTokens(Token& token) {
1136+
bool OurReader::readTokenSkippingComments(Token& token) {
1137+
bool success = readToken(token);
11491138
if (features_.allowComments_) {
1150-
do {
1151-
readToken(token);
1152-
} while (token.type_ == tokenComment);
1153-
} else {
1154-
readToken(token);
1139+
while (success && token.type_ == tokenComment) {
1140+
success = readToken(token);
1141+
}
11551142
}
1143+
return success;
11561144
}
11571145

11581146
bool OurReader::readToken(Token& token) {
@@ -1449,12 +1437,7 @@ bool OurReader::readObject(Token& token) {
14491437
Value init(objectValue);
14501438
currentValue().swapPayload(init);
14511439
currentValue().setOffsetStart(token.start_ - begin_);
1452-
while (readToken(tokenName)) {
1453-
bool initialTokenOk = true;
1454-
while (tokenName.type_ == tokenComment && initialTokenOk)
1455-
initialTokenOk = readToken(tokenName);
1456-
if (!initialTokenOk)
1457-
break;
1440+
while (readTokenSkippingComments(tokenName)) {
14581441
if (tokenName.type_ == tokenObjectEnd &&
14591442
(name.empty() ||
14601443
features_.allowTrailingCommas_)) // empty object or trailing comma
@@ -1491,15 +1474,11 @@ bool OurReader::readObject(Token& token) {
14911474
return recoverFromError(tokenObjectEnd);
14921475

14931476
Token comma;
1494-
if (!readToken(comma) ||
1495-
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1496-
comma.type_ != tokenComment)) {
1477+
if (!readTokenSkippingComments(comma) ||
1478+
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
14971479
return addErrorAndRecover("Missing ',' or '}' in object declaration",
14981480
comma, tokenObjectEnd);
14991481
}
1500-
bool finalizeTokenOk = true;
1501-
while (comma.type_ == tokenComment && finalizeTokenOk)
1502-
finalizeTokenOk = readToken(comma);
15031482
if (comma.type_ == tokenObjectEnd)
15041483
return true;
15051484
}
@@ -1533,10 +1512,7 @@ bool OurReader::readArray(Token& token) {
15331512

15341513
Token currentToken;
15351514
// Accept Comment after last item in the array.
1536-
ok = readToken(currentToken);
1537-
while (currentToken.type_ == tokenComment && ok) {
1538-
ok = readToken(currentToken);
1539-
}
1515+
ok = readTokenSkippingComments(currentToken);
15401516
bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
15411517
currentToken.type_ != tokenArrayEnd);
15421518
if (!ok || badTokenType) {

test/data/fail_strict_comment_01.json

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"a": "aaa",
3+
"b": "bbb" // comments not allowed in strict mode
4+
}

test/data/fail_strict_comment_02.json

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"a": "aaa", // comments not allowed in strict mode
3+
"b": "bbb"
4+
}

test/data/fail_strict_comment_03.json

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"array" : [1, 2, 3 /* comments not allowed in strict mode */]
3+
}

test/data/fail_test_object_02.json

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"one": 1 /* } */ { "two" : 2 }

test/runjsontests.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,17 @@ def runAllTests(jsontest_executable_path, input_dir = None,
9797
valgrind_path = use_valgrind and VALGRIND_CMD or ''
9898
for input_path in tests + test_jsonchecker:
9999
expect_failure = os.path.basename(input_path).startswith('fail')
100-
is_json_checker_test = (input_path in test_jsonchecker) or expect_failure
100+
is_json_checker_test = input_path in test_jsonchecker
101+
is_parse_only = is_json_checker_test or expect_failure
102+
is_strict_test = ('_strict_' in os.path.basename(input_path)) or is_json_checker_test
101103
print('TESTING:', input_path, end=' ')
102-
options = is_json_checker_test and '--json-checker' or ''
104+
options = is_parse_only and '--parse-only' or ''
105+
options += is_strict_test and ' --strict' or ''
103106
options += ' --json-writer %s'%writerClass
104107
cmd = '%s%s %s "%s"' % ( valgrind_path, jsontest_executable_path, options,
105108
input_path)
106109
status, process_output = getStatusOutput(cmd)
107-
if is_json_checker_test:
110+
if is_parse_only:
108111
if expect_failure:
109112
if not status:
110113
print('FAILED')

0 commit comments

Comments
 (0)