Skip to content

Commit e721269

Browse files
authored
Fix using optional between numbers; forbid duplicate and empty strings in tries (#362)
Fix not being able to parse some separator-free formatsу The reproducer was: ``` offsetHours(Padding.NONE) optional { optional { char(':') } offsetMinutesOfHour() } ``` This showed us one bug and one inefficiency, both of which are fixed here. Inefficiency: the `optional { char(':') }` should for all intents and purposes be equivalent to `alternativeParsing({ char(':') }) { }`: both the empty string and the `:` should be parsed, and, according to `optional`'s definition, if none of the fields mentioned in the block are non-zero, nothing should be output. However, such `optional` still created a fake parser element that notified all the fields that they are zero when an empty string is parsed, even though there are no fields. Bug: the fake parser element that notifies the fields that they are zero even though nothing of note was parsed interfered with the mechanism supporting compact formats. Number parsers are greedy, so `number(hh); number(mm)` gets merged into `number(hhmm)`. If there is something between them, this merge can't happen: `number(hh); string("x"); number(mm)`. For this reason, parsers that don't accept any strings are forbidden (or just very unlikely to happen)--except for the zero-width unconditional modification parser. This bug is fixed by moving such parsers to the end of the parser: `number(hh); modification(); number(mm); string("x")` will get transformed to `number(hhmm); string("x"); modification()`. To further improve the robustness of the parser, we revisited other places where zero-width parsers were possible and forbade them: now, AM/PM markers, month names, and day-of-week names can't be empty. This limitation can be lifted at a later point, but it wouldn't be easy. While we were at it, we also require distinct month names, day-of-week names, and AM/PM markers, just because we were near that place in the code.
1 parent ce30554 commit e721269

File tree

8 files changed

+117
-15
lines changed

8 files changed

+117
-15
lines changed

core/common/src/format/DateTimeFormatBuilder.kt

+3
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ public sealed interface DateTimeFormatBuilder {
129129
*
130130
* [am] is used for the AM marker (0-11 hours), [pm] is used for the PM marker (12-23 hours).
131131
*
132+
* Empty strings can not be used as markers.
133+
* [IllegalArgumentException] is thrown if either [am] or [pm] is empty or if they are equal.
134+
*
132135
* @see [amPmHour]
133136
*/
134137
public fun amPmMarker(am: String, pm: String)

core/common/src/format/LocalDateFormat.kt

+20
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import kotlinx.datetime.internal.format.parser.Copyable
1212

1313
/**
1414
* A description of how month names are formatted.
15+
*
16+
* An [IllegalArgumentException] will be thrown if some month name is empty or there are duplicate names.
1517
*/
1618
public class MonthNames(
1719
/**
@@ -21,6 +23,14 @@ public class MonthNames(
2123
) {
2224
init {
2325
require(names.size == 12) { "Month names must contain exactly 12 elements" }
26+
names.indices.forEach { ix ->
27+
require(names[ix].isNotEmpty()) { "A month name can not be empty" }
28+
for (ix2 in 0 until ix) {
29+
require(names[ix] != names[ix2]) {
30+
"Month names must be unique, but '${names[ix]}' was repeated"
31+
}
32+
}
33+
}
2434
}
2535

2636
/**
@@ -63,6 +73,8 @@ internal fun MonthNames.toKotlinCode(): String = when (this.names) {
6373

6474
/**
6575
* A description of how day of week names are formatted.
76+
*
77+
* An [IllegalArgumentException] will be thrown if some day-of-week name is empty or there are duplicate names.
6678
*/
6779
public class DayOfWeekNames(
6880
/**
@@ -72,6 +84,14 @@ public class DayOfWeekNames(
7284
) {
7385
init {
7486
require(names.size == 7) { "Day of week names must contain exactly 7 elements" }
87+
names.indices.forEach { ix ->
88+
require(names[ix].isNotEmpty()) { "A day-of-week name can not be empty" }
89+
for (ix2 in 0 until ix) {
90+
require(names[ix] != names[ix2]) {
91+
"Day-of-week names must be unique, but '${names[ix]}' was repeated"
92+
}
93+
}
94+
}
7595
}
7696

7797
/**

core/common/src/internal/format/FormatStructure.kt

+19-11
Original file line numberDiff line numberDiff line change
@@ -160,13 +160,17 @@ internal class OptionalFormatStructure<in T>(
160160
listOf(
161161
ConstantFormatStructure<T>(onZero).parser(),
162162
ParserStructure(
163-
listOf(
164-
UnconditionalModification {
165-
for (field in fields) {
166-
field.assignDefault(it)
163+
if (fields.isEmpty()) {
164+
emptyList()
165+
} else {
166+
listOf(
167+
UnconditionalModification {
168+
for (field in fields) {
169+
field.assignDefault(it)
170+
}
167171
}
168-
}
169-
),
172+
)
173+
},
170174
emptyList()
171175
)
172176
).concat()
@@ -176,12 +180,16 @@ internal class OptionalFormatStructure<in T>(
176180
override fun formatter(): FormatterStructure<T> {
177181
val formatter = format.formatter()
178182
val predicate = conjunctionPredicate(fields.map { it.isDefaultComparisonPredicate() })
179-
return ConditionalFormatter(
180-
listOf(
181-
predicate::test to ConstantStringFormatterStructure(onZero),
182-
Truth::test to formatter
183+
return if (predicate is Truth) {
184+
ConstantStringFormatterStructure(onZero)
185+
} else {
186+
ConditionalFormatter(
187+
listOf(
188+
predicate::test to ConstantStringFormatterStructure(onZero),
189+
Truth::test to formatter
190+
)
183191
)
184-
)
192+
}
185193
}
186194

187195
private class PropertyWithDefault<in T, E> private constructor(

core/common/src/internal/format/parser/Parser.kt

+11-4
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,21 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
4949
ParserStructure(operations, followedBy.map { it.append(other) })
5050
}
5151

52-
fun <T> ParserStructure<T>.simplify(): ParserStructure<T> {
52+
fun <T> ParserStructure<T>.simplify(unconditionalModifications: List<UnconditionalModification<T>>): ParserStructure<T> {
5353
val newOperations = mutableListOf<ParserOperation<T>>()
5454
var currentNumberSpan: MutableList<NumberConsumer<T>>? = null
55-
// joining together the number consumers in this parser before the first alternative
55+
val unconditionalModificationsForTails = unconditionalModifications.toMutableList()
56+
// joining together the number consumers in this parser before the first alternative;
57+
// collecting the unconditional modifications to push them to the end of all the parser's branches.
5658
for (op in operations) {
5759
if (op is NumberSpanParserOperation) {
5860
if (currentNumberSpan != null) {
5961
currentNumberSpan.addAll(op.consumers)
6062
} else {
6163
currentNumberSpan = op.consumers.toMutableList()
6264
}
65+
} else if (op is UnconditionalModification) {
66+
unconditionalModificationsForTails.add(op)
6367
} else {
6468
if (currentNumberSpan != null) {
6569
newOperations.add(NumberSpanParserOperation(currentNumberSpan))
@@ -69,7 +73,7 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
6973
}
7074
}
7175
val mergedTails = followedBy.flatMap {
72-
val simplified = it.simplify()
76+
val simplified = it.simplify(unconditionalModificationsForTails)
7377
// parser `ParserStructure(emptyList(), p)` is equivalent to `p`,
7478
// unless `p` is empty. For example, ((a|b)|(c|d)) is equivalent to (a|b|c|d).
7579
// As a special case, `ParserStructure(emptyList(), emptyList())` represents a parser that recognizes an empty
@@ -78,6 +82,9 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
7882
simplified.followedBy.ifEmpty { listOf(simplified) }
7983
else
8084
listOf(simplified)
85+
}.ifEmpty {
86+
// preserving the invariant that `mergedTails` contains all unconditional modifications
87+
listOf(ParserStructure(unconditionalModificationsForTails, emptyList()))
8188
}
8289
return if (currentNumberSpan == null) {
8390
// the last operation was not a number span, or it was a number span that we are allowed to interrupt
@@ -115,7 +122,7 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
115122
}
116123
}
117124
val naiveParser = foldRight(ParserStructure<T>(emptyList(), emptyList())) { parser, acc -> parser.append(acc) }
118-
return naiveParser.simplify()
125+
return naiveParser.simplify(emptyList())
119126
}
120127

121128
internal interface Copyable<Self> {

core/common/src/internal/format/parser/ParserOperation.kt

+2
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ internal class StringSetParserOperation<Output>(
157157

158158
init {
159159
for (string in strings) {
160+
require(string.isNotEmpty()) { "Found an empty string in $whatThisExpects" }
160161
var node = trie
161162
for (char in string) {
162163
val searchResult = node.children.binarySearchBy(char.toString()) { it.first }
@@ -166,6 +167,7 @@ internal class StringSetParserOperation<Output>(
166167
node.children[searchResult].second
167168
}
168169
}
170+
require(!node.isTerminal) { "The string '$string' was passed several times" }
169171
node.isTerminal = true
170172
}
171173
fun reduceTrie(trie: TrieNode) {

core/common/test/format/DateTimeFormatTest.kt

+12
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ class DateTimeFormatTest {
137137
}
138138
}
139139
}
140+
141+
@Test
142+
fun testOptionalBetweenConsecutiveNumbers() {
143+
val format = UtcOffset.Format {
144+
offsetHours(Padding.NONE)
145+
optional {
146+
optional { offsetSecondsOfMinute() }
147+
offsetMinutesOfHour()
148+
}
149+
}
150+
assertEquals(UtcOffset(-7, -30), format.parse("-730"))
151+
}
140152
}
141153

142154
fun <T> DateTimeFormat<T>.assertCanNotParse(input: String) {

core/common/test/format/LocalDateFormatTest.kt

+32
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,38 @@ class LocalDateFormatTest {
221221
assertEquals("2020 Jan 05", format.format(LocalDate(2020, 1, 5)))
222222
}
223223

224+
@Test
225+
fun testEmptyMonthNames() {
226+
val names = MonthNames.ENGLISH_FULL.names
227+
for (i in 0 until 12) {
228+
val newNames = (0 until 12).map { if (it == i) "" else names[it] }
229+
assertFailsWith<IllegalArgumentException> { MonthNames(newNames) }
230+
}
231+
}
232+
233+
@Test
234+
fun testEmptyDayOfWeekNames() {
235+
val names = DayOfWeekNames.ENGLISH_FULL.names
236+
for (i in 0 until 7) {
237+
val newNames = (0 until 7).map { if (it == i) "" else names[it] }
238+
assertFailsWith<IllegalArgumentException> { DayOfWeekNames(newNames) }
239+
}
240+
}
241+
242+
@Test
243+
fun testIdenticalMonthNames() {
244+
assertFailsWith<IllegalArgumentException> {
245+
MonthNames("Jan", "Jan", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
246+
}
247+
}
248+
249+
@Test
250+
fun testIdenticalDayOfWeekNames() {
251+
assertFailsWith<IllegalArgumentException> {
252+
DayOfWeekNames("Mon", "Tue", "Tue", "Thu", "Fri", "Sat", "Sun")
253+
}
254+
}
255+
224256
private fun test(strings: Map<LocalDate, Pair<String, Set<String>>>, format: DateTimeFormat<LocalDate>) {
225257
for ((date, stringsForDate) in strings) {
226258
val (canonicalString, otherStrings) = stringsForDate

core/common/test/format/LocalTimeFormatTest.kt

+18
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,24 @@ class LocalTimeFormatTest {
216216
}
217217
}
218218

219+
@Test
220+
fun testEmptyAmPmMarkers() {
221+
assertFailsWith<IllegalArgumentException> {
222+
LocalTime.Format {
223+
amPmMarker("", "pm")
224+
}
225+
}
226+
}
227+
228+
@Test
229+
fun testIdenticalAmPmMarkers() {
230+
assertFailsWith<IllegalArgumentException> {
231+
LocalTime.Format {
232+
amPmMarker("pm", "pm")
233+
}
234+
}
235+
}
236+
219237
private fun test(strings: Map<LocalTime, Pair<String, Set<String>>>, format: DateTimeFormat<LocalTime>) {
220238
for ((date, stringsForDate) in strings) {
221239
val (canonicalString, otherStrings) = stringsForDate

0 commit comments

Comments
 (0)