Skip to content

Commit

Permalink
<regex>: Fix character range bounds in case-insensitive regexes (#5164
Browse files Browse the repository at this point in the history
)
  • Loading branch information
muellerj2 authored Jan 14, 2025
1 parent 0408152 commit af0bd00
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 17 deletions.
33 changes: 16 additions & 17 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1499,7 +1499,7 @@ public:
void _Add_char(_Elem _Ch);
void _Add_class();
void _Add_char_to_class(_Elem _Ch);
void _Add_range(_Elem _Ex0, _Elem _Ex1);
void _Add_range2(_Elem, _Elem);
void _Add_named_class(_Regex_traits_base::char_class_type, bool = false);
void _Add_equiv(_FwdIt, _FwdIt, _Difft);
void _Add_coll(_FwdIt, _FwdIt, _Difft);
Expand Down Expand Up @@ -2882,19 +2882,12 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char_to_class(_Elem _Ch) { // add
}

template <class _FwdIt, class _Elem, class _RxTraits>
void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_range(_Elem _Arg0, _Elem _Arg1) {
void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_range2(const _Elem _Arg0, const _Elem _Arg1) {
// add character range to set
unsigned int _Ex0;
unsigned int _Ex1;
if (_Flags & regex_constants::icase) { // change to lowercase range
_Ex0 = static_cast<unsigned int>(_Traits.translate_nocase(_Arg0));
_Ex1 = static_cast<unsigned int>(_Traits.translate_nocase(_Arg1));
} else {
_Ex0 = static_cast<typename _RxTraits::_Uelem>(_Arg0);
_Ex1 = static_cast<typename _RxTraits::_Uelem>(_Arg1);
}

unsigned int _Ex0 = static_cast<typename _RxTraits::_Uelem>(_Arg0);
const unsigned int _Ex1 = static_cast<typename _RxTraits::_Uelem>(_Arg1);
_Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current);

for (; _Ex0 <= _Ex1 && _Ex1 < _Get_bmax(); ++_Ex0) { // set a bit
if (!_Node->_Small) {
_Node->_Small = new _Bitmap;
Expand All @@ -2913,7 +2906,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_range(_Elem _Arg0, _Elem _Arg1) {
}

_Node->_Ranges->_Insert(static_cast<_Elem>(_Ex0));
_Node->_Ranges->_Insert(static_cast<_Elem>(_Ex1));
_Node->_Ranges->_Insert(_Arg1);
}
}
}
Expand Down Expand Up @@ -4113,16 +4106,22 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid clas
_Error(regex_constants::error_range); // set precedes or follows dash
}

if (_Flags & regex_constants::collate) { // translate ends of range
_Val = _Traits.translate(static_cast<_Elem>(_Val));
_Elem _Chr2 = static_cast<_Elem>(_Val);

// translate ends of range
if (_Flags & regex_constants::icase) {
_Chr1 = _Traits.translate_nocase(_Chr1);
_Chr2 = _Traits.translate_nocase(_Chr2);
} else if (_Flags & regex_constants::collate) {
_Chr1 = _Traits.translate(_Chr1);
_Chr2 = _Traits.translate(_Chr2);
}

if (static_cast<typename _RxTraits::_Uelem>(_Val) < static_cast<typename _RxTraits::_Uelem>(_Chr1)) {
if (static_cast<typename _RxTraits::_Uelem>(_Chr2) < static_cast<typename _RxTraits::_Uelem>(_Chr1)) {
_Error(regex_constants::error_range);
}

_Nfa._Add_range(_Chr1, static_cast<_Elem>(_Val));
_Nfa._Add_range2(_Chr1, _Chr2);
} else if (_Ret == _Prs_chr) {
_Nfa._Add_char_to_class(static_cast<_Elem>(_Val));
}
Expand Down
27 changes: 27 additions & 0 deletions tests/std/tests/Dev10_814245_regex_character_class_crash/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,39 @@ void test_VSO_984741_splitting_a_string_with_a_regex() {
assert(equal(i, sregex_token_iterator{}, begin(tokens), end(tokens)));
}

void test_gh_5164_case_insensitive_ranges() {
using ch_traits = char_traits<char>;
const regex_traits<char> re_traits;
for (size_t upper = 0; upper < characterCount; ++upper) {
for (size_t lower = 0; lower < characterCount; ++lower) {
const string pattern(g_firstCharacters[lower] + "-" + g_secondCharacters[upper]);
const char left_bound = re_traits.translate_nocase(g_inputs[lower][0]);
const char right_bound = re_traits.translate_nocase(g_inputs[upper][0]);

if (ch_traits::lt(right_bound, left_bound)) {
g_regexTester.should_throw(pattern, error_range, icase);
} else {
const regex r(pattern, icase);
for (size_t c = 0; c < characterCount; ++c) {
const char input_icase = re_traits.translate_nocase(g_inputs[c][0]);
if (ch_traits::lt(input_icase, left_bound) || ch_traits::lt(right_bound, input_icase)) {
g_regexTester.should_not_match(g_inputs[c], pattern, r);
} else {
g_regexTester.should_match(g_inputs[c], pattern, r);
}
}
}
}
}
}

int main() {
init_character_strings();
test_dev10_814245_character_class_should_not_crash();
test_dev10_723057_normal_to_high_bit_ranges_should_not_throw_error_range();
test_VSO_153556_singular_classes_can_have_high_bit_set();
test_VSO_984741_splitting_a_string_with_a_regex();
test_gh_5164_case_insensitive_ranges();

return g_regexTester.result();
}

0 comments on commit af0bd00

Please sign in to comment.