diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index cf70fe9a1a5c6d..aa30b754bc1dcf 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -447,6 +447,9 @@ private static RequiredHelperFunctions EmitTryFindNextPossibleStartingPosition(I ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent); return requiredHelpers; + // Emit a goto for the specified label. + void Goto(string label) => writer.WriteLine($"goto {label};"); + // Emits any anchors. Returns true if the anchor roots any match to a specific location and thus no further // searching is required; otherwise, false. bool EmitAnchors() @@ -459,7 +462,7 @@ bool EmitAnchors() additionalDeclarations.Add("int beginning = base.runtextbeg;"); using (EmitBlock(writer, "if (pos > beginning)")) { - writer.WriteLine($"goto {NoStartingPositionFound};"); + Goto(NoStartingPositionFound); } writer.WriteLine("return true;"); return true; @@ -468,7 +471,7 @@ bool EmitAnchors() writer.WriteLine("// Start \\G anchor"); using (EmitBlock(writer, "if (pos > base.runtextstart)")) { - writer.WriteLine($"goto {NoStartingPositionFound};"); + Goto(NoStartingPositionFound); } writer.WriteLine("return true;"); return true; @@ -528,7 +531,7 @@ bool EmitAnchors() writer.WriteLine("int newlinePos = global::System.MemoryExtensions.IndexOf(inputSpan.Slice(pos), '\\n');"); using (EmitBlock(writer, "if (newlinePos < 0 || newlinePos + pos + 1 > end)")) { - writer.WriteLine($"goto {NoStartingPositionFound};"); + Goto(NoStartingPositionFound); } writer.WriteLine("pos = newlinePos + pos + 1;"); } @@ -621,7 +624,7 @@ void EmitFixedSet() writer.WriteLine($"int indexOfPos = {indexOf};"); using (EmitBlock(writer, "if (indexOfPos < 0)")) { - writer.WriteLine($"goto {NoStartingPositionFound};"); + Goto(NoStartingPositionFound); } writer.WriteLine("i += indexOfPos;"); writer.WriteLine(); @@ -630,7 +633,7 @@ void EmitFixedSet() { using (EmitBlock(writer, $"if (i >= span.Length - {minRequiredLength - 1})")) { - writer.WriteLine($"goto {NoStartingPositionFound};"); + Goto(NoStartingPositionFound); } writer.WriteLine(); } @@ -844,7 +847,7 @@ private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTex writer.WriteLine("int pos = base.runtextpos, end = base.runtextend;"); writer.WriteLine($"int original_pos = pos;"); bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm); - bool hasTextInfo = EmitInitializeCultureForGoIfNecessary(writer, rm); + bool hasTextInfo = EmitInitializeCultureForTryMatchAtCurrentPositionIfNecessary(writer, rm); writer.Flush(); int additionalDeclarationsPosition = ((StringWriter)writer.InnerWriter).GetStringBuilder().Length; int additionalDeclarationsIndent = writer.Indent; @@ -866,7 +869,7 @@ private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTex // processing the next branch N+1: that way, any failures in the branch N's processing will // implicitly end up jumping to the right location without needing to know in what context it's used. string doneLabel = ReserveName("NoMatch"); - string originalDoneLabel = doneLabel; + string topLevelDoneLabel = doneLabel; // Check whether there are captures anywhere in the expression. If there isn't, we can skip all // the boilerplate logic around uncapturing, as there won't be anything to uncapture. @@ -885,17 +888,6 @@ private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTex writer.WriteLine("base.runtextpos = pos;"); writer.WriteLine("base.Capture(0, original_pos, pos);"); writer.WriteLine("return true;"); - writer.WriteLine(); - - // We only get here in the code if the whole expression fails to match and jumps to - // the original value of doneLabel. - writer.WriteLine("// The input didn't match."); - MarkLabel(originalDoneLabel, emitSemicolon: !expressionHasCaptures); - if (expressionHasCaptures) - { - EmitUncaptureUntil("0"); - } - writer.WriteLine("return false;"); // We're done with the match. @@ -931,6 +923,35 @@ string ReserveName(string prefix) // when it's known the label will always be followed by a statement. void MarkLabel(string label, bool emitSemicolon = true) => writer.WriteLine($"{label}:{(emitSemicolon ? ";" : "")}"); + // Emits a goto to jump to the specified label. However, if the specified label is the top-level done label indicating + // that the entire match has failed, we instead emit our epilogue, uncapturing if necessary and returning out of TryMatchAtCurrentPosition. + void Goto(string label) + { + if (label == topLevelDoneLabel) + { + // We only get here in the code if the whole expression fails to match and jumps to + // the original value of doneLabel. + if (expressionHasCaptures) + { + EmitUncaptureUntil("0"); + } + writer.WriteLine("return false; // The input didn't match."); + } + else + { + writer.WriteLine($"goto {label};"); + } + } + + // Emits a case or default line followed by an indented body. + void CaseGoto(string clause, string label) + { + writer.WriteLine(clause); + writer.Indent++; + Goto(label); + writer.Indent--; + } + // Whether the node has RegexOptions.IgnoreCase set. static bool IsCaseInsensitive(RegexNode node) => (node.Options & RegexOptions.IgnoreCase) != 0; @@ -956,7 +977,7 @@ void EmitSpanLengthCheck(int requiredLength, string? dynamicRequiredLength = nul Debug.Assert(requiredLength > 0); using (EmitBlock(writer, $"if ({SpanLengthCheck(requiredLength, dynamicRequiredLength)})")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } @@ -1175,7 +1196,7 @@ static RegexNode CloneMultiWithoutFirstChar(RegexNode node) } // Default branch if the character didn't match the start of any branches. - writer.WriteLine($"default: goto {doneLabel};"); + CaseGoto("default:", doneLabel); } } @@ -1280,7 +1301,7 @@ void EmitAllBranches() // matched and need to skip over that code. If, however, this is the // last branch and this is an atomic alternation, we can just fall // through to the successfully matched location. - writer.WriteLine($"goto {matchLabel};"); + Goto(matchLabel); } // Reset state for next branch and loop around to generate it. This includes @@ -1327,7 +1348,7 @@ void EmitAllBranches() { for (int i = 0; i < labelMap.Length; i++) { - writer.WriteLine($"case {i}: goto {labelMap[i]};"); + CaseGoto($"case {i}:", labelMap[i]); } } writer.WriteLine(); @@ -1367,7 +1388,7 @@ void EmitBackreference(RegexNode node) writer.WriteLine($"// If the {DescribeCapture(node.M, analysis)} hasn't matched, the backreference doesn't match."); using (EmitBlock(writer, $"if (!base.IsMatched({capnum}))")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine(); EmitWhenHasCapture(); @@ -1387,7 +1408,7 @@ void EmitWhenHasCapture() writer.WriteLine($"if ({sliceSpan}.Length < matchLength || "); using (EmitBlock(writer, $" !global::System.MemoryExtensions.SequenceEqual(inputSpan.Slice(base.MatchIndex({capnum}), matchLength), {sliceSpan}.Slice(0, matchLength)))")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } else @@ -1395,7 +1416,7 @@ void EmitWhenHasCapture() // For case-insensitive, we have to walk each character individually. using (EmitBlock(writer, $"if ({sliceSpan}.Length < matchLength)")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine(); @@ -1405,7 +1426,7 @@ void EmitWhenHasCapture() { using (EmitBlock(writer, $"if ({ToLower(hasTextInfo, options, $"inputSpan[matchIndex + i]")} != {ToLower(hasTextInfo, options, $"{sliceSpan}[i]")})")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } } @@ -1479,7 +1500,7 @@ void EmitBackreferenceConditional(RegexNode node) // Check to see if the specified capture number was captured. using (EmitBlock(writer, $"if (!base.IsMatched({capnum}))")) { - writer.WriteLine($"goto {refNotMatched};"); + Goto(refNotMatched); } writer.WriteLine(); @@ -1497,7 +1518,7 @@ void EmitBackreferenceConditional(RegexNode node) bool needsEndConditional = postYesDoneLabel != originalDoneLabel || noBranch is not null; if (needsEndConditional) { - writer.WriteLine($"goto {endConditional};"); + Goto(endConditional); writer.WriteLine(); } @@ -1533,7 +1554,7 @@ void EmitBackreferenceConditional(RegexNode node) if (hasBacktracking) { // Skip the backtracking section. - writer.WriteLine($"goto {endConditional};"); + Goto(endConditional); writer.WriteLine(); // Backtrack section @@ -1547,15 +1568,15 @@ void EmitBackreferenceConditional(RegexNode node) { if (postYesDoneLabel != originalDoneLabel) { - writer.WriteLine($"case 0: goto {postYesDoneLabel};"); + CaseGoto("case 0:", postYesDoneLabel); } if (postNoDoneLabel != originalDoneLabel) { - writer.WriteLine($"case 1: goto {postNoDoneLabel};"); + CaseGoto("case 1:", postNoDoneLabel); } - writer.WriteLine($"default: goto {originalDoneLabel};"); + CaseGoto("default:", originalDoneLabel); } } @@ -1652,7 +1673,7 @@ void EmitExpressionConditional(RegexNode node) { writer.WriteLine($"{resumeAt} = 0;"); } - writer.WriteLine($"goto {endConditional};"); + Goto(endConditional); writer.WriteLine(); // After the condition completes unsuccessfully, reset the text positions @@ -1703,7 +1724,7 @@ void EmitExpressionConditional(RegexNode node) else { // Skip the backtracking section. - writer.WriteLine($"goto {endConditional};"); + Goto(endConditional); writer.WriteLine(); string backtrack = ReserveName("ConditionalExpressionBacktrack"); @@ -1715,15 +1736,15 @@ void EmitExpressionConditional(RegexNode node) { if (postYesDoneLabel != originalDoneLabel) { - writer.WriteLine($"case 0: goto {postYesDoneLabel};"); + CaseGoto("case 0:", postYesDoneLabel); } if (postNoDoneLabel != originalDoneLabel) { - writer.WriteLine($"case 1: goto {postNoDoneLabel};"); + CaseGoto("case 1:", postNoDoneLabel); } - writer.WriteLine($"default: goto {originalDoneLabel};"); + CaseGoto("default:", originalDoneLabel); } MarkLabel(endConditional, emitSemicolon: false); @@ -1752,7 +1773,7 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) { using (EmitBlock(writer, $"if (!base.IsMatched({uncapnum}))")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine(); } @@ -1793,7 +1814,7 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) // Skip past the backtracking section string end = ReserveName("SkipBacktrack"); - writer.WriteLine($"goto {end};"); + Goto(end); writer.WriteLine(); // Emit a backtracking section that restores the capture's state and then jumps to the previous done label @@ -1805,7 +1826,7 @@ void EmitCapture(RegexNode node, RegexNode? subsequent = null) writer.WriteLine($"pos = {startingPos};"); SliceInputSpan(writer); } - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); writer.WriteLine(); doneLabel = backtrack; @@ -1864,7 +1885,7 @@ void EmitNegativeLookaheadAssertion(RegexNode node) // If the generated code ends up here, it matched the lookahead, which actually // means failure for a _negative_ lookahead, so we need to jump to the original done. writer.WriteLine(); - writer.WriteLine($"goto {originalDoneLabel};"); + Goto(originalDoneLabel); writer.WriteLine(); // Failures (success for a negative lookahead) jump here. @@ -1896,7 +1917,7 @@ void EmitNode(RegexNode node, RegexNode? subsequent = null, bool emitLengthCheck // A match failure doesn't need a scope. case RegexNodeKind.Nothing: - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); return; // Atomic is invisible in the generated source, other than its impact on the targets of jumps @@ -2102,7 +2123,7 @@ RegexNodeKind.Notoneloop or RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop writer.WriteLine(prevDescription is not null ? $") // {prevDescription}" : ")"); using (EmitBlock(writer, null)) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } if (i < childCount) { @@ -2182,7 +2203,7 @@ void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset { using (EmitBlock(writer, emitLengthCheck ? $"if ({SpanLengthCheck(1, offset)} || {expr})" : $"if ({expr})")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } @@ -2213,7 +2234,7 @@ RegexNodeKind.Boundary or using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}))")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } @@ -2231,14 +2252,14 @@ void EmitAnchors(RegexNode node) { // If we statically know we've already matched part of the regex, there's no way we're at the // beginning or start, as we've already progressed past it. - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } else { additionalDeclarations.Add(node.Kind == RegexNodeKind.Beginning ? "int beginning = base.runtextbeg;" : "int start = base.runtextstart;"); using (EmitBlock(writer, node.Kind == RegexNodeKind.Beginning ? "if (pos != beginning)" : "if (pos != start)")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } break; @@ -2248,7 +2269,7 @@ void EmitAnchors(RegexNode node) { using (EmitBlock(writer, $"if ({sliceSpan}[{sliceStaticPos - 1}] != '\\n')")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } else @@ -2257,7 +2278,7 @@ void EmitAnchors(RegexNode node) additionalDeclarations.Add("int beginning = base.runtextbeg;"); using (EmitBlock(writer, $"if (pos > beginning && inputSpan[pos - 1] != '\\n')")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } break; @@ -2265,7 +2286,7 @@ void EmitAnchors(RegexNode node) case RegexNodeKind.End: using (EmitBlock(writer, $"if ({IsSliceLengthGreaterThanSliceStaticPos()})")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } break; @@ -2273,14 +2294,14 @@ void EmitAnchors(RegexNode node) writer.WriteLine($"if ({sliceSpan}.Length > {sliceStaticPos + 1} || ({IsSliceLengthGreaterThanSliceStaticPos()} && {sliceSpan}[{sliceStaticPos}] != '\\n'))"); using (EmitBlock(writer, null)) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } break; case RegexNodeKind.Eol: using (EmitBlock(writer, $"if ({IsSliceLengthGreaterThanSliceStaticPos()} && {sliceSpan}[{sliceStaticPos}] != '\\n')")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } break; @@ -2316,7 +2337,7 @@ void EmitMultiCharString(string str, bool caseInsensitive, bool emitLengthCheck) string textSpanIndex = sliceStaticPos > 0 ? $"i + {sliceStaticPos}" : "i"; using (EmitBlock(writer, $"if ({ToLower(hasTextInfo, options, $"{sliceSpan}[{textSpanIndex}]")} != {Literal(str)}[i])")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } } @@ -2325,7 +2346,7 @@ void EmitMultiCharString(string str, bool caseInsensitive, bool emitLengthCheck) string sourceSpan = sliceStaticPos > 0 ? $"{sliceSpan}.Slice({sliceStaticPos})" : sliceSpan; using (EmitBlock(writer, $"if (!global::System.MemoryExtensions.StartsWith({sourceSpan}, {Literal(str)}))")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } @@ -2371,7 +2392,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL TransferSliceStaticPosToPos(); writer.WriteLine($"{endingPos} = pos;"); EmitAdd(writer, startingPos, node.M); - writer.WriteLine($"goto {endLoop};"); + Goto(endLoop); writer.WriteLine(); // Backtracking section. Subsequent failures will jump to here, at which @@ -2398,7 +2419,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL _ => $" ({endingPos} = global::System.MemoryExtensions.LastIndexOfAny(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item3)})) < 0)", })) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine($"{endingPos} += {startingPos};"); writer.WriteLine($"pos = {endingPos};"); @@ -2407,7 +2428,7 @@ void EmitSingleCharLoop(RegexNode node, RegexNode? subsequent = null, bool emitL { using (EmitBlock(writer, $"if ({startingPos} >= {endingPos})")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine($"pos = --{endingPos};"); } @@ -2484,7 +2505,7 @@ void EmitSingleCharLazy(RegexNode node, RegexNode? subsequent = null, bool emitL // Skip the backtracking section for the initial subsequent matching. We've already matched the // minimum number of iterations, which means we can successfully match with zero additional iterations. string endLoopLabel = ReserveName("LazyLoopEnd"); - writer.WriteLine($"goto {endLoopLabel};"); + Goto(endLoopLabel); writer.WriteLine(); // Backtracking section. Subsequent failures will jump to here. @@ -2504,7 +2525,7 @@ void EmitSingleCharLazy(RegexNode node, RegexNode? subsequent = null, bool emitL { using (EmitBlock(writer, $"if ({iterationCount} >= {maxIterations})")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine($"{iterationCount}++;"); } @@ -2539,7 +2560,7 @@ node.Kind is RegexNodeKind.Notonelazy && }); using (EmitBlock(writer, $"if ((uint){startingPos} >= (uint){sliceSpan}.Length || {sliceSpan}[{startingPos}] == {Literal(node.Ch)})")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine($"pos += {startingPos};"); SliceInputSpan(writer); @@ -2563,7 +2584,7 @@ node.Kind is RegexNodeKind.Setlazy && }); using (EmitBlock(writer, $"if ({startingPos} < 0)")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine($"pos += {startingPos};"); SliceInputSpan(writer); @@ -2603,7 +2624,7 @@ node.Kind is RegexNodeKind.Setlazy && // Skip past the backtracking section string end = ReserveName("SkipBacktrack"); - writer.WriteLine($"goto {end};"); + Goto(end); writer.WriteLine(); // Emit a backtracking section that restores the loop's state and then jumps to the previous done label @@ -2613,7 +2634,7 @@ node.Kind is RegexNodeKind.Setlazy && Array.Reverse(toPushPopArray); EmitStackPop(toPushPopArray); - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); writer.WriteLine(); doneLabel = backtrack; @@ -2677,7 +2698,7 @@ void EmitLazy(RegexNode node) // will then bring us back in to do further iterations. if (minIterations == 0) { - writer.WriteLine($"goto {endLoop};"); + Goto(endLoop); } writer.WriteLine(); @@ -2728,7 +2749,7 @@ void EmitLazy(RegexNode node) { using (EmitBlock(writer, $"if ({CountIsLessThan(iterationCount, minIterations)})")) { - writer.WriteLine($"goto {body};"); + Goto(body); } } @@ -2741,7 +2762,7 @@ void EmitLazy(RegexNode node) } // We matched the next iteration. Jump to the subsequent code. - writer.WriteLine($"goto {endLoop};"); + Goto(endLoop); writer.WriteLine(); // Now handle what happens when an iteration fails. We need to reset state to what it was before just that iteration @@ -2750,7 +2771,7 @@ void EmitLazy(RegexNode node) writer.WriteLine($"{iterationCount}--;"); using (EmitBlock(writer, $"if ({iterationCount} < 0)")) { - writer.WriteLine($"goto {originalDoneLabel};"); + Goto(originalDoneLabel); } EmitStackPop(sawEmpty, "pos", startingPos); if (expressionHasCaptures) @@ -2760,15 +2781,15 @@ void EmitLazy(RegexNode node) SliceInputSpan(writer); if (doneLabel == originalDoneLabel) { - writer.WriteLine($"goto {originalDoneLabel};"); + Goto(originalDoneLabel); } else { using (EmitBlock(writer, $"if ({iterationCount} == 0)")) { - writer.WriteLine($"goto {originalDoneLabel};"); + Goto(originalDoneLabel); } - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine(); @@ -2780,7 +2801,7 @@ void EmitLazy(RegexNode node) EmitStackPush(startingPos, iterationCount, sawEmpty); string skipBacktrack = ReserveName("SkipBacktrack"); - writer.WriteLine($"goto {skipBacktrack};"); + Goto(skipBacktrack); writer.WriteLine(); // Emit a backtracking section that restores the capture's state and then jumps to the previous done label @@ -2793,18 +2814,18 @@ void EmitLazy(RegexNode node) { using (EmitBlock(writer, $"if ({sawEmpty} == 0)")) { - writer.WriteLine($"goto {body};"); + Goto(body); } } else { using (EmitBlock(writer, $"if ({CountIsLessThan(iterationCount, maxIterations)} && {sawEmpty} == 0)")) { - writer.WriteLine($"goto {body};"); + Goto(body); } } - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); writer.WriteLine(); doneLabel = backtrack; @@ -2863,7 +2884,7 @@ void EmitSingleCharRepeater(RegexNode node, bool emitLengthCheck = true) writer.WriteLine(")"); using (EmitBlock(writer, null)) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } } else @@ -3023,7 +3044,7 @@ void EmitSingleCharAtomicLoop(RegexNode node, bool emitLengthChecksIfRequired = { using (EmitBlock(writer, $"if ({CountIsLessThan(iterationLocal, minIterations)})")) { - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); } writer.WriteLine(); } @@ -3165,11 +3186,11 @@ void EmitLoop(RegexNode node) (false, false) => $"if (pos != {startingPos} && {CountIsLessThan(iterationCount, maxIterations)})", })) { - writer.WriteLine($"goto {body};"); + Goto(body); } // We've matched as many iterations as we can with this configuration. Jump to what comes after the loop. - writer.WriteLine($"goto {endLoop};"); + Goto(endLoop); writer.WriteLine(); // Now handle what happens when an iteration fails, which could be an initial failure or it @@ -3179,7 +3200,7 @@ void EmitLoop(RegexNode node) writer.WriteLine($"{iterationCount}--;"); using (EmitBlock(writer, $"if ({iterationCount} < 0)")) { - writer.WriteLine($"goto {originalDoneLabel};"); + Goto(originalDoneLabel); } EmitStackPop("pos", startingPos); if (expressionHasCaptures) @@ -3192,12 +3213,12 @@ void EmitLoop(RegexNode node) { using (EmitBlock(writer, $"if ({iterationCount} == 0)")) { - writer.WriteLine($"goto {originalDoneLabel};"); + Goto(originalDoneLabel); } using (EmitBlock(writer, $"if ({CountIsLessThan(iterationCount, minIterations)})")) { - writer.WriteLine($"goto {(childBacktracks ? doneLabel : originalDoneLabel)};"); + Goto(childBacktracks ? doneLabel : originalDoneLabel); } } @@ -3210,16 +3231,16 @@ void EmitLoop(RegexNode node) { if (childBacktracks) { - writer.WriteLine($"goto {endLoop};"); + Goto(endLoop); writer.WriteLine(); string backtrack = ReserveName("LoopBacktrack"); MarkLabel(backtrack, emitSemicolon: false); using (EmitBlock(writer, $"if ({iterationCount} == 0)")) { - writer.WriteLine($"goto {originalDoneLabel};"); + Goto(originalDoneLabel); } - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); doneLabel = backtrack; } @@ -3234,7 +3255,7 @@ void EmitLoop(RegexNode node) // Skip past the backtracking section string end = ReserveName("SkipBacktrack"); - writer.WriteLine($"goto {end};"); + Goto(end); writer.WriteLine(); // Emit a backtracking section that restores the loop's state and then jumps to the previous done label @@ -3242,7 +3263,7 @@ void EmitLoop(RegexNode node) MarkLabel(backtrack, emitSemicolon: false); EmitStackPop(iterationCount, startingPos); - writer.WriteLine($"goto {doneLabel};"); + Goto(doneLabel); writer.WriteLine(); doneLabel = backtrack; @@ -3394,7 +3415,7 @@ private static void EmitTimeoutCheck(IndentedTextWriter writer, bool hasTimeout) } } - private static bool EmitInitializeCultureForGoIfNecessary(IndentedTextWriter writer, RegexMethod rm) + private static bool EmitInitializeCultureForTryMatchAtCurrentPositionIfNecessary(IndentedTextWriter writer, RegexMethod rm) { if (((RegexOptions)rm.Options & RegexOptions.CultureInvariant) == 0) { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index cd77b9271d2258..a0be5ef4eb69ac 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -1096,7 +1096,6 @@ protected void EmitTryMatchAtCurrentPosition() LocalBuilder pos = DeclareInt32(); LocalBuilder slice = DeclareReadOnlySpanChar(); LocalBuilder end = DeclareInt32(); - Label stopSuccessLabel = DefineLabel(); Label doneLabel = DefineLabel(); Label originalDoneLabel = doneLabel; if (_hasTimeout) @@ -1105,7 +1104,7 @@ protected void EmitTryMatchAtCurrentPosition() } // CultureInfo culture = CultureInfo.CurrentCulture; // only if the whole expression or any subportion is ignoring case, and we're not using invariant - InitializeCultureForGoIfNecessary(); + InitializeCultureForTryMatchAtCurrentPositionIfNecessary(); // ReadOnlySpan inputSpan = input; // int end = base.runtextend; @@ -1143,11 +1142,10 @@ protected void EmitTryMatchAtCurrentPosition() // Emit the code for all nodes in the tree. EmitNode(node); - // Success: // pos += sliceStaticPos; // base.runtextpos = pos; // Capture(0, originalpos, pos); - MarkLabel(stopSuccessLabel); + // return true; Ldthis(); Ldloc(pos); if (sliceStaticPos > 0) @@ -1163,10 +1161,17 @@ protected void EmitTryMatchAtCurrentPosition() Ldloc(originalPos); Ldloc(pos); Call(s_captureMethod); - // return true; Ldc(1); Ret(); + // NOTE: The following is a difference from the source generator. The source generator emits: + // UncaptureUntil(0); + // return false; + // at every location where the all-up match is known to fail. In contrast, the compiler currently + // emits this uncapture/return code in one place and jumps to it upon match failure. The difference + // stems primarily from the return-at-each-location pattern resulting in cleaner / easier to read + // source code, which is not an issue for RegexCompiler emitting IL instead of C#. + // If the graph contained captures, undo any remaining to handle failed matches. if (expressionHasCaptures) { @@ -4011,7 +4016,7 @@ protected void EmitScan(DynamicMethod tryFindNextStartingPositionMethod, Dynamic Ret(); } - private void InitializeCultureForGoIfNecessary() + private void InitializeCultureForTryMatchAtCurrentPositionIfNecessary() { _textInfo = null; if ((_options & RegexOptions.CultureInvariant) == 0)