diff --git a/docs/design/coreclr/jit/ryujit-overview.md b/docs/design/coreclr/jit/ryujit-overview.md
index cdb17002ee1974..5e63d38e98f664 100644
--- a/docs/design/coreclr/jit/ryujit-overview.md
+++ b/docs/design/coreclr/jit/ryujit-overview.md
@@ -222,6 +222,7 @@ The top-level function of interest is `Compiler::compCompile`. It invokes the fo
| [Common Subexpression Elimination (CSE)](#cse) | Elimination of redundant subexpressions based on value numbers. |
| [Assertion Propagation](#assertion-propagation) | Utilizes value numbers to propagate and transform based on properties such as non-nullness. |
| [Range analysis](#range-analysis) | Eliminate array index range checks based on value numbers and assertions |
+| [Induction variable optimization](#iv-opts) | Optimize induction variables used inside natural loops based on scalar evolution analysis. |
| [VN-based dead store elimination](#vn-based-dead-store-elimination) | Eliminate stores that do not change the value of a local. |
| [If conversion](#if-conversion) | Transform conditional definitions into `GT_SELECT` operators. |
| [Rationalization](#rationalization) | Flowgraph order changes from `FGOrderTree` to `FGOrderLinear`. All `GT_COMMA` nodes are transformed. |
@@ -347,6 +348,11 @@ reused.
Utilizes value numbers to propagate and transform based on properties such as non-nullness.
+### Induction variable optimization
+
+Performs scalar evolution analysis and utilizes it to optimize induction variables inside loops.
+Currently this entails IV widening, which is done only on x64.
+
### Range analysis
Optimize array index range checks based on value numbers and assertions.
diff --git a/docs/design/coreclr/jit/ryujit-tutorial.md b/docs/design/coreclr/jit/ryujit-tutorial.md
index 34466e45afbcdc..ec900ccc8cd937 100644
--- a/docs/design/coreclr/jit/ryujit-tutorial.md
+++ b/docs/design/coreclr/jit/ryujit-tutorial.md
@@ -447,6 +447,10 @@ This is the same diagram as before, but with additional links to indicate execut
- Determine initial value for dependent phis
- Eliminate checks where the range of the index is within the check range
+### Induction Variable Optimization
+- Perform scalar evolution analysis to describe values of IR nodes inside loops
+- Perform IV widening on x64 to avoid unnecessary zero extensions for array/span indexing
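+
+For example, in a loop like the following sketch (C++-like pseudocode), the 32-bit IV `i` is used to index memory; on x64 each access would otherwise require zero extending `i` to 64 bits, and widening removes that extension:
+
+```c++
+int sum = 0;
+for (int i = 0; i < n; i++)
+{
+    sum += p[i]; // address = p + zext(i) * 4 on x64 unless i is widened
+}
+```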
+
## RyuJIT Back-End
### Rationalization
diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt
index ae08a27e4c00aa..6e114f0f04a119 100644
--- a/src/coreclr/jit/CMakeLists.txt
+++ b/src/coreclr/jit/CMakeLists.txt
@@ -94,7 +94,6 @@ set( JIT_SOURCES
bitset.cpp
block.cpp
buildstring.cpp
- layout.cpp
codegencommon.cpp
codegenlinear.cpp
compiler.cpp
@@ -123,14 +122,15 @@ set( JIT_SOURCES
gentree.cpp
gschecks.cpp
hashbv.cpp
- hwintrinsic.cpp
+ helperexpansion.cpp
hostallocator.cpp
+ hwintrinsic.cpp
ifconversion.cpp
- helperexpansion.cpp
- indirectcalltransformer.cpp
- importercalls.cpp
importer.cpp
+ importercalls.cpp
importervectorization.cpp
+ indirectcalltransformer.cpp
+ inductionvariableopts.cpp
inline.cpp
inlinepolicy.cpp
instr.cpp
@@ -138,6 +138,7 @@ set( JIT_SOURCES
jiteh.cpp
jithashtable.cpp
jitmetadata.cpp
+ layout.cpp
lclmorph.cpp
lclvars.cpp
likelyclass.cpp
@@ -152,7 +153,6 @@ set( JIT_SOURCES
objectalloc.cpp
optcse.cpp
optimizebools.cpp
- switchrecognition.cpp
optimizer.cpp
patchpoint.cpp
phase.cpp
@@ -165,6 +165,7 @@ set( JIT_SOURCES
regalloc.cpp
registerargconvention.cpp
regset.cpp
+ scev.cpp
scopeinfo.cpp
sideeffects.cpp
sm.cpp
@@ -173,6 +174,7 @@ set( JIT_SOURCES
ssabuilder.cpp
ssarenamestate.cpp
stacklevelsetter.cpp
+ switchrecognition.cpp
treelifeupdater.cpp
unwind.cpp
utils.cpp
@@ -359,6 +361,7 @@ set( JIT_HEADERS
registerargconvention.h
register.h
regset.h
+ scev.h
sideeffects.h
simd.h
simdashwintrinsic.h
diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis
index 95dd3dc305689b..98c374bea8f33f 100644
--- a/src/coreclr/jit/clrjit.natvis
+++ b/src/coreclr/jit/clrjit.natvis
@@ -86,6 +86,11 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u
{gtTreeID, d}: [{gtOper,en}, {gtType,en} V{((GenTreeLclFld*)this)->_gtLclNum,u}[+{((GenTreeLclFld*)this)->m_lclOffs,u}]]
+
+  <Type Name="Scev">
+    <DisplayString>[{Oper,en}, {Type,en}]</DisplayString>
+  </Type>
+
LinearScan
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 9fded7a13ccb0c..60b1a316c114aa 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -4893,6 +4893,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
bool doValueNum = true;
bool doLoopHoisting = true;
bool doCopyProp = true;
+ bool doOptimizeIVs = true;
bool doBranchOpt = true;
bool doCse = true;
bool doAssertionProp = true;
@@ -4905,6 +4906,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
doSsa = (JitConfig.JitDoSsa() != 0);
doEarlyProp = doSsa && (JitConfig.JitDoEarlyProp() != 0);
doValueNum = doSsa && (JitConfig.JitDoValueNumber() != 0);
+ doOptimizeIVs = doSsa && (JitConfig.JitDoOptimizeIVs() != 0);
doLoopHoisting = doValueNum && (JitConfig.JitDoLoopHoisting() != 0);
doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0);
doBranchOpt = doValueNum && (JitConfig.JitDoRedundantBranchOpts() != 0);
@@ -5005,6 +5007,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
DoPhase(this, PHASE_OPTIMIZE_INDEX_CHECKS, &Compiler::rangeCheckPhase);
}
+ if (doOptimizeIVs)
+ {
+ // Simplify and optimize induction variables used in natural loops
+ //
+ DoPhase(this, PHASE_OPTIMIZE_INDUCTION_VARIABLES, &Compiler::optInductionVariables);
+ }
+
if (doVNBasedDeadStoreRemoval)
{
// Note: this invalidates SSA and value numbers on tree nodes.
@@ -9409,6 +9418,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#pragma comment(linker, "/include:cLoops")
#pragma comment(linker, "/include:cLoopsA")
#pragma comment(linker, "/include:cLoop")
+#pragma comment(linker, "/include:cScev")
#pragma comment(linker, "/include:cTreeFlags")
#pragma comment(linker, "/include:cVN")
@@ -9434,6 +9444,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#pragma comment(linker, "/include:dCVarSet")
#pragma comment(linker, "/include:dLoop")
#pragma comment(linker, "/include:dLoops")
+#pragma comment(linker, "/include:dScev")
#pragma comment(linker, "/include:dTreeFlags")
#pragma comment(linker, "/include:dVN")
@@ -9677,24 +9688,38 @@ JITDBGAPI void __cdecl cCVarSet(Compiler* comp, VARSET_VALARG_TP vars)
JITDBGAPI void __cdecl cLoops(Compiler* comp)
{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
- printf("===================================================================== *NewLoops %u\n", sequenceNumber++);
+ printf("===================================================================== *Loops %u\n", sequenceNumber++);
FlowGraphNaturalLoops::Dump(comp->m_loops);
}
JITDBGAPI void __cdecl cLoopsA(Compiler* comp, FlowGraphNaturalLoops* loops)
{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
- printf("===================================================================== *NewLoopsA %u\n", sequenceNumber++);
+ printf("===================================================================== *LoopsA %u\n", sequenceNumber++);
FlowGraphNaturalLoops::Dump(loops);
}
JITDBGAPI void __cdecl cLoop(Compiler* comp, FlowGraphNaturalLoop* loop)
{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
- printf("===================================================================== *NewLoop %u\n", sequenceNumber++);
+ printf("===================================================================== *Loop %u\n", sequenceNumber++);
FlowGraphNaturalLoop::Dump(loop);
}
+JITDBGAPI void __cdecl cScev(Compiler* comp, Scev* scev)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Scev %u\n", sequenceNumber++);
+ if (scev == nullptr)
+ {
+ printf(" NULL\n");
+ }
+ else
+ {
+ scev->Dump(comp);
+ }
+}
+
JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree)
{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
@@ -10285,6 +10310,11 @@ JITDBGAPI void __cdecl dLoop(FlowGraphNaturalLoop* loop)
cLoop(JitTls::GetCompiler(), loop);
}
+JITDBGAPI void __cdecl dScev(Scev* scev)
+{
+ cScev(JitTls::GetCompiler(), scev);
+}
+
JITDBGAPI void __cdecl dTreeFlags(GenTree* tree)
{
cTreeFlags(JitTls::GetCompiler(), tree);
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index cbe7fb95046a3d..f3712bebf21d09 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -42,6 +42,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "jitexpandarray.h"
#include "tinyarray.h"
#include "valuenum.h"
+#include "scev.h"
#include "namedintrinsiclist.h"
#ifdef LATE_DISASM
#include "disasm.h"
@@ -4972,7 +4973,7 @@ class Compiler
#ifdef DEBUG
jitstd::vector<BasicBlock*>* fgBBOrder; // ordered vector of BBs
#endif
- // Used as a quick check for whether loop alignment should look for natural loops.
+ // Used as a quick check for whether phases downstream of loop finding should look for natural loops.
// If true: there may or may not be any natural loops in the flow graph, so try to find them
// If false: there's definitely not any natural loops in the flow graph
bool fgMightHaveNaturalLoops;
@@ -7411,6 +7412,18 @@ class Compiler
BasicBlock* basicBlock);
#endif
+ PhaseStatus optInductionVariables();
+ bool optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop);
+ bool optIsIVWideningProfitable(unsigned lclNum,
+ BasicBlock* initBlock,
+ bool initedToConstant,
+ FlowGraphNaturalLoop* loop,
+ ArrayStack<Statement*>& ivUses);
+ void optBestEffortReplaceNarrowIVUses(
+ unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt);
+ void optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt);
+ void optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop);
+
// Redundant branch opts
//
PhaseStatus optRedundantBranches();
diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h
index 835d85f798d29b..e986682894c3b6 100644
--- a/src/coreclr/jit/compmemkind.h
+++ b/src/coreclr/jit/compmemkind.h
@@ -50,6 +50,7 @@ CompMemKindMacro(LoopOpt)
CompMemKindMacro(LoopClone)
CompMemKindMacro(LoopUnroll)
CompMemKindMacro(LoopHoist)
+CompMemKindMacro(LoopIVOpts)
CompMemKindMacro(Unknown)
CompMemKindMacro(RangeCheck)
CompMemKindMacro(CopyProp)
diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h
index 23930985319769..10b60167be4224 100644
--- a/src/coreclr/jit/compphases.h
+++ b/src/coreclr/jit/compphases.h
@@ -84,6 +84,7 @@ CompPhaseNameMacro(PHASE_BUILD_SSA_DF, "SSA: DF",
CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS, "SSA: insert phis", false, PHASE_BUILD_SSA, false)
CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME, "SSA: rename", false, PHASE_BUILD_SSA, false)
CompPhaseNameMacro(PHASE_EARLY_PROP, "Early Value Propagation", false, -1, false)
+CompPhaseNameMacro(PHASE_OPTIMIZE_INDUCTION_VARIABLES, "Optimize Induction Variables", false, -1, false)
CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", false, -1, false)
CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", false, -1, false)
CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", false, -1, false)
diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
new file mode 100644
index 00000000000000..d30202680976e0
--- /dev/null
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -0,0 +1,676 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// This file contains code to optimize induction variables in loops based on
+// scalar evolution analysis (see scev.h and scev.cpp for more information
+// about the scalar evolution analysis).
+//
+// Currently the only optimization done is widening of primary induction
+// variables from 32 bits into 64 bits. This is generally only profitable on
+// x64, which does not allow zero extension of 32-bit values in its addressing
+// modes (in contrast, arm64 addressing modes can include zero extensions).
+// For x64 this saves a zero extension for every array
+// access inside the loop, in exchange for some widening or narrowing stores
+// outside the loop:
+// - To make sure the new widened IV starts at the right value it is
+// initialized to the value of the narrow IV outside the loop (either in the
+// preheader or at the def location of the narrow IV). Usually the start
+// value is a constant, in which case the widened IV is just initialized to
+// the constant value.
+// - If the narrow IV is used after the loop we need to store it back from
+// the widened IV in the exits. We depend on liveness sets to figure out
+// which exits to insert IR into.
+//
+// These steps ensure that the wide IV has the right value to begin with and
+// the old narrow IV still has the right value after the loop. Additionally,
+// we must replace every use of the narrow IV inside the loop with the widened
+// IV. This is done by a traversal of the IR inside the loop. We do not
+// actually widen the uses of the IV; rather, we keep all uses and defs as
+// 32-bit, which the backend is able to handle efficiently on x64. Because of
+// this we do not need to worry about overflow.
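+//
+// As a sketch (locals V01/V02 are illustrative, not actual names), for source
+// such as
+//
+//   int i = 0;              // V01, TYP_INT primary IV
+//   while (i < n)
+//   {
+//       sum += p[i];        // the address computation needs zext(V01) on x64
+//       i++;
+//   }
+//
+// widening creates a new TYP_LONG local V02 initialized to 0 before the loop,
+// replaces the zero-extending uses of V01 inside the loop with uses of V02,
+// redirects the 32-bit defs of V01 inside the loop to V02, and, only if V01
+// is live after the loop, stores V02 back into V01 (narrowed) in the exit
+// blocks.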
+//
+
+#include "jitpch.h"
+#include "scev.h"
+
+//------------------------------------------------------------------------
+// optCanSinkWidenedIV: Check to see if we are able to sink a store to the old
+// local into the exits of a loop if we decide to widen.
+//
+// Parameters:
+// lclNum - The primary induction variable
+// loop - The loop
+//
+// Returns:
+// True if we can sink a store to the old local after widening.
+//
+// Remarks:
+// This handles the situation where the primary induction variable is used
+// after the loop. In those cases we need to store the widened local back
+// into the old one in the exits where the IV variable is live.
+//
+// We are able to sink when none of the exits are critical blocks, in the
+// sense that all their predecessors must come from inside the loop. Loop
+// exit canonicalization guarantees this for regular exit blocks. It is not
+// guaranteed for exceptional exits, but we do not expect to widen IVs that
+// are live into exceptional exits since those are marked DNER which makes it
+// unprofitable anyway.
+//
+// Note that there may be natural loops that have not had their regular exits
+// canonicalized at the time when IV opts run, in particular if RBO/assertion
+// prop makes a previously unnatural loop natural. This function accounts for
+// and rejects these cases.
+//
+bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop)
+{
+ LclVarDsc* dsc = lvaGetDesc(lclNum);
+
+ BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) {
+
+ if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex))
+ {
+ JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum);
+ return BasicBlockVisit::Continue;
+ }
+
+ for (BasicBlock* pred : exit->PredBlocks())
+ {
+ if (!loop->ContainsBlock(pred))
+ {
+ JITDUMP(" Cannot safely sink widened version of V%02u into exit " FMT_BB " of " FMT_LP
+ "; it has a non-loop pred " FMT_BB "\n",
+ lclNum, exit->bbNum, loop->GetIndex(), pred->bbNum);
+ return BasicBlockVisit::Abort;
+ }
+ }
+
+ return BasicBlockVisit::Continue;
+ });
+
+#ifdef DEBUG
+ // We currently do not expect to ever widen IVs that are live into
+ // exceptional exits. Such IVs are expected to have been marked DNER
+ // previously (EH write-thru is only for single def locals) which makes it
+ // unprofitable. If this ever changes we need some more expansive handling
+ // here.
+ loop->VisitLoopBlocks([=](BasicBlock* block) {
+
+ block->VisitAllSuccs(this, [=](BasicBlock* succ) {
+ if (!loop->ContainsBlock(succ) && bbIsHandlerBeg(succ))
+ {
+ assert(!VarSetOps::IsMember(this, succ->bbLiveIn, dsc->lvVarIndex) &&
+ "Candidate IV for widening is live into exceptional exit");
+ }
+
+ return BasicBlockVisit::Continue;
+ });
+
+ return BasicBlockVisit::Continue;
+ });
+#endif
+
+ return result != BasicBlockVisit::Abort;
+}
+
+//------------------------------------------------------------------------
+// optIsIVWideningProfitable: Check to see if IV widening is profitable.
+//
+// Parameters:
+// lclNum - The primary induction variable
+// initBlock - The block where the new IV would be initialized
+// initedToConstant - Whether or not the new IV will be initialized to a constant
+// loop - The loop
+// ivUses - Statements in which "lclNum" appears will be added to this list
+//
+//
+// Returns:
+// True if IV widening is profitable.
+//
+// Remarks:
+// IV widening is generally profitable when it allows us to remove casts
+// inside the loop. However, it may also introduce other reg-reg moves:
+// 1. We may need to store the narrow IV into the wide one in the
+// preheader. This is necessary when the start value is not constant. If
+// the start value _is_ constant then we assume that the constant store to
+// the narrow local will be DCE'd.
+// 2. We need to store the wide IV back into the narrow one in each of
+// the exits where the narrow IV is live-in.
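+//
+// As a rough worked example using the constants defined in the function body
+// (ExtensionCost = 2, ExtensionSize = 3): a loop block with weight 100
+// containing one removable zero extension contributes savedCost = 200 and
+// savedSize = 3; a non-constant init in a weight-1 block then subtracts 2 and
+// 3, and a sink in a weight-1 exit subtracts another 2 and 3. The remaining
+// savings are divided by the method entry weight and compared against the
+// cycle/size thresholds at the end of the function.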
+//
+bool Compiler::optIsIVWideningProfitable(unsigned lclNum,
+ BasicBlock* initBlock,
+ bool initedToConstant,
+ FlowGraphNaturalLoop* loop,
+ ArrayStack<Statement*>& ivUses)
+{
+ for (FlowGraphNaturalLoop* otherLoop : m_loops->InReversePostOrder())
+ {
+ if (otherLoop == loop)
+ continue;
+
+ for (Statement* stmt : otherLoop->GetHeader()->Statements())
+ {
+ if (!stmt->IsPhiDefnStmt())
+ break;
+
+ if (stmt->GetRootNode()->AsLclVarCommon()->GetLclNum() == lclNum)
+ {
+ JITDUMP(" V%02u has a phi [%06u] in " FMT_LP "'s header " FMT_BB "\n", lclNum,
+ dspTreeID(stmt->GetRootNode()), otherLoop->GetIndex(), otherLoop->GetHeader()->bbNum);
+ // TODO-CQ: We can legally widen these cases, but LSRA is
+ // unhappy about some of the lifetimes we create when we do
+ // this. This particularly affects cloned loops.
+ return false;
+ }
+ }
+ }
+
+ const weight_t ExtensionCost = 2;
+ const int ExtensionSize = 3;
+
+ weight_t savedCost = 0;
+ int savedSize = 0;
+
+ loop->VisitLoopBlocks([&](BasicBlock* block) {
+ for (Statement* stmt : block->NonPhiStatements())
+ {
+ bool hasUse = false;
+ int numExtensions = 0;
+ for (GenTree* node : stmt->TreeList())
+ {
+ if (!node->OperIs(GT_CAST))
+ {
+ hasUse |= node->OperIsLocal() && (node->AsLclVarCommon()->GetLclNum() == lclNum);
+ continue;
+ }
+
+ GenTreeCast* cast = node->AsCast();
+ if ((cast->gtCastType != TYP_LONG) || !cast->IsUnsigned() || cast->gtOverflow())
+ {
+ continue;
+ }
+
+ GenTree* op = cast->CastOp();
+ if (!op->OperIs(GT_LCL_VAR) || (op->AsLclVarCommon()->GetLclNum() != lclNum))
+ {
+ continue;
+ }
+
+ // If this is already the source of a store then it is going to be
+ // free in our backends regardless.
+ GenTree* parent = node->gtGetParent(nullptr);
+ if ((parent != nullptr) && parent->OperIs(GT_STORE_LCL_VAR))
+ {
+ continue;
+ }
+
+ numExtensions++;
+ }
+
+ if (hasUse)
+ {
+ ivUses.Push(stmt);
+ }
+
+ if (numExtensions > 0)
+ {
+ JITDUMP(" Found %d zero extensions in " FMT_STMT "\n", numExtensions, stmt->GetID());
+
+ savedSize += numExtensions * ExtensionSize;
+ savedCost += numExtensions * block->getBBWeight(this) * ExtensionCost;
+ }
+ }
+
+ return BasicBlockVisit::Continue;
+ });
+
+ if (!initedToConstant)
+ {
+ // We will need to store the narrow IV into the wide one in the init
+ // block. We only cost this when init value is not a constant since
+ // otherwise we assume that constant initialization of the narrow local
+ // will be DCE'd.
+ savedSize -= ExtensionSize;
+ savedCost -= initBlock->getBBWeight(this) * ExtensionCost;
+ }
+
+ // Now account for the cost of sinks.
+ LclVarDsc* dsc = lvaGetDesc(lclNum);
+ loop->VisitRegularExitBlocks([&](BasicBlock* exit) {
+ if (VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex))
+ {
+ savedSize -= ExtensionSize;
+ savedCost -= exit->getBBWeight(this) * ExtensionCost;
+ }
+ return BasicBlockVisit::Continue;
+ });
+
+ const weight_t ALLOWED_SIZE_REGRESSION_PER_CYCLE_IMPROVEMENT = 2;
+ weight_t cycleImprovementPerInvoc = savedCost / fgFirstBB->getBBWeight(this);
+
+ JITDUMP(" Estimated cycle improvement: " FMT_WT " cycles per invocation\n", cycleImprovementPerInvoc);
+ JITDUMP(" Estimated size improvement: %d bytes\n", savedSize);
+
+ if ((cycleImprovementPerInvoc > 0) &&
+ ((cycleImprovementPerInvoc * ALLOWED_SIZE_REGRESSION_PER_CYCLE_IMPROVEMENT) >= -savedSize))
+ {
+ JITDUMP(" Widening is profitable (cycle improvement)\n");
+ return true;
+ }
+
+ const weight_t ALLOWED_CYCLE_REGRESSION_PER_SIZE_IMPROVEMENT = 0.01;
+
+ if ((savedSize > 0) && ((savedSize * ALLOWED_CYCLE_REGRESSION_PER_SIZE_IMPROVEMENT) >= -cycleImprovementPerInvoc))
+ {
+ JITDUMP(" Widening is profitable (size improvement)\n");
+ return true;
+ }
+
+ JITDUMP(" Widening is not profitable\n");
+ return false;
+}
+
+//------------------------------------------------------------------------
+// optSinkWidenedIV: Create stores back to the narrow IV in the exits where
+// that is necessary.
+//
+// Parameters:
+// lclNum - Narrow version of primary induction variable
+// newLclNum - Wide version of primary induction variable
+// loop - The loop
+//
+void Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop)
+{
+ LclVarDsc* dsc = lvaGetDesc(lclNum);
+ loop->VisitRegularExitBlocks([=](BasicBlock* exit) {
+ if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex))
+ {
+ return BasicBlockVisit::Continue;
+ }
+
+ GenTree* narrowing = gtNewCastNode(TYP_INT, gtNewLclvNode(newLclNum, TYP_LONG), false, TYP_INT);
+ GenTree* store = gtNewStoreLclVarNode(lclNum, narrowing);
+ Statement* newStmt = fgNewStmtFromTree(store);
+ JITDUMP("Narrow IV local V%02u live into exit block " FMT_BB "; sinking a narrowing\n", lclNum, exit->bbNum);
+ DISPSTMT(newStmt);
+ fgInsertStmtAtBeg(exit, newStmt);
+
+ return BasicBlockVisit::Continue;
+ });
+}
+
+//------------------------------------------------------------------------
+// optReplaceWidenedIV: Replace uses of the narrow IV with the wide IV in the
+// specified statement.
+//
+// Parameters:
+// lclNum - Narrow version of primary induction variable
+// ssaNum - SSA number of the narrow IV to replace; SsaConfig::RESERVED_SSA_NUM replaces all SSA numbers
+// newLclNum - Wide version of primary induction variable
+// stmt - The statement to replace uses in.
+//
+void Compiler::optReplaceWidenedIV(unsigned lclNum, unsigned ssaNum, unsigned newLclNum, Statement* stmt)
+{
+ struct ReplaceVisitor : GenTreeVisitor<ReplaceVisitor>
+ {
+ private:
+ unsigned m_lclNum;
+ unsigned m_ssaNum;
+ unsigned m_newLclNum;
+
+ bool IsLocal(GenTreeLclVarCommon* tree)
+ {
+ return (tree->GetLclNum() == m_lclNum) &&
+ ((m_ssaNum == SsaConfig::RESERVED_SSA_NUM) || (tree->GetSsaNum() == m_ssaNum));
+ }
+
+ public:
+ bool MadeChanges = false;
+
+ enum
+ {
+ DoPreOrder = true,
+ };
+
+ ReplaceVisitor(Compiler* comp, unsigned lclNum, unsigned ssaNum, unsigned newLclNum)
+ : GenTreeVisitor(comp), m_lclNum(lclNum), m_ssaNum(ssaNum), m_newLclNum(newLclNum)
+ {
+ }
+
+ fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
+ {
+ GenTree* node = *use;
+ if (node->OperIs(GT_CAST))
+ {
+ GenTreeCast* cast = node->AsCast();
+ if ((cast->gtCastType == TYP_LONG) && cast->IsUnsigned() && !cast->gtOverflow())
+ {
+ GenTree* op = cast->CastOp();
+ if (op->OperIs(GT_LCL_VAR) && IsLocal(op->AsLclVarCommon()))
+ {
+ *use = m_compiler->gtNewLclvNode(m_newLclNum, TYP_LONG);
+ MadeChanges = true;
+ return fgWalkResult::WALK_SKIP_SUBTREES;
+ }
+ }
+ }
+ else if (node->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR, GT_LCL_FLD, GT_STORE_LCL_FLD) &&
+ IsLocal(node->AsLclVarCommon()))
+ {
+ switch (node->OperGet())
+ {
+ case GT_LCL_VAR:
+ node->AsLclVarCommon()->SetLclNum(m_newLclNum);
+ // No cast needed -- the backend allows TYP_INT uses of TYP_LONG locals.
+ break;
+ case GT_STORE_LCL_VAR:
+ {
+ node->AsLclVarCommon()->SetLclNum(m_newLclNum);
+ node->gtType = TYP_LONG;
+ node->AsLclVarCommon()->Data() =
+ m_compiler->gtNewCastNode(TYP_LONG, node->AsLclVarCommon()->Data(), true, TYP_LONG);
+ break;
+ }
+ case GT_LCL_FLD:
+ case GT_STORE_LCL_FLD:
+ assert(!"Unexpected field use for local not marked as DNER");
+ break;
+ default:
+ break;
+ }
+
+ MadeChanges = true;
+ }
+
+ return fgWalkResult::WALK_CONTINUE;
+ }
+ };
+
+ ReplaceVisitor visitor(this, lclNum, ssaNum, newLclNum);
+ visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
+ if (visitor.MadeChanges)
+ {
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+ JITDUMP("New tree:\n", dspTreeID(stmt->GetRootNode()));
+ DISPTREE(stmt->GetRootNode());
+ JITDUMP("\n");
+ }
+ else
+ {
+ JITDUMP("No replacements made\n");
+ }
+}
+
+//------------------------------------------------------------------------
+// optBestEffortReplaceNarrowIVUses: Try to find and replace uses of the specified
+// SSA def with a new local.
+//
+// Parameters:
+// lclNum - Previous local
+// ssaNum - Previous local SSA num
+// newLclNum - New local to replace with
+// block - Block to replace in
+// firstStmt - First statement in "block" to start replacing in
+//
+// Remarks:
+// This function is best effort; it might not find all uses of the provided
+// SSA num, particularly because it does not follow into joins. Note that we
+// only use this to replace uses of the narrow IV outside the loop; inside
+// the loop we do ensure that all uses/defs are replaced.
+// Keeping it best-effort outside the loop is ok; there is no correctness
+// issue since we do not invalidate the value of the old narrow IV in any
+// way, but it may mean we end up leaving the narrow IV live concurrently
+// with the new widened IV, increasing register pressure.
+//
+void Compiler::optBestEffortReplaceNarrowIVUses(
+ unsigned lclNum, unsigned ssaNum, unsigned newLclNum, BasicBlock* block, Statement* firstStmt)
+{
+ JITDUMP("Replacing V%02u -> V%02u in " FMT_BB " starting at " FMT_STMT "\n", lclNum, newLclNum, block->bbNum,
+ firstStmt == nullptr ? 0 : firstStmt->GetID());
+
+ for (Statement* stmt = firstStmt; stmt != nullptr; stmt = stmt->GetNextStmt())
+ {
+ JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lclNum, newLclNum, dspTreeID(stmt->GetRootNode()));
+ DISPSTMT(stmt);
+ JITDUMP("\n");
+
+ optReplaceWidenedIV(lclNum, ssaNum, newLclNum, stmt);
+ }
+
+ block->VisitRegularSuccs(this, [=](BasicBlock* succ) {
+ if (succ->GetUniquePred(this) == block)
+ {
+ optBestEffortReplaceNarrowIVUses(lclNum, ssaNum, newLclNum, succ, succ->firstStmt());
+ }
+
+ return BasicBlockVisit::Continue;
+ });
+}
+
+//------------------------------------------------------------------------
+// optInductionVariables: Try to optimize induction variables in the method.
+//
+// Returns:
+// PhaseStatus indicating if anything changed.
+//
+PhaseStatus Compiler::optInductionVariables()
+{
+ JITDUMP("*************** In optInductionVariables()\n");
+
+#ifdef DEBUG
+ static ConfigMethodRange s_range;
+ s_range.EnsureInit(JitConfig.JitEnableInductionVariableOptsRange());
+
+ if (!s_range.Contains(info.compMethodHash()))
+ {
+ return PhaseStatus::MODIFIED_NOTHING;
+ }
+#endif
+
+ if (!fgMightHaveNaturalLoops)
+ {
+ JITDUMP(" Skipping since this method has no natural loops\n");
+ return PhaseStatus::MODIFIED_NOTHING;
+ }
+
+ bool changed = false;
+
+ // Currently we only do IV widening which generally is only profitable for
+ // x64 because arm64 addressing modes can include the zero/sign-extension
+ // of the index for free.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if defined(TARGET_XARCH) && defined(TARGET_64BIT)
+ m_dfsTree = fgComputeDfs();
+ m_loops = FlowGraphNaturalLoops::Find(m_dfsTree);
+
+ ScalarEvolutionContext scevContext(this);
+ JITDUMP("Widening primary induction variables:\n");
+ ArrayStack<Statement*> ivUses(getAllocator(CMK_LoopIVOpts));
+ for (FlowGraphNaturalLoop* loop : m_loops->InReversePostOrder())
+ {
+ JITDUMP("Processing ");
+ DBEXEC(verbose, FlowGraphNaturalLoop::Dump(loop));
+ scevContext.ResetForLoop(loop);
+
+ for (Statement* stmt : loop->GetHeader()->Statements())
+ {
+ if (!stmt->IsPhiDefnStmt())
+ {
+ break;
+ }
+
+ JITDUMP("\n");
+
+ DISPSTMT(stmt);
+
+ GenTreeLclVarCommon* lcl = stmt->GetRootNode()->AsLclVarCommon();
+ LclVarDsc* lclDsc = lvaGetDesc(lcl);
+ if (lclDsc->TypeGet() != TYP_INT)
+ {
+ JITDUMP(" Type is %s, no widening to be done\n", varTypeName(lclDsc->TypeGet()));
+ continue;
+ }
+
+ // If the IV is not enregisterable then uses/defs are going to go
+ // to stack regardless. This check also filters out IVs that may be
+ // live into exceptional exits since those are always marked DNER.
+ if (lclDsc->lvDoNotEnregister)
+ {
+ JITDUMP(" V%02u is marked DNER\n", lcl->GetLclNum());
+ continue;
+ }
+
+ Scev* scev = scevContext.Analyze(loop->GetHeader(), stmt->GetRootNode());
+ if (scev == nullptr)
+ {
+ JITDUMP(" Could not analyze header PHI\n");
+ continue;
+ }
+
+ scev = scevContext.Simplify(scev);
+ JITDUMP(" => ");
+ DBEXEC(verbose, scev->Dump(this));
+ JITDUMP("\n");
+ if (!scev->OperIs(ScevOper::AddRec))
+ {
+ JITDUMP(" Not an addrec\n");
+ continue;
+ }
+
+ ScevAddRec* addRec = (ScevAddRec*)scev;
+
+ JITDUMP(" V%02u is a primary induction variable in " FMT_LP "\n", lcl->GetLclNum(), loop->GetIndex());
+
+ if (!optCanSinkWidenedIV(lcl->GetLclNum(), loop))
+ {
+ continue;
+ }
+
+ // Start value should always be an SSA use from outside the loop
+ // since we only widen primary IVs.
+ assert(addRec->Start->OperIs(ScevOper::Local));
+ ScevLocal* startLocal = (ScevLocal*)addRec->Start;
+ int64_t startConstant = 0;
+ bool initToConstant = startLocal->GetConstantValue(this, &startConstant);
+ LclSsaVarDsc* startSsaDsc = lclDsc->GetPerSsaData(startLocal->SsaNum);
+
+ BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock();
+ BasicBlock* initBlock = preheader;
+ if ((startSsaDsc->GetBlock() != nullptr) && (startSsaDsc->GetDefNode() != nullptr))
+ {
+ initBlock = startSsaDsc->GetBlock();
+ }
+
+ ivUses.Reset();
+ if (!optIsIVWideningProfitable(lcl->GetLclNum(), initBlock, initToConstant, loop, ivUses))
+ {
+ continue;
+ }
+
+ changed = true;
+
+ Statement* insertInitAfter = nullptr;
+ if (initBlock != preheader)
+ {
+ GenTree* narrowInitRoot = startSsaDsc->GetDefNode();
+ while (true)
+ {
+ GenTree* parent = narrowInitRoot->gtGetParent(nullptr);
+ if (parent == nullptr)
+ break;
+
+ narrowInitRoot = parent;
+ }
+
+ for (Statement* stmt : initBlock->Statements())
+ {
+ if (stmt->GetRootNode() == narrowInitRoot)
+ {
+ insertInitAfter = stmt;
+ break;
+ }
+ }
+
+ assert(insertInitAfter != nullptr);
+
+ if (insertInitAfter->IsPhiDefnStmt())
+ {
+ while ((insertInitAfter->GetNextStmt() != nullptr) &&
+ insertInitAfter->GetNextStmt()->IsPhiDefnStmt())
+ {
+ insertInitAfter = insertInitAfter->GetNextStmt();
+ }
+ }
+ }
+
+ Statement* initStmt = nullptr;
+ unsigned newLclNum = lvaGrabTemp(false DEBUGARG(printfAlloc("Widened IV V%02u", lcl->GetLclNum())));
+ INDEBUG(lclDsc = nullptr);
+ assert(startLocal->LclNum == lcl->GetLclNum());
+
+ if (initBlock != preheader)
+ {
+ JITDUMP("Adding initialization of new widened local to same block as reaching def outside loop, " FMT_BB
+ "\n",
+ initBlock->bbNum);
+ }
+ else
+ {
+ JITDUMP("Adding initialization of new widened local to preheader " FMT_BB "\n", initBlock->bbNum);
+ }
+
+ GenTree* initVal;
+ if (initToConstant)
+ {
+ initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG);
+ }
+ else
+ {
+ initVal = gtNewCastNode(TYP_LONG, gtNewLclvNode(lcl->GetLclNum(), TYP_INT), true, TYP_LONG);
+ }
+
+ GenTree* widenStore = gtNewTempStore(newLclNum, initVal);
+ initStmt = fgNewStmtFromTree(widenStore);
+ if (insertInitAfter != nullptr)
+ {
+ fgInsertStmtAfter(initBlock, insertInitAfter, initStmt);
+ }
+ else
+ {
+ fgInsertStmtNearEnd(initBlock, initStmt);
+ }
+
+ DISPSTMT(initStmt);
+ JITDUMP("\n");
+
+ JITDUMP(" Replacing uses of V%02u with widened version V%02u\n", lcl->GetLclNum(), newLclNum);
+
+ if (initStmt != nullptr)
+ {
+ JITDUMP(" Replacing on the way to the loop\n");
+ optBestEffortReplaceNarrowIVUses(lcl->GetLclNum(), startLocal->SsaNum, newLclNum, initBlock,
+ initStmt->GetNextStmt());
+ }
+
+ JITDUMP(" Replacing in the loop; %d statements with appearences\n", ivUses.Height());
+ for (int i = 0; i < ivUses.Height(); i++)
+ {
+ Statement* stmt = ivUses.Bottom(i);
+ JITDUMP("Replacing V%02u -> V%02u in [%06u]\n", lcl->GetLclNum(), newLclNum,
+ dspTreeID(stmt->GetRootNode()));
+ DISPSTMT(stmt);
+ JITDUMP("\n");
+ optReplaceWidenedIV(lcl->GetLclNum(), SsaConfig::RESERVED_SSA_NUM, newLclNum, stmt);
+ }
+
+ optSinkWidenedIV(lcl->GetLclNum(), newLclNum, loop);
+ }
+ }
+
+ fgInvalidateDfsTree();
+#endif
+
+ return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
+}
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index abc510d967a80d..7c6e95bbb88a3e 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -480,8 +480,9 @@ CONFIG_INTEGER(JitNoRngChks, W("JitNoRngChks"), 0) // If 1, don't generate range
#if defined(OPT_CONFIG)
CONFIG_INTEGER(JitDoAssertionProp, W("JitDoAssertionProp"), 1) // Perform assertion propagation optimization
-CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant
-CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform Early Value Propagation
+CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant
+CONFIG_INTEGER(JitDoOptimizeIVs, W("JitDoOptimizeIVs"), 1) // Perform optimization of induction variables
+CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform Early Value Propagation
CONFIG_INTEGER(JitDoLoopHoisting, W("JitDoLoopHoisting"), 1) // Perform loop hoisting on loop invariant values
CONFIG_INTEGER(JitDoLoopInversion, W("JitDoLoopInversion"), 1) // Perform loop inversion on "for/while" loops
CONFIG_INTEGER(JitDoRangeAnalysis, W("JitDoRangeAnalysis"), 1) // Perform range check analysis
@@ -496,6 +497,7 @@ CONFIG_STRING(JitOnlyOptimizeRange,
W("JitOnlyOptimizeRange")) // If set, all methods that do _not_ match are forced into MinOpts
CONFIG_STRING(JitEnablePhysicalPromotionRange, W("JitEnablePhysicalPromotionRange"))
CONFIG_STRING(JitEnableCrossBlockLocalAssertionPropRange, W("JitEnableCrossBlockLocalAssertionPropRange"))
+CONFIG_STRING(JitEnableInductionVariableOptsRange, W("JitEnableInductionVariableOptsRange"))
CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables
CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions
diff --git a/src/coreclr/jit/scev.cpp b/src/coreclr/jit/scev.cpp
new file mode 100644
index 00000000000000..81760593a8aba8
--- /dev/null
+++ b/src/coreclr/jit/scev.cpp
@@ -0,0 +1,821 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// This file contains code to analyze how the value of induction variables
+// evolve (scalar evolution analysis), and to turn them into the SCEV IR
+// defined in scev.h. The analysis is inspired by "Michael Wolfe. 1992. Beyond
+// induction variables." and also by LLVM's scalar evolution analysis.
+//
+// The main idea of scalar evolution analysis is to give a closed form
+// describing the value of tree nodes inside loops even when taking into
+// account that they are changing on each loop iteration. This is useful for
+// optimizations that want to reason about values of IR nodes inside loops,
+// such as IV widening or strength reduction.
+//
+// To represent the possibility of evolution the SCEV IR includes the concept
+// of an add recurrence <loop, start, step>, which describes a value that
+// starts at "start" and changes by adding "step" at each iteration. The IR
+// nodes that change in this way (or depend on something that changes in this
+// way) are generally called induction variables.
+//
+// An add recurrence arises only when a local exists in the loop that is
+// mutated in each iteration. Such a local will naturally end up with a phi
+// node in the loop header. These locals are called primary (or basic)
+// induction variables. The non-primary IVs (which always must depend on the
+// primary IVs) are sometimes called secondary IVs.
+//
+// The job of the analysis is to go from a tree node to a SCEV node that
+// describes its value (possibly taking its evolution into account). Note that
+// SCEV nodes are immutable and the values they represent are _not_
+// flow-dependent; that is, they don't exist at a specific location inside the
+// loop, even though some particular tree node gave rise to that SCEV node. The
+// analysis itself _is_ flow-dependent and guarantees that the Scev* returned
+// describes the value that corresponds to what the tree node computes at its
+// specific location. However, it would be perfectly legal for two trees at
+// different locations in the loop to analyze to the same SCEV node (even
+// potentially returning the same pointer). For example, in theory "i" and "j"
+// in the following loop would both be represented by the same add recurrence
+// <loop, 0, 1>, and the analysis could even return the same Scev* for both of
+// them, even if it does not today:
+//
+// int i = 0;
+// while (true)
+// {
+// i++;
+// ...
+// int j = i - 1;
+// }
+//
+// Actually materializing the value of a SCEV node back into tree IR is not
+// implemented yet, but generally would depend on the availability of tree
+// nodes that compute the dependent values at the point where the IR is to be
+// materialized.
+//
+// Besides the add recurrences the analysis itself is generally a
+// straightforward translation from JIT IR into the SCEV IR. Creating the add
+// recurrences requires paying attention to the structure of PHIs, and
+// disambiguating the values coming from outside the loop and the values coming
+// from the backedges. Currently only simplistic add recurrences that do not
+// require recursive analysis are supported. These simplistic add recurrences
+// are always of the form i = i + k.
+//
+
+#include "jitpch.h"
+
+//------------------------------------------------------------------------
+// GetConstantValue: If this SSA use refers to a constant, then fetch that
+// constant.
+//
+// Parameters:
+// comp - Compiler instance
+// cns - [out] Constant value; only valid if this function returns true.
+//
+// Returns:
+// True if this SSA use refers to a constant; otherwise false.
+//
+bool ScevLocal::GetConstantValue(Compiler* comp, int64_t* cns)
+{
+ LclVarDsc* dsc = comp->lvaGetDesc(LclNum);
+ LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(SsaNum);
+ GenTreeLclVarCommon* defNode = ssaDsc->GetDefNode();
+ if ((defNode != nullptr) && defNode->Data()->OperIs(GT_CNS_INT, GT_CNS_LNG))
+ {
+ *cns = defNode->Data()->AsIntConCommon()->IntegralValue();
+ return true;
+ }
+
+ return false;
+}
+
+//------------------------------------------------------------------------
+// Scev::GetConstantValue: If this SCEV is always a constant (i.e. either an
+// inline constant or an SSA use referring to a constant) then obtain that
+// constant.
+//
+// Parameters:
+// comp - Compiler instance
+// cns - [out] Constant value; only valid if this function returns true.
+//
+// Returns:
+// True if a constant could be extracted.
+//
+bool Scev::GetConstantValue(Compiler* comp, int64_t* cns)
+{
+ if (OperIs(ScevOper::Constant))
+ {
+ *cns = ((ScevConstant*)this)->Value;
+ return true;
+ }
+
+ if (OperIs(ScevOper::Local))
+ {
+ return ((ScevLocal*)this)->GetConstantValue(comp, cns);
+ }
+
+ return false;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// Dump: Print this scev node to stdout.
+//
+// Parameters:
+// comp - Compiler instance
+//
+void Scev::Dump(Compiler* comp)
+{
+ switch (Oper)
+ {
+ case ScevOper::Constant:
+ {
+ ScevConstant* cns = (ScevConstant*)this;
+ printf("%zd", (ssize_t)cns->Value);
+ break;
+ }
+ case ScevOper::Local:
+ {
+ ScevLocal* invariantLocal = (ScevLocal*)this;
+ printf("V%02u.%u", invariantLocal->LclNum, invariantLocal->SsaNum);
+
+ int64_t cns;
+ if (invariantLocal->GetConstantValue(comp, &cns))
+ {
+ printf(" (%lld)", (long long)cns);
+ }
+ break;
+ }
+ case ScevOper::ZeroExtend:
+ case ScevOper::SignExtend:
+ {
+ ScevUnop* unop = (ScevUnop*)this;
+ printf("%cext<%d>(", unop->Oper == ScevOper::ZeroExtend ? 'z' : 's', genTypeSize(unop->Type) * 8);
+ unop->Op1->Dump(comp);
+ printf(")");
+ break;
+ }
+ case ScevOper::Add:
+ case ScevOper::Mul:
+ case ScevOper::Lsh:
+ {
+ ScevBinop* binop = (ScevBinop*)this;
+ printf("(");
+ binop->Op1->Dump(comp);
+ const char* op;
+ switch (binop->Oper)
+ {
+ case ScevOper::Add:
+ op = "+";
+ break;
+ case ScevOper::Mul:
+ op = "*";
+ break;
+ case ScevOper::Lsh:
+ op = "<<";
+ break;
+ default:
+ unreached();
+ }
+ printf(" %s ", op);
+ binop->Op2->Dump(comp);
+ printf(")");
+ break;
+ }
+ case ScevOper::AddRec:
+ {
+ ScevAddRec* addRec = (ScevAddRec*)this;
+ printf("<" FMT_LP, addRec->Loop->GetIndex());
+ printf(", ");
+ addRec->Start->Dump(comp);
+ printf(", ");
+ addRec->Step->Dump(comp);
+ printf(">");
+ break;
+ }
+ default:
+ unreached();
+ }
+}
+#endif
+
+//------------------------------------------------------------------------
+// ScalarEvolutionContext: Construct an instance of a context to do scalar evolution in.
+//
+// Parameters:
+// comp - Compiler instance
+//
+// Remarks:
+// After construction the context should be reset for a new loop by calling
+// ResetForLoop.
+//
+ScalarEvolutionContext::ScalarEvolutionContext(Compiler* comp)
+ : m_comp(comp), m_cache(comp->getAllocator(CMK_LoopIVOpts))
+{
+}
+
+//------------------------------------------------------------------------
+// ResetForLoop: Reset the internal cache in preparation of scalar
+// evolution analysis inside a new loop.
+//
+// Parameters:
+// loop - The loop.
+//
+void ScalarEvolutionContext::ResetForLoop(FlowGraphNaturalLoop* loop)
+{
+ m_loop = loop;
+ m_cache.RemoveAll();
+}
+
+//------------------------------------------------------------------------
+// NewConstant: Create a SCEV node that represents a constant.
+//
+// Returns:
+// The new node.
+//
+ScevConstant* ScalarEvolutionContext::NewConstant(var_types type, int64_t value)
+{
+ ScevConstant* constant = new (m_comp, CMK_LoopIVOpts) ScevConstant(type, value);
+ return constant;
+}
+
+//------------------------------------------------------------------------
+// NewLocal: Create a SCEV node that represents an invariant local (i.e. a
+// use of an SSA def from outside the loop).
+//
+// Parameters:
+// lclNum - The local
+// ssaNum - The SSA number of the def outside the loop that is being used.
+//
+// Returns:
+// The new node.
+//
+ScevLocal* ScalarEvolutionContext::NewLocal(unsigned lclNum, unsigned ssaNum)
+{
+ var_types type = genActualType(m_comp->lvaGetDesc(lclNum));
+ ScevLocal* invariantLocal = new (m_comp, CMK_LoopIVOpts) ScevLocal(type, lclNum, ssaNum);
+ return invariantLocal;
+}
+
+//------------------------------------------------------------------------
+// NewExtension: Create a SCEV node that represents a zero or sign extension.
+//
+// Parameters:
+// oper - The operation (ScevOper::ZeroExtend or ScevOper::SignExtend)
+// targetType - The target type of the extension
+// op - The operand being extended.
+//
+// Returns:
+// The new node.
+//
+ScevUnop* ScalarEvolutionContext::NewExtension(ScevOper oper, var_types targetType, Scev* op)
+{
+ assert(op != nullptr);
+ ScevUnop* ext = new (m_comp, CMK_LoopIVOpts) ScevUnop(oper, targetType, op);
+ return ext;
+}
+
+//------------------------------------------------------------------------
+// NewBinop: Create a SCEV node that represents a binary operation.
+//
+// Parameters:
+// oper - The operation
+// op1 - First operand
+// op2 - Second operand
+//
+// Returns:
+// The new node.
+//
+ScevBinop* ScalarEvolutionContext::NewBinop(ScevOper oper, Scev* op1, Scev* op2)
+{
+ assert((op1 != nullptr) && (op2 != nullptr));
+ ScevBinop* binop = new (m_comp, CMK_LoopIVOpts) ScevBinop(oper, op1->Type, op1, op2);
+ return binop;
+}
+
+//------------------------------------------------------------------------
+// NewAddRec: Create a SCEV node that represents a new add recurrence.
+//
+// Parameters:
+// start - Value of the recurrence at the first iteration
+// step - Step value of the recurrence
+//
+// Returns:
+// The new node.
+//
+ScevAddRec* ScalarEvolutionContext::NewAddRec(Scev* start, Scev* step)
+{
+ assert((start != nullptr) && (step != nullptr));
+ ScevAddRec* addRec = new (m_comp, CMK_LoopIVOpts) ScevAddRec(start->Type, start, step DEBUGARG(m_loop));
+ return addRec;
+}
+
+//------------------------------------------------------------------------
+// CreateSimpleInvariantScev: Create a "simple invariant" SCEV node for a tree:
+// either an invariant local use or a constant.
+//
+// Parameters:
+// tree - The tree
+//
+// Returns:
+// SCEV node or nullptr if the tree is not a simple invariant.
+//
+Scev* ScalarEvolutionContext::CreateSimpleInvariantScev(GenTree* tree)
+{
+ if (tree->OperIs(GT_CNS_INT, GT_CNS_LNG))
+ {
+ return CreateScevForConstant(tree->AsIntConCommon());
+ }
+
+ if (tree->OperIs(GT_LCL_VAR) && tree->AsLclVarCommon()->HasSsaName())
+ {
+ LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon());
+ LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum());
+
+ if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock()))
+ {
+ return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum());
+ }
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// CreateScevForConstant: Given an integer constant, create a SCEV node for it.
+//
+// Parameters:
+// tree - The integer constant
+//
+// Returns:
+// SCEV node or nullptr if the integer constant is not representable (e.g. a handle).
+//
+Scev* ScalarEvolutionContext::CreateScevForConstant(GenTreeIntConCommon* tree)
+{
+ if (tree->IsIconHandle() || !tree->TypeIs(TYP_INT, TYP_LONG))
+ {
+ return nullptr;
+ }
+
+ return NewConstant(tree->TypeGet(), tree->AsIntConCommon()->IntegralValue());
+}
+
+//------------------------------------------------------------------------
+// AnalyzeNew: Analyze the specified tree in the specified block, without going
+// through the cache.
+//
+// Parameters:
+// block - Block containing the tree
+// tree - Tree node
+// depth - Current analysis depth
+//
+// Returns:
+// SCEV node if the tree was analyzable; otherwise nullptr if the value
+// cannot be described.
+//
+Scev* ScalarEvolutionContext::AnalyzeNew(BasicBlock* block, GenTree* tree, int depth)
+{
+ switch (tree->OperGet())
+ {
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ {
+ return CreateScevForConstant(tree->AsIntConCommon());
+ }
+ case GT_LCL_VAR:
+ case GT_PHI_ARG:
+ {
+ if (!tree->AsLclVarCommon()->HasSsaName())
+ {
+ return nullptr;
+ }
+
+ assert(m_comp->lvaInSsa(tree->AsLclVarCommon()->GetLclNum()));
+ LclVarDsc* dsc = m_comp->lvaGetDesc(tree->AsLclVarCommon());
+ LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(tree->AsLclVarCommon()->GetSsaNum());
+
+ if ((ssaDsc->GetBlock() == nullptr) || !m_loop->ContainsBlock(ssaDsc->GetBlock()))
+ {
+ return NewLocal(tree->AsLclVarCommon()->GetLclNum(), tree->AsLclVarCommon()->GetSsaNum());
+ }
+
+ if (ssaDsc->GetDefNode() == nullptr)
+ {
+ // GT_CALL retbuf def?
+ return nullptr;
+ }
+
+ if (ssaDsc->GetDefNode()->GetLclNum() != tree->AsLclVarCommon()->GetLclNum())
+ {
+ // Should be a def of the parent
+ assert(dsc->lvIsStructField && (ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl));
+ return nullptr;
+ }
+
+ return Analyze(ssaDsc->GetBlock(), ssaDsc->GetDefNode(), depth + 1);
+ }
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreeLclVarCommon* store = tree->AsLclVarCommon();
+ GenTree* data = store->Data();
+ if (!data->OperIs(GT_PHI))
+ {
+ return Analyze(block, data, depth + 1);
+ }
+
+ if (block != m_loop->GetHeader())
+ {
+ return nullptr;
+ }
+
+ // We have a phi def for the current loop. Look for a primary
+ // induction variable.
+ GenTreePhi* phi = data->AsPhi();
+ GenTreePhiArg* enterSsa = nullptr;
+ GenTreePhiArg* backedgeSsa = nullptr;
+
+ for (GenTreePhi::Use& use : phi->Uses())
+ {
+ GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg();
+ GenTreePhiArg*& ssaArg = m_loop->ContainsBlock(phiArg->gtPredBB) ? backedgeSsa : enterSsa;
+ if ((ssaArg == nullptr) || (ssaArg->GetSsaNum() == phiArg->GetSsaNum()))
+ {
+ ssaArg = phiArg;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+
+ if ((enterSsa == nullptr) || (backedgeSsa == nullptr))
+ {
+ return nullptr;
+ }
+
+ ScevLocal* enterScev = NewLocal(enterSsa->GetLclNum(), enterSsa->GetSsaNum());
+
+ LclVarDsc* dsc = m_comp->lvaGetDesc(store);
+ LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(backedgeSsa->GetSsaNum());
+
+ if (ssaDsc->GetDefNode() == nullptr)
+ {
+ // GT_CALL retbuf def
+ return nullptr;
+ }
+
+ if (ssaDsc->GetDefNode()->GetLclNum() != store->GetLclNum())
+ {
+ assert(dsc->lvIsStructField && ssaDsc->GetDefNode()->GetLclNum() == dsc->lvParentLcl);
+ return nullptr;
+ }
+
+ assert(ssaDsc->GetBlock() != nullptr);
+
+ // We currently do not handle complicated addrecs. We can do this
+ // by inserting a symbolic node in the cache and analyzing while it
+ // is part of the cache. It would allow us to model
+ //
+ // int i = 0;
+ // while (i < n)
+ // {
+ // int j = i + 1;
+ // ...
+ // i = j;
+ // }
+ // => <L, 0, 1>
+ //
+ // and chains of recurrences, such as
+ //
+ // int i = 0;
+ // int j = 0;
+ // while (i < n)
+ // {
+ // j++;
+ // i += j;
+ // }
+ // => <L, 0, <L, 1, 1>>
+ //
+ // The main issue is that it requires cache invalidation afterwards
+ // and turning the recursive result into an addrec.
+ //
+ return CreateSimpleAddRec(store, enterScev, ssaDsc->GetBlock(), ssaDsc->GetDefNode()->Data());
+ }
+ case GT_CAST:
+ {
+ GenTreeCast* cast = tree->AsCast();
+ if (cast->gtCastType != TYP_LONG)
+ {
+ return nullptr;
+ }
+
+ Scev* op = Analyze(block, cast->CastOp(), depth + 1);
+ if (op == nullptr)
+ {
+ return nullptr;
+ }
+
+ return NewExtension(cast->IsUnsigned() ? ScevOper::ZeroExtend : ScevOper::SignExtend, TYP_LONG, op);
+ }
+ case GT_ADD:
+ case GT_MUL:
+ case GT_LSH:
+ {
+ Scev* op1 = Analyze(block, tree->gtGetOp1(), depth + 1);
+ if (op1 == nullptr)
+ return nullptr;
+
+ Scev* op2 = Analyze(block, tree->gtGetOp2(), depth + 1);
+ if (op2 == nullptr)
+ return nullptr;
+
+ ScevOper oper;
+ switch (tree->OperGet())
+ {
+ case GT_ADD:
+ oper = ScevOper::Add;
+ break;
+ case GT_MUL:
+ oper = ScevOper::Mul;
+ break;
+ case GT_LSH:
+ oper = ScevOper::Lsh;
+ break;
+ default:
+ unreached();
+ }
+
+ return NewBinop(oper, op1, op2);
+ }
+ case GT_COMMA:
+ {
+ return Analyze(block, tree->gtGetOp2(), depth + 1);
+ }
+ case GT_ARR_ADDR:
+ {
+ return Analyze(block, tree->AsArrAddr()->Addr(), depth + 1);
+ }
+ default:
+ return nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// CreateSimpleAddRec: Create a "simple" add-recurrence. This handles the most
+// common patterns for primary induction variables where we see a store like
+// "i = i + 1".
+//
+// Parameters:
+// headerStore - Phi definition of the candidate primary induction variable
+// enterScev - SCEV describing start value of the primary induction variable
+// stepDefBlock - Block containing the def of the step value
+// stepDefData - Value of the def of the step value
+//
+// Returns:
+// SCEV node if this is a simple addrec shape. Otherwise nullptr.
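+//
+// Remarks:
+// As a sketch, for a primary IV updated as "i = i + 4" the backedge def of
+// "i" is a STORE_LCL_VAR whose data is ADD(LCL_VAR i, CNS_INT 4); one ADD
+// operand matches the header phi def's local and SSA number, so the other
+// operand becomes the step and the result is the add recurrence
+// <loop, enterScev, 4>.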
+//
+Scev* ScalarEvolutionContext::CreateSimpleAddRec(GenTreeLclVarCommon* headerStore,
+ ScevLocal* enterScev,
+ BasicBlock* stepDefBlock,
+ GenTree* stepDefData)
+{
+ if (!stepDefData->OperIs(GT_ADD))
+ {
+ return nullptr;
+ }
+
+ GenTree* stepTree;
+ GenTree* op1 = stepDefData->gtGetOp1();
+ GenTree* op2 = stepDefData->gtGetOp2();
+ if (op1->OperIs(GT_LCL_VAR) && (op1->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) &&
+ (op1->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum()))
+ {
+ stepTree = op2;
+ }
+ else if (op2->OperIs(GT_LCL_VAR) && (op2->AsLclVar()->GetLclNum() == headerStore->GetLclNum()) &&
+ (op2->AsLclVar()->GetSsaNum() == headerStore->GetSsaNum()))
+ {
+ stepTree = op1;
+ }
+ else
+ {
+ // Not a simple IV shape (i.e. more complex than "i = i + k")
+ return nullptr;
+ }
+
+ Scev* stepScev = CreateSimpleInvariantScev(stepTree);
+ if (stepScev == nullptr)
+ {
+ return nullptr;
+ }
+
+ return NewAddRec(enterScev, stepScev);
+}
+
+//------------------------------------------------------------------------
+// Analyze: Analyze the specified tree in the specified block.
+//
+// Parameters:
+// block - Block containing the tree
+// tree - Tree node
+//
+// Returns:
+// SCEV node if the tree was analyzable; otherwise nullptr if the value
+// cannot be described.
+//
+Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree)
+{
+ return Analyze(block, tree, 0);
+}
+
+// Since the analysis follows SSA defs we have no upper bound on the potential
+// depth of the analysis performed. We put an artificial limit on this for two
+// reasons:
+// 1. The analysis is recursive, and we should not stack overflow regardless of
+// the input program.
+// 2. If we produced arbitrarily deep SCEV trees then all algorithms over their
+// structure would similarly be at risk of stack overflows if they were
+// recursive. However, these algorithms are generally much more elegant when
+// they make use of recursion.
+const int SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH = 64;
+
+//------------------------------------------------------------------------
+// Analyze: Analyze the specified tree in the specified block.
+//
+// Parameters:
+// block - Block containing the tree
+// tree - Tree node
+// depth - Current analysis depth
+//
+// Returns:
+// SCEV node if the tree was analyzable; otherwise nullptr if the value
+// cannot be described.
+//
+Scev* ScalarEvolutionContext::Analyze(BasicBlock* block, GenTree* tree, int depth)
+{
+ Scev* result;
+ if (!m_cache.Lookup(tree, &result))
+ {
+ if (depth >= SCALAR_EVOLUTION_ANALYSIS_MAX_DEPTH)
+ {
+ return nullptr;
+ }
+
+ result = AnalyzeNew(block, tree, depth);
+ m_cache.Set(tree, result);
+ }
+
+ return result;
+}
+
+//------------------------------------------------------------------------
+// FoldBinop: Fold simple binops.
+//
+// Type parameters:
+// T - Type that the binop is being evaluated in
+//
+// Parameters:
+// oper - Binary operation
+// op1 - First operand
+// op2 - Second operand
+//
+// Returns:
+// Folded value.
+//
+template <typename T>
+static T FoldBinop(ScevOper oper, T op1, T op2)
+{
+ switch (oper)
+ {
+ case ScevOper::Add:
+ return op1 + op2;
+ case ScevOper::Mul:
+ return op1 * op2;
+ case ScevOper::Lsh:
+ return op1 << op2;
+ default:
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// Simplify: Try to simplify a SCEV node by folding and canonicalization.
+//
+// Parameters:
+// scev - The node
+//
+// Returns:
+// Simplified node.
+//
+// Remarks:
+// Canonicalization is done for binops; constants are moved to the right and
+// addrecs are moved to the left.
+//
+// Simple unops/binops on constants are folded. Operands are distributed into
+// add recs whenever possible.
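+//
+// For example, (zext<64>(<L00, 0, 1>) * 8) simplifies to <L00, 0, 8>: the
+// extension of the add recurrence is dropped, the multiplication is
+// distributed into the start and step operands, and both then fold as
+// constants.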
+//
+Scev* ScalarEvolutionContext::Simplify(Scev* scev)
+{
+ switch (scev->Oper)
+ {
+ case ScevOper::Constant:
+ case ScevOper::Local:
+ {
+ return scev;
+ }
+ case ScevOper::ZeroExtend:
+ case ScevOper::SignExtend:
+ {
+ ScevUnop* unop = (ScevUnop*)scev;
+ assert(genTypeSize(unop->Type) >= genTypeSize(unop->Op1->Type));
+
+ Scev* op1 = Simplify(unop->Op1);
+
+ if (unop->Type == op1->Type)
+ {
+ return op1;
+ }
+
+ assert((unop->Type == TYP_LONG) && (op1->Type == TYP_INT));
+
+ if (op1->OperIs(ScevOper::Constant))
+ {
+ ScevConstant* cns = (ScevConstant*)op1;
+ return NewConstant(unop->Type, unop->OperIs(ScevOper::ZeroExtend) ? (uint64_t)(uint32_t)cns->Value
+ : (int64_t)(int32_t)cns->Value);
+ }
+
+ if (op1->OperIs(ScevOper::AddRec))
+ {
+ // TODO-Cleanup: This requires some proof that it is ok, but
+ // currently we do not rely on this.
+ return op1;
+ }
+
+ return (op1 == unop->Op1) ? unop : NewExtension(unop->Oper, unop->Type, op1);
+ }
+ case ScevOper::Add:
+ case ScevOper::Mul:
+ case ScevOper::Lsh:
+ {
+ ScevBinop* binop = (ScevBinop*)scev;
+ Scev* op1 = Simplify(binop->Op1);
+ Scev* op2 = Simplify(binop->Op2);
+
+ if (binop->OperIs(ScevOper::Add, ScevOper::Mul))
+ {
+ // Normalize addrecs to the left
+ if (op2->OperIs(ScevOper::AddRec) && !op1->OperIs(ScevOper::AddRec))
+ {
+ std::swap(op1, op2);
+ }
+ // Normalize constants to the right
+ if (op1->OperIs(ScevOper::Constant) && !op2->OperIs(ScevOper::Constant))
+ {
+ std::swap(op1, op2);
+ }
+ }
+
+ if (op1->OperIs(ScevOper::AddRec))
+ {
+ // <L, start, step> + x => <L, start + x, step>
+ // <L, start, step> * x => <L, start * x, step * x>
+ ScevAddRec* addRec = (ScevAddRec*)op1;
+ Scev* newStart = Simplify(NewBinop(binop->Oper, addRec->Start, op2));
+ Scev* newStep = scev->OperIs(ScevOper::Mul, ScevOper::Lsh)
+ ? Simplify(NewBinop(binop->Oper, addRec->Step, op2))
+ : addRec->Step;
+ return NewAddRec(newStart, newStep);
+ }
+
+ if (op1->OperIs(ScevOper::Constant) && op2->OperIs(ScevOper::Constant))
+ {
+ ScevConstant* cns1 = (ScevConstant*)op1;
+ ScevConstant* cns2 = (ScevConstant*)op2;
+ int64_t newValue;
+ if (binop->TypeIs(TYP_INT))
+ {
+ newValue = FoldBinop<int32_t>(binop->Oper, static_cast<int32_t>(cns1->Value),
+ static_cast<int32_t>(cns2->Value));
+ }
+ else
+ {
+ assert(binop->TypeIs(TYP_LONG));
+ newValue = FoldBinop<int64_t>(binop->Oper, cns1->Value, cns2->Value);
+ }
+
+ return NewConstant(binop->Type, newValue);
+ }
+
+ return (op1 == binop->Op1) && (op2 == binop->Op2) ? binop : NewBinop(binop->Oper, op1, op2);
+ }
+ case ScevOper::AddRec:
+ {
+ ScevAddRec* addRec = (ScevAddRec*)scev;
+ Scev* start = Simplify(addRec->Start);
+ Scev* step = Simplify(addRec->Step);
+ return (start == addRec->Start) && (step == addRec->Step) ? addRec : NewAddRec(start, step);
+ }
+ default:
+ unreached();
+ }
+}
diff --git a/src/coreclr/jit/scev.h b/src/coreclr/jit/scev.h
new file mode 100644
index 00000000000000..603088d9623661
--- /dev/null
+++ b/src/coreclr/jit/scev.h
@@ -0,0 +1,155 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#pragma once
+
+// This file contains the definition of the scalar evolution IR. This IR allows
+// representing the values of IR nodes inside loops in a closed form, taking
+// into account that they are changing on each loop iteration. The IR is based
+// around the following possible operations. At the core is ScevOper::AddRec,
+// which represents a value that evolves by an add recurrence. In dumps it is
+// described by <loop, start, step> where "loop" is the loop the value is
+// evolving in, "start" is the initial value and "step" is the step by which
+// the value evolves in every iteration.
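+//
+// For example, a 32-bit counter that starts at 0 and is incremented by 1 on
+// every iteration of loop L00 is the add recurrence <L00, 0, 1>, and a byte
+// offset computed from it as "zext(counter) * 8" dumps as
+// (zext<64>(<L00, 0, 1>) * 8).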
+//
+// See scev.cpp for further documentation.
+//
+enum class ScevOper
+{
+ Constant,
+ Local,
+ ZeroExtend,
+ SignExtend,
+ Add,
+ Mul,
+ Lsh,
+ AddRec,
+};
+
+static bool ScevOperIs(ScevOper oper, ScevOper otherOper)
+{
+ return oper == otherOper;
+}
+
+template <typename... Args>
+static bool ScevOperIs(ScevOper oper, ScevOper operFirst, Args... operTail)
+{
+ return oper == operFirst || ScevOperIs(oper, operTail...);
+}
+
+struct Scev
+{
+ const ScevOper Oper;
+ const var_types Type;
+
+ Scev(ScevOper oper, var_types type) : Oper(oper), Type(type)
+ {
+ }
+
+ template <typename... Args>
+ bool OperIs(Args... opers)
+ {
+ return ScevOperIs(Oper, opers...);
+ }
+
+ bool TypeIs(var_types type)
+ {
+ return Type == type;
+ }
+
+ bool GetConstantValue(Compiler* comp, int64_t* cns);
+
+#ifdef DEBUG
+ void Dump(Compiler* comp);
+#endif
+};
+
+struct ScevConstant : Scev
+{
+ ScevConstant(var_types type, int64_t value) : Scev(ScevOper::Constant, type), Value(value)
+ {
+ }
+
+ int64_t Value;
+};
+
+struct ScevLocal : Scev
+{
+ ScevLocal(var_types type, unsigned lclNum, unsigned ssaNum)
+ : Scev(ScevOper::Local, type), LclNum(lclNum), SsaNum(ssaNum)
+ {
+ }
+
+ const unsigned LclNum;
+ const unsigned SsaNum;
+
+ bool GetConstantValue(Compiler* comp, int64_t* cns);
+};
+
+struct ScevUnop : Scev
+{
+ ScevUnop(ScevOper oper, var_types type, Scev* op1) : Scev(oper, type), Op1(op1)
+ {
+ }
+
+ Scev* const Op1;
+};
+
+struct ScevBinop : ScevUnop
+{
+ ScevBinop(ScevOper oper, var_types type, Scev* op1, Scev* op2) : ScevUnop(oper, type, op1), Op2(op2)
+ {
+ }
+
+ Scev* const Op2;
+};
+
+// Represents a value that evolves by an add recurrence.
+// The value at iteration N is Start + N * Step.
+// "Start" and "Step" are guaranteed to be invariant in "Loop".
+struct ScevAddRec : Scev
+{
+ ScevAddRec(var_types type, Scev* start, Scev* step DEBUGARG(FlowGraphNaturalLoop* loop))
+ : Scev(ScevOper::AddRec, type), Start(start), Step(step) DEBUGARG(Loop(loop))
+ {
+ }
+
+ Scev* const Start;
+ Scev* const Step;
+ INDEBUG(FlowGraphNaturalLoop* const Loop);
+};
+
+typedef JitHashTable<GenTree*, JitPtrKeyFuncs<GenTree>, Scev*> ScalarEvolutionMap;
+
+// Scalar evolution is analyzed in the context of a single loop; values are
+// computed on demand via the "Analyze" method on this class, which also
+// maintains a cache.
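+//
+// A typical usage sketch (mirroring how the IV opts phase drives the
+// analysis; "headerPhiDefStmt" stands for a phi definition statement in the
+// loop header):
+//
+//   ScalarEvolutionContext scevContext(comp);
+//   scevContext.ResetForLoop(loop);
+//   Scev* scev = scevContext.Analyze(loop->GetHeader(), headerPhiDefStmt->GetRootNode());
+//   if (scev != nullptr)
+//   {
+//       scev = scevContext.Simplify(scev);
+//   }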
+class ScalarEvolutionContext
+{
+ Compiler* m_comp;
+ FlowGraphNaturalLoop* m_loop = nullptr;
+ ScalarEvolutionMap m_cache;
+
+ Scev* Analyze(BasicBlock* block, GenTree* tree, int depth);
+ Scev* AnalyzeNew(BasicBlock* block, GenTree* tree, int depth);
+ Scev* CreateSimpleAddRec(GenTreeLclVarCommon* headerStore,
+ ScevLocal* start,
+ BasicBlock* stepDefBlock,
+ GenTree* stepDefData);
+ Scev* CreateSimpleInvariantScev(GenTree* tree);
+ Scev* CreateScevForConstant(GenTreeIntConCommon* tree);
+
+public:
+ ScalarEvolutionContext(Compiler* comp);
+
+ void ResetForLoop(FlowGraphNaturalLoop* loop);
+
+ ScevConstant* NewConstant(var_types type, int64_t value);
+ ScevLocal* NewLocal(unsigned lclNum, unsigned ssaNum);
+ ScevUnop* NewExtension(ScevOper oper, var_types targetType, Scev* op);
+ ScevBinop* NewBinop(ScevOper oper, Scev* op1, Scev* op2);
+ ScevAddRec* NewAddRec(Scev* start, Scev* step);
+
+ Scev* Analyze(BasicBlock* block, GenTree* tree);
+ Scev* Simplify(Scev* scev);
+};