Skip to content

Commit f003ba4

Browse files
AndyAyersMS and Copilot
authored
JIT: clone array loops with stride > 57 (#129349)
Drop the blanket stride cap (`stride < 58`) in `optDeriveLoopCloningConditions`, which previously rejected array loops whose post-step IV could exceed INT_MAX given Array.MaxLength. For larger strides, emit a runtime `arr.Length <= INT_MAX - s + 1` cloning condition; for small strides, the implicit Array.MaxLength bound still suffices and no extra check is added. Builds on the similar work added for spans in #129309.
---------
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 07b0630 commit f003ba4

3 files changed

Lines changed: 197 additions & 40 deletions

File tree

src/coreclr/jit/loopcloning.cpp

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,26 +1226,42 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
12261226
// is beyond the limit.
12271227
int stride = abs(iterInfo->IterConst());
12281228

1229+
// For arrays, the per-access cloning conditions only bound `limit` by
1230+
// Array.MaxLength (0x7FFFFFC7). The post-step IV can reach at most
1231+
// `limit + s - 1`, which still fits in INT_MAX as long as `s <= 57`. Larger
1232+
// strides need an explicit overflow guard of the same shape as the one used
1233+
// for spans (Span<>.Length can reach INT_MAX, so spans need it for any s > 1).
12291234
static_assert(INT32_MAX >= CORINFO_Array_MaxLength);
1230-
if (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1))
1235+
const bool largeStride = (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1));
1236+
const bool needsOverflowGuard = hasSpans || largeStride;
1237+
1238+
// If the loop limit is an array length, compute the underlying ArrIndex
1239+
// and queue the deref check once up front. The optional zero-trip guard,
1240+
// the optional overflow guard, and the regular limit conditions all
1241+
// reuse this single ArrIndex.
1242+
//
1243+
ArrIndex* limitArrIndex = nullptr;
1244+
if (iterInfo->HasArrayLengthLimit)
12311245
{
1232-
// Array.MaxLength can have maximum of 0x7fffffc7 elements, so make sure
1233-
// the stride increment doesn't overflow or underflow the index. Hence,
1234-
// the maximum stride limit is set to
1235-
// (int.MaxValue - (Array.MaxLength - 1) + 1), which is
1236-
// (0X7fffffff - 0x7fffffc7 + 2) = 0x3a or 58.
1237-
return false;
1246+
limitArrIndex = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone));
1247+
if (!iterInfo->ArrLenLimit(this, limitArrIndex))
1248+
{
1249+
JITDUMP("> ArrLen not matching\n");
1250+
return false;
1251+
}
1252+
1253+
LC_Array array(LC_Array::Jagged, limitArrIndex, LC_Array::None);
1254+
context->EnsureArrayDerefs(loop->GetIndex())->Push(array);
12381255
}
12391256

1240-
// Span<>.Length can be INT32_MAX, unlike Array.MaxLength. For an
1241-
// increasing loop with stride > 1, the IV after the final in-loop
1242-
// increment is at most `limit + s` (LE) or `limit + s - 1` (LT), so
1243-
// a limit near INT32_MAX would wrap the IV and let the bounds-check-
1244-
// stripped fast clone access memory past the span. Bound the limit
1245-
// base accordingly. Decreasing loops are safe via the existing
1246-
// `limit >= 0` condition plus the stride cap above. HasArrayLengthLimit
1247-
// is bounded implicitly by Array.MaxLength.
1248-
if (hasSpans && (stride > 1) && isIncreasingLoop)
1257+
// For an increasing loop with stride > 1, the IV after the final in-loop
1258+
// increment is at most `limit + s` (LE) or `limit + s - 1` (LT), so a
1259+
// limit near INT32_MAX would wrap the IV and let the bounds-check-
1260+
// stripped fast clone access memory past the array/span. Bound the limit
1261+
// base accordingly. Decreasing loops are safe via the existing `limit
1262+
// >= 0` condition (post-step IV >= -stride > INT_MIN for any non-absurd
1263+
// stride).
1264+
if ((stride > 1) && isIncreasingLoop && needsOverflowGuard)
12491265
{
12501266
const int adjustForLE = (iterInfo->TestOper() == GT_LE) ? 1 : 0;
12511267
const int offset = iterInfo->LimitOffset;
@@ -1257,7 +1273,7 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
12571273
const int limitVal = iterInfo->ConstLimit();
12581274
if ((int64_t)limitVal > maxLimitBase64)
12591275
{
1260-
JITDUMP("> Span stride %d: const limit %d exceeds overflow bound %lld\n", stride, limitVal,
1276+
JITDUMP("> Stride %d: const limit %d exceeds overflow bound %lld\n", stride, limitVal,
12611277
(long long)maxLimitBase64);
12621278
return false;
12631279
}
@@ -1266,20 +1282,19 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
12661282
{
12671283
if (maxLimitBase64 >= INT32_MAX)
12681284
{
1269-
// Offset already absorbs the stride; guard is vacuous.
1270-
JITDUMP("Span stride>1 overflow guard trivially holds (offset %d)\n", offset);
1285+
JITDUMP("Stride>1 overflow guard trivially holds (offset %d)\n", offset);
12711286
}
12721287
else if (maxLimitBase64 < 0)
12731288
{
1274-
JITDUMP("> Span stride %d, offset %d: overflow guard unsatisfiable\n", stride, offset);
1289+
JITDUMP("> Stride %d, offset %d: overflow guard unsatisfiable\n", stride, offset);
12751290
return false;
12761291
}
12771292
else
12781293
{
12791294
const unsigned limitLcl = iterInfo->VarLimit();
12801295
if (!genActualTypeIsInt(lvaGetDesc(limitLcl)))
12811296
{
1282-
JITDUMP("> Span stride %d: limit var V%02u not TYP_INT-compatible\n", stride, limitLcl);
1297+
JITDUMP("> Stride %d: limit var V%02u not TYP_INT-compatible\n", stride, limitLcl);
12831298
return false;
12841299
}
12851300

@@ -1288,29 +1303,36 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl
12881303
LC_Ident maxConstIdent = LC_Ident::CreateConst(static_cast<unsigned>(maxLimit));
12891304
LC_Condition overflowGuard(GT_LE, LC_Expr(limitVarIdent), LC_Expr(maxConstIdent));
12901305
context->EnsureConditions(loop->GetIndex())->Push(overflowGuard);
1291-
JITDUMP("Added Span stride>1 overflow guard: V%02u <= %d\n", limitLcl, maxLimit);
1306+
JITDUMP("Added stride>1 overflow guard: V%02u <= %d\n", limitLcl, maxLimit);
12921307
}
12931308
}
1294-
// HasArrayLengthLimit: bounded by Array.MaxLength, no extra guard.
1295-
}
1296-
1297-
// If the loop limit is an array length, compute the underlying ArrIndex
1298-
// and queue the deref check once up front. Both the optional zero-trip
1299-
// guard below and the regular limit conditions further down reuse this
1300-
// single ArrIndex to avoid duplicating the deref entry and allocation.
1301-
//
1302-
ArrIndex* limitArrIndex = nullptr;
1303-
if (iterInfo->HasArrayLengthLimit)
1304-
{
1305-
limitArrIndex = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone));
1306-
if (!iterInfo->ArrLenLimit(this, limitArrIndex))
1309+
else if (iterInfo->HasArrayLengthLimit && largeStride)
13071310
{
1308-
JITDUMP("> ArrLen not matching\n");
1309-
return false;
1311+
// For stride <= 57 the implicit Array.MaxLength bound suffices;
1312+
// we fall through with no extra check. For wider strides emit a
1313+
// runtime guard on arr.Length so the fast clone only runs when
1314+
// the array is short enough that the post-step IV stays in int.
1315+
assert(limitArrIndex != nullptr);
1316+
if (maxLimitBase64 >= CORINFO_Array_MaxLength)
1317+
{
1318+
JITDUMP("Stride>1 overflow guard trivially holds for arr.Length (offset %d)\n", offset);
1319+
}
1320+
else if (maxLimitBase64 < 0)
1321+
{
1322+
JITDUMP("> Stride %d, offset %d: arr.Length overflow guard unsatisfiable\n", stride, offset);
1323+
return false;
1324+
}
1325+
else
1326+
{
1327+
const int maxLimit = (int)maxLimitBase64;
1328+
LC_Ident arrLenIdent =
1329+
LC_Ident::CreateArrAccess(LC_Array(LC_Array::Jagged, limitArrIndex, LC_Array::ArrLen));
1330+
LC_Ident maxConstIdent = LC_Ident::CreateConst(static_cast<unsigned>(maxLimit));
1331+
LC_Condition overflowGuard(GT_LE, LC_Expr(arrLenIdent), LC_Expr(maxConstIdent));
1332+
context->EnsureConditions(loop->GetIndex())->Push(overflowGuard);
1333+
JITDUMP("Added stride>1 arr.Length overflow guard: <= %d\n", maxLimit);
1334+
}
13101335
}
1311-
1312-
LC_Array array(LC_Array::Jagged, limitArrIndex, LC_Array::None);
1313-
context->EnsureArrayDerefs(loop->GetIndex())->Push(array);
13141336
}
13151337

13161338
// If AnalyzeIteration could not prove the loop condition holds on entry,
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Runtime.CompilerServices;
6+
using Xunit;
7+
8+
// Tests for array loops whose induction variable advances by a stride larger
// than 57, the range the accompanying JIT change starts to loop-clone. Each
// kernel below is a small strided sum over an int[]; each test compares the
// kernel's result against a reference sum computed without touching the array.
public class LargeStride
9+
{
10+
// Sums a[0], a[60], a[120], ... using a.Length as the loop limit.
// NoInlining | AggressiveOptimization is requested so the loop is compiled
// as its own optimized method body.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
11+
static int ArrayLengthStride60(int[] a)
12+
{
13+
int sum = 0;
14+
for (int i = 0; i < a.Length; i += 60)
15+
sum += a[i];
16+
return sum;
17+
}
18+
19+
// Same shape as ArrayLengthStride60, but with a stride of 256.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
20+
static int ArrayLengthStride256(int[] a)
21+
{
22+
int sum = 0;
23+
for (int i = 0; i < a.Length; i += 256)
24+
sum += a[i];
25+
return sum;
26+
}
27+
28+
// Sums a[0], a[100], ... for indices below the caller-supplied limit n.
// The limit is a variable unrelated to a.Length, so a[i] is only in range
// when the caller passes n <= a.Length (the tests below do).
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
29+
static int VarLimitStride100(int[] a, int n)
30+
{
31+
int sum = 0;
32+
for (int i = 0; i < n; i += 100)
33+
sum += a[i];
34+
return sum;
35+
}
36+
37+
// Sums a[0], a[200], ..., a[800] using the constant limit 1000; the caller
// must pass an array with at least 801 elements.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
38+
static int ConstLimitStride200(int[] a)
39+
{
40+
int sum = 0;
41+
for (int i = 0; i < 1000; i += 200)
42+
sum += a[i];
43+
return sum;
44+
}
45+
46+
// Decreasing loop: sums a[n], a[n - 100], ... while the index stays > 0.
// The first access is a[n] itself, so the caller must pass n < a.Length.
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
47+
static int DecGtStride100(int[] a, int n)
48+
{
49+
int sum = 0;
50+
for (int i = n; i > 0; i -= 100)
51+
sum += a[i];
52+
return sum;
53+
}
54+
55+
// Builds an int[n] with a[i] == i + 1, so every element is non-zero and
// identifies its own index.
static int[] Make(int n)
56+
{
57+
int[] a = new int[n];
58+
for (int i = 0; i < n; i++) a[i] = i + 1;
59+
return a;
60+
}
61+
62+
// Reference result for the increasing `i < n` kernels over a Make() array:
// adds i + 1 (the value Make stores at index i) for i = 0, stride, 2*stride, ...
static int ExpectedIncLt(int n, int stride)
63+
{
64+
int sum = 0;
65+
for (int i = 0; i < n; i += stride) sum += i + 1;
66+
return sum;
67+
}
68+
69+
// Reference result for the decreasing `i > 0` kernel over a Make() array:
// adds i + 1 for i = n, n - stride, ... while i > 0.
static int ExpectedDecGt(int n, int stride)
70+
{
71+
int sum = 0;
72+
for (int i = n; i > 0; i -= stride) sum += i + 1;
73+
return sum;
74+
}
75+
76+
// Array lengths cover: empty, one below the stride, exactly the stride,
// and lengths spanning several strides.
[Theory]
77+
[InlineData(0)]
78+
[InlineData(59)]
79+
[InlineData(60)]
80+
[InlineData(120)]
81+
[InlineData(300)]
82+
public static void ArrayLengthStride60Test(int n)
83+
{
84+
int[] a = Make(n);
85+
Assert.Equal(ExpectedIncLt(n, 60), ArrayLengthStride60(a));
86+
}
87+
88+
// Array lengths cover: empty, one below the stride, exactly the stride,
// and a length spanning several strides.
[Theory]
89+
[InlineData(0)]
90+
[InlineData(255)]
91+
[InlineData(256)]
92+
[InlineData(1000)]
93+
public static void ArrayLengthStride256Test(int n)
94+
{
95+
int[] a = Make(n);
96+
Assert.Equal(ExpectedIncLt(n, 256), ArrayLengthStride256(a));
97+
}
98+
99+
// n is the loop limit and len the array length; every case keeps n <= len
// so all accesses are in bounds.
[Theory]
100+
[InlineData(0, 1000)]
101+
[InlineData(99, 1000)]
102+
[InlineData(100, 1000)]
103+
[InlineData(500, 1000)]
104+
public static void VarLimitStride100Test(int n, int len)
105+
{
106+
int[] a = Make(len);
107+
Assert.Equal(ExpectedIncLt(n, 100), VarLimitStride100(a, n));
108+
}
109+
110+
// The array length matches the kernel's constant limit of 1000.
[Fact]
111+
public static void ConstLimitStride200Test()
112+
{
113+
int[] a = Make(1000);
114+
Assert.Equal(ExpectedIncLt(1000, 200), ConstLimitStride200(a));
115+
}
116+
117+
// n is the starting index and len the array length; every case keeps
// n < len so the first access a[n] is in bounds.
[Theory]
118+
[InlineData(1, 1000)]
119+
[InlineData(99, 1000)]
120+
[InlineData(100, 1000)]
121+
[InlineData(500, 1000)]
122+
public static void DecGtStride100Test(int n, int len)
123+
{
124+
int[] a = Make(len);
125+
Assert.Equal(ExpectedDecGt(n, 100), DecGtStride100(a, n));
126+
}
127+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
</PropertyGroup>
5+
<ItemGroup>
6+
<Compile Include="$(MSBuildProjectName).cs" />
7+
</ItemGroup>
8+
</Project>

0 commit comments

Comments
 (0)