@@ -175,31 +175,15 @@ define double @reduction_v2f64(ptr %p) {
175175define float @reduction_v4f32 (ptr %p ) {
176176; SSE-LABEL: define float @reduction_v4f32
177177; SSE-SAME: (ptr [[P:%.*]]) {
178- ; SSE-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1
179- ; SSE-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
180- ; SSE-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
181- ; SSE-NEXT: [[T0:%.*]] = load float, ptr [[P]], align 4
182- ; SSE-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
183- ; SSE-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
184- ; SSE-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
185- ; SSE-NEXT: [[M1:%.*]] = tail call float @llvm.maximum.f32(float [[T1]], float [[T0]])
186- ; SSE-NEXT: [[M2:%.*]] = tail call float @llvm.maximum.f32(float [[T2]], float [[M1]])
187- ; SSE-NEXT: [[M3:%.*]] = tail call float @llvm.maximum.f32(float [[T3]], float [[M2]])
188- ; SSE-NEXT: ret float [[M3]]
178+ ; SSE-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P]], align 4
179+ ; SSE-NEXT: [[TMP2:%.*]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[TMP1]])
180+ ; SSE-NEXT: ret float [[TMP2]]
189181;
190182; AVX-LABEL: define float @reduction_v4f32
191183; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
192- ; AVX-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1
193- ; AVX-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
194- ; AVX-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
195- ; AVX-NEXT: [[T0:%.*]] = load float, ptr [[P]], align 4
196- ; AVX-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
197- ; AVX-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
198- ; AVX-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
199- ; AVX-NEXT: [[M1:%.*]] = tail call float @llvm.maximum.f32(float [[T1]], float [[T0]])
200- ; AVX-NEXT: [[M2:%.*]] = tail call float @llvm.maximum.f32(float [[T2]], float [[M1]])
201- ; AVX-NEXT: [[M3:%.*]] = tail call float @llvm.maximum.f32(float [[T3]], float [[M2]])
202- ; AVX-NEXT: ret float [[M3]]
184+ ; AVX-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P]], align 4
185+ ; AVX-NEXT: [[TMP2:%.*]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[TMP1]])
186+ ; AVX-NEXT: ret float [[TMP2]]
203187;
204188 %g1 = getelementptr inbounds float , ptr %p , i64 1
205189 %g2 = getelementptr inbounds float , ptr %p , i64 2
@@ -217,31 +201,15 @@ define float @reduction_v4f32(ptr %p) {
217201define double @reduction_v4f64_fminimum (ptr %p ) {
218202; SSE-LABEL: define double @reduction_v4f64_fminimum
219203; SSE-SAME: (ptr [[P:%.*]]) {
220- ; SSE-NEXT: [[G1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 1
221- ; SSE-NEXT: [[G2:%.*]] = getelementptr inbounds double, ptr [[P]], i64 2
222- ; SSE-NEXT: [[G3:%.*]] = getelementptr inbounds double, ptr [[P]], i64 3
223- ; SSE-NEXT: [[T0:%.*]] = load double, ptr [[P]], align 4
224- ; SSE-NEXT: [[T1:%.*]] = load double, ptr [[G1]], align 4
225- ; SSE-NEXT: [[T2:%.*]] = load double, ptr [[G2]], align 4
226- ; SSE-NEXT: [[T3:%.*]] = load double, ptr [[G3]], align 4
227- ; SSE-NEXT: [[M1:%.*]] = tail call double @llvm.minimum.f64(double [[T1]], double [[T0]])
228- ; SSE-NEXT: [[M2:%.*]] = tail call double @llvm.minimum.f64(double [[T2]], double [[M1]])
229- ; SSE-NEXT: [[M3:%.*]] = tail call double @llvm.minimum.f64(double [[T3]], double [[M2]])
230- ; SSE-NEXT: ret double [[M3]]
204+ ; SSE-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[P]], align 4
205+ ; SSE-NEXT: [[TMP2:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[TMP1]])
206+ ; SSE-NEXT: ret double [[TMP2]]
231207;
232208; AVX-LABEL: define double @reduction_v4f64_fminimum
233209; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
234- ; AVX-NEXT: [[G1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 1
235- ; AVX-NEXT: [[G2:%.*]] = getelementptr inbounds double, ptr [[P]], i64 2
236- ; AVX-NEXT: [[G3:%.*]] = getelementptr inbounds double, ptr [[P]], i64 3
237- ; AVX-NEXT: [[T0:%.*]] = load double, ptr [[P]], align 4
238- ; AVX-NEXT: [[T1:%.*]] = load double, ptr [[G1]], align 4
239- ; AVX-NEXT: [[T2:%.*]] = load double, ptr [[G2]], align 4
240- ; AVX-NEXT: [[T3:%.*]] = load double, ptr [[G3]], align 4
241- ; AVX-NEXT: [[M1:%.*]] = tail call double @llvm.minimum.f64(double [[T1]], double [[T0]])
242- ; AVX-NEXT: [[M2:%.*]] = tail call double @llvm.minimum.f64(double [[T2]], double [[M1]])
243- ; AVX-NEXT: [[M3:%.*]] = tail call double @llvm.minimum.f64(double [[T3]], double [[M2]])
244- ; AVX-NEXT: ret double [[M3]]
210+ ; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[P]], align 4
211+ ; AVX-NEXT: [[TMP2:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[TMP1]])
212+ ; AVX-NEXT: ret double [[TMP2]]
245213;
246214 %g1 = getelementptr inbounds double , ptr %p , i64 1
247215 %g2 = getelementptr inbounds double , ptr %p , i64 2
@@ -259,55 +227,15 @@ define double @reduction_v4f64_fminimum(ptr %p) {
259227define float @reduction_v8f32_fminimum (ptr %p ) {
260228; SSE-LABEL: define float @reduction_v8f32_fminimum
261229; SSE-SAME: (ptr [[P:%.*]]) {
262- ; SSE-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1
263- ; SSE-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
264- ; SSE-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
265- ; SSE-NEXT: [[G4:%.*]] = getelementptr inbounds float, ptr [[P]], i64 4
266- ; SSE-NEXT: [[G5:%.*]] = getelementptr inbounds float, ptr [[P]], i64 5
267- ; SSE-NEXT: [[G6:%.*]] = getelementptr inbounds float, ptr [[P]], i64 6
268- ; SSE-NEXT: [[G7:%.*]] = getelementptr inbounds float, ptr [[P]], i64 7
269- ; SSE-NEXT: [[T0:%.*]] = load float, ptr [[P]], align 4
270- ; SSE-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
271- ; SSE-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
272- ; SSE-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
273- ; SSE-NEXT: [[T4:%.*]] = load float, ptr [[G4]], align 4
274- ; SSE-NEXT: [[T5:%.*]] = load float, ptr [[G5]], align 4
275- ; SSE-NEXT: [[T6:%.*]] = load float, ptr [[G6]], align 4
276- ; SSE-NEXT: [[T7:%.*]] = load float, ptr [[G7]], align 4
277- ; SSE-NEXT: [[M1:%.*]] = tail call float @llvm.minimum.f32(float [[T1]], float [[T0]])
278- ; SSE-NEXT: [[M2:%.*]] = tail call float @llvm.minimum.f32(float [[T2]], float [[M1]])
279- ; SSE-NEXT: [[M3:%.*]] = tail call float @llvm.minimum.f32(float [[T3]], float [[M2]])
280- ; SSE-NEXT: [[M4:%.*]] = tail call float @llvm.minimum.f32(float [[T4]], float [[M3]])
281- ; SSE-NEXT: [[M5:%.*]] = tail call float @llvm.minimum.f32(float [[M4]], float [[T6]])
282- ; SSE-NEXT: [[M6:%.*]] = tail call float @llvm.minimum.f32(float [[M5]], float [[T5]])
283- ; SSE-NEXT: [[M7:%.*]] = tail call float @llvm.minimum.f32(float [[M6]], float [[T7]])
284- ; SSE-NEXT: ret float [[M7]]
230+ ; SSE-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 4
231+ ; SSE-NEXT: [[TMP2:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP1]])
232+ ; SSE-NEXT: ret float [[TMP2]]
285233;
286234; AVX-LABEL: define float @reduction_v8f32_fminimum
287235; AVX-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
288- ; AVX-NEXT: [[G1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1
289- ; AVX-NEXT: [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
290- ; AVX-NEXT: [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
291- ; AVX-NEXT: [[G4:%.*]] = getelementptr inbounds float, ptr [[P]], i64 4
292- ; AVX-NEXT: [[G5:%.*]] = getelementptr inbounds float, ptr [[P]], i64 5
293- ; AVX-NEXT: [[G6:%.*]] = getelementptr inbounds float, ptr [[P]], i64 6
294- ; AVX-NEXT: [[G7:%.*]] = getelementptr inbounds float, ptr [[P]], i64 7
295- ; AVX-NEXT: [[T0:%.*]] = load float, ptr [[P]], align 4
296- ; AVX-NEXT: [[T1:%.*]] = load float, ptr [[G1]], align 4
297- ; AVX-NEXT: [[T2:%.*]] = load float, ptr [[G2]], align 4
298- ; AVX-NEXT: [[T3:%.*]] = load float, ptr [[G3]], align 4
299- ; AVX-NEXT: [[T4:%.*]] = load float, ptr [[G4]], align 4
300- ; AVX-NEXT: [[T5:%.*]] = load float, ptr [[G5]], align 4
301- ; AVX-NEXT: [[T6:%.*]] = load float, ptr [[G6]], align 4
302- ; AVX-NEXT: [[T7:%.*]] = load float, ptr [[G7]], align 4
303- ; AVX-NEXT: [[M1:%.*]] = tail call float @llvm.minimum.f32(float [[T1]], float [[T0]])
304- ; AVX-NEXT: [[M2:%.*]] = tail call float @llvm.minimum.f32(float [[T2]], float [[M1]])
305- ; AVX-NEXT: [[M3:%.*]] = tail call float @llvm.minimum.f32(float [[T3]], float [[M2]])
306- ; AVX-NEXT: [[M4:%.*]] = tail call float @llvm.minimum.f32(float [[T4]], float [[M3]])
307- ; AVX-NEXT: [[M5:%.*]] = tail call float @llvm.minimum.f32(float [[M4]], float [[T6]])
308- ; AVX-NEXT: [[M6:%.*]] = tail call float @llvm.minimum.f32(float [[M5]], float [[T5]])
309- ; AVX-NEXT: [[M7:%.*]] = tail call float @llvm.minimum.f32(float [[M6]], float [[T7]])
310- ; AVX-NEXT: ret float [[M7]]
236+ ; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[P]], align 4
237+ ; AVX-NEXT: [[TMP2:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP1]])
238+ ; AVX-NEXT: ret float [[TMP2]]
311239;
312240 %g1 = getelementptr inbounds float , ptr %p , i64 1
313241 %g2 = getelementptr inbounds float , ptr %p , i64 2
0 commit comments