|
6 | 6 | using System.Numerics; |
7 | 7 | using System.Runtime.CompilerServices; |
8 | 8 | using System.Runtime.InteropServices; |
| 9 | +#if SUPPORTS_RUNTIME_INTRINSICS |
| 10 | +using System.Runtime.Intrinsics; |
| 11 | +using System.Runtime.Intrinsics.X86; |
| 12 | +#endif |
9 | 13 | using System.Text; |
10 | 14 |
|
11 | 15 | // ReSharper disable InconsistentNaming |
@@ -596,5 +600,98 @@ private static void GuardBlockIndex(int idx) |
596 | 600 | DebugGuard.MustBeLessThan(idx, Size, nameof(idx)); |
597 | 601 | DebugGuard.MustBeGreaterThanOrEqualTo(idx, 0, nameof(idx)); |
598 | 602 | } |
| 603 | + |
/// <summary>
/// Writes the transpose of this block into <paramref name="d"/>.
/// </summary>
/// <remarks>
/// When <c>SUPPORTS_RUNTIME_INTRINSICS</c> is defined and AVX is reported as supported,
/// the work is delegated to <c>TransposeIntoAvx</c>; in every other case
/// <c>TransposeIntoFallback</c> is used.
/// </remarks>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeInto(ref Block8x8F d)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    // Hardware path: taken only when the AVX instruction set is available at runtime.
    if (Avx.IsSupported)
    {
        this.TransposeIntoAvx(ref d);
        return;
    }
#endif

    // Portable path: also the only path when intrinsics support is compiled out.
    this.TransposeIntoFallback(ref d);
}
| 622 | + |
| 623 | +#if SUPPORTS_RUNTIME_INTRINSICS |
/// <summary>
/// AVX implementation of <see cref="TransposeInto(ref Block8x8F)"/>.
/// Follows the approach described at
/// <see href="https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536"/>.
/// </summary>
/// <param name="d">The destination block</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void TransposeIntoAvx(ref Block8x8F d)
{
    // Immediate operands used by every shuffle/blend group below.
    const byte CrossPairs = 0x4E;            // shuffle control
    const byte UpperPairsFromSecond = 0xCC;  // blend mask: elements 2,3,6,7 come from the second operand
    const byte LowerPairsFromSecond = 0x33;  // blend mask: elements 0,1,4,5 come from the second operand

    // Load phase. Every source field is read before anything is written to the destination.
    // Each vector carries V{k}L (or V{k}R) in its lower 128 bits and V{k+4}L (or V{k+4}R) in its upper 128 bits.
    Vector256<float> left04 = Unsafe.As<Vector4, Vector128<float>>(ref this.V0L).ToVector256();
    left04 = Avx.InsertVector128(left04, Unsafe.As<Vector4, Vector128<float>>(ref this.V4L), 1);

    Vector256<float> left15 = Unsafe.As<Vector4, Vector128<float>>(ref this.V1L).ToVector256();
    left15 = Avx.InsertVector128(left15, Unsafe.As<Vector4, Vector128<float>>(ref this.V5L), 1);

    Vector256<float> left26 = Unsafe.As<Vector4, Vector128<float>>(ref this.V2L).ToVector256();
    left26 = Avx.InsertVector128(left26, Unsafe.As<Vector4, Vector128<float>>(ref this.V6L), 1);

    Vector256<float> left37 = Unsafe.As<Vector4, Vector128<float>>(ref this.V3L).ToVector256();
    left37 = Avx.InsertVector128(left37, Unsafe.As<Vector4, Vector128<float>>(ref this.V7L), 1);

    Vector256<float> right04 = Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256();
    right04 = Avx.InsertVector128(right04, Unsafe.As<Vector4, Vector128<float>>(ref this.V4R), 1);

    Vector256<float> right15 = Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256();
    right15 = Avx.InsertVector128(right15, Unsafe.As<Vector4, Vector128<float>>(ref this.V5R), 1);

    Vector256<float> right26 = Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256();
    right26 = Avx.InsertVector128(right26, Unsafe.As<Vector4, Vector128<float>>(ref this.V6R), 1);

    Vector256<float> right37 = Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256();
    right37 = Avx.InsertVector128(right37, Unsafe.As<Vector4, Vector128<float>>(ref this.V7R), 1);

    // Store phase. Each assignment writes a full 256-bit vector starting at the named V*L field.
    // NOTE(review): this relies on the matching V*R field sitting directly after it in memory - confirm against the struct layout.

    // Low halves of the left vectors -> destination V0L / V1L.
    Vector256<float> leftLowA = Avx.UnpackLow(left04, left15);
    Vector256<float> leftLowB = Avx.UnpackLow(left26, left37);
    Vector256<float> crossed = Avx.Shuffle(leftLowA, leftLowB, CrossPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V0L) = Avx.Blend(leftLowA, crossed, UpperPairsFromSecond);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V1L) = Avx.Blend(leftLowB, crossed, LowerPairsFromSecond);

    // Low halves of the right vectors -> destination V4L / V5L.
    Vector256<float> rightLowA = Avx.UnpackLow(right04, right15);
    Vector256<float> rightLowB = Avx.UnpackLow(right26, right37);
    crossed = Avx.Shuffle(rightLowA, rightLowB, CrossPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V4L) = Avx.Blend(rightLowA, crossed, UpperPairsFromSecond);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V5L) = Avx.Blend(rightLowB, crossed, LowerPairsFromSecond);

    // High halves of the left vectors -> destination V2L / V3L.
    Vector256<float> leftHighA = Avx.UnpackHigh(left04, left15);
    Vector256<float> leftHighB = Avx.UnpackHigh(left26, left37);
    crossed = Avx.Shuffle(leftHighA, leftHighB, CrossPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V2L) = Avx.Blend(leftHighA, crossed, UpperPairsFromSecond);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V3L) = Avx.Blend(leftHighB, crossed, LowerPairsFromSecond);

    // High halves of the right vectors -> destination V6L / V7L.
    Vector256<float> rightHighA = Avx.UnpackHigh(right04, right15);
    Vector256<float> rightHighB = Avx.UnpackHigh(right26, right37);
    crossed = Avx.Shuffle(rightHighA, rightHighB, CrossPairs);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V6L) = Avx.Blend(rightHighA, crossed, UpperPairsFromSecond);
    Unsafe.As<Vector4, Vector256<float>>(ref d.V7L) = Avx.Blend(rightHighB, crossed, LowerPairsFromSecond);
}
| 695 | +#endif |
599 | 696 | } |
600 | 697 | } |
0 commit comments