From 83ab57b7c42bbd7fb26a05ea11b0f21cee88894f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 28 Jan 2026 00:56:52 -0500 Subject: [PATCH 1/9] Merge commit 'd9aae8cc544c5f524a12b5f3b2f3c3745c34dd74' into sync-from-portable-simd-2026-01-28 --- .github/workflows/ci.yml | 23 +- Cargo.lock | 279 +++++++++++++---- beginners-guide.md | 2 +- crates/core_simd/examples/dot_product.rs | 34 +- crates/core_simd/examples/matrix_inversion.rs | 2 +- crates/core_simd/src/fmt.rs | 3 +- crates/core_simd/src/iter.rs | 10 +- crates/core_simd/src/lane_count.rs | 40 --- crates/core_simd/src/lib.rs | 9 +- crates/core_simd/src/masks.rs | 180 ++++++----- crates/core_simd/src/masks/bitmask.rs | 228 -------------- crates/core_simd/src/masks/full_masks.rs | 296 ------------------ crates/core_simd/src/mod.rs | 3 +- crates/core_simd/src/ops.rs | 5 +- crates/core_simd/src/ops/assign.rs | 1 - crates/core_simd/src/ops/deref.rs | 3 - crates/core_simd/src/ops/shift_scalar.rs | 10 +- crates/core_simd/src/ops/unary.rs | 4 +- crates/core_simd/src/select.rs | 197 +++++++++--- crates/core_simd/src/simd/cmp/eq.rs | 24 +- crates/core_simd/src/simd/cmp/ord.rs | 60 ++-- crates/core_simd/src/simd/num/float.rs | 10 +- crates/core_simd/src/simd/num/int.rs | 14 +- crates/core_simd/src/simd/num/uint.rs | 10 +- crates/core_simd/src/simd/ptr/const_ptr.rs | 9 +- crates/core_simd/src/simd/ptr/mut_ptr.rs | 9 +- crates/core_simd/src/swizzle.rs | 62 ++-- crates/core_simd/src/swizzle_dyn.rs | 21 +- crates/core_simd/src/to_bytes.rs | 4 +- crates/core_simd/src/vector.rs | 105 ++----- crates/core_simd/src/vendor/loongarch64.rs | 45 ++- crates/core_simd/src/vendor/wasm32.rs | 14 - crates/core_simd/src/vendor/x86.rs | 22 -- crates/core_simd/tests/masks.rs | 4 +- crates/std_float/src/lib.rs | 108 +++---- crates/std_float/tests/float.rs | 29 +- crates/test_helpers/Cargo.toml | 1 + crates/test_helpers/src/approxeq.rs | 110 +++++++ crates/test_helpers/src/lib.rs | 80 ++++- rust-toolchain.toml | 2 +- 40 files changed, 879 insertions(+), 1193 deletions(-) delete mode 100644 crates/core_simd/src/lane_count.rs delete mode 100644 crates/core_simd/src/masks/bitmask.rs delete mode 100644 crates/core_simd/src/masks/full_masks.rs create mode 100644 crates/test_helpers/src/approxeq.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3984d8f0d8d9..de7efa355283 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,7 @@ jobs: strategy: fail-fast: false matrix: - target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu] + target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, x86_64-unknown-linux-gnu] # `default` means we use the default target config for the target, # `native` means we run with `-Ctarget-cpu=native`, and anything else is # an arg to `-Ctarget-feature` @@ -68,18 +68,12 @@ jobs: exclude: # -Ctarget-cpu=native sounds like bad-news if target != host - { target: i686-pc-windows-msvc, target_feature: native } - - { target: i586-pc-windows-msvc, target_feature: native } include: # Populate the `matrix.os` field - { target: x86_64-unknown-linux-gnu, os: ubuntu-latest } - { target: x86_64-pc-windows-msvc, os: windows-latest } - { target: i686-pc-windows-msvc, os: windows-latest } - - { target: i586-pc-windows-msvc, os: windows-latest } - - # These are globally available on all the other targets. - - { target: i586-pc-windows-msvc, target_feature: +sse, os: windows-latest } - - { target: i586-pc-windows-msvc, target_feature: +sse2, os: windows-latest } # Annoyingly, the x86_64-unknown-linux-gnu runner *almost* always has # avx512vl, but occasionally doesn't. Maybe one day we can enable it. @@ -129,7 +123,7 @@ jobs: run: cargo doc --verbose --target=${{ matrix.target }} env: RUSTDOCFLAGS: -Dwarnings - + macos-tests: name: ${{ matrix.target }} runs-on: macos-latest @@ -246,9 +240,18 @@ jobs: miri: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + shard: [1, 2, 3, 4] env: PROPTEST_CASES: 16 steps: - uses: actions/checkout@v4 - - name: Test (Miri) - run: cargo miri test + + - name: Install cargo-nextest + uses: taiki-e/install-action@nextest + + - name: Test (Miri) (partition ${{ matrix.shard }}/4) + run: | + cargo miri nextest run --partition count:${{ matrix.shard }}/4 diff --git a/Cargo.lock b/Cargo.lock index 1584c704fb22..5a5f0d8907ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,12 +1,12 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "autocfg" -version = "1.1.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" @@ -16,31 +16,30 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" +dependencies = [ + "shlex", +] [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "console_error_panic_hook" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" -dependencies = [ - "cfg-if", - "wasm-bindgen", -] +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "core_simd" @@ -54,46 +53,69 @@ dependencies = [ ] [[package]] -name = "js-sys" -version = "0.3.64" +name = "float-cmp" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" dependencies = [ + "num-traits", +] + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", "wasm-bindgen", ] [[package]] name = "log" -version = "0.4.20" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "minicov" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b" +dependencies = [ + "cc", + "walkdir", +] [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "once_cell" -version = "1.18.0" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] @@ -114,9 +136,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.33" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -167,10 +189,25 @@ dependencies = [ ] [[package]] -name = "scoped-tls" -version = "1.0.1" +name = "rustversion" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "std_float" @@ -184,9 +221,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.29" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -197,34 +234,46 @@ dependencies = [ name = "test_helpers" version = "0.1.0" dependencies = [ + "float-cmp", "proptest", ] [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", + "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn", @@ -233,21 +282,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.37" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -255,9 +305,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -268,19 +318,21 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-bindgen-test" -version = "0.3.37" +version = "0.3.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671" +checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3" dependencies = [ - "console_error_panic_hook", "js-sys", - "scoped-tls", + "minicov", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", @@ -288,20 +340,123 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.37" +version = "0.3.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575" +checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" dependencies = [ "proc-macro2", "quote", + "syn", ] [[package]] name = "web-sys" -version = "0.3.64" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", ] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/beginners-guide.md b/beginners-guide.md index dc08d847ced5..4250a18315a6 100644 --- a/beginners-guide.md +++ b/beginners-guide.md @@ -25,7 +25,7 @@ SIMD has a few special vocabulary terms you should know: * **Scalar:** "Scalar" in mathematical contexts refers to values that can be represented as a single element, mostly numbers like 6, 3.14, or -2. It can also be used to describe "scalar operations" that use strictly scalar values, like addition. This term is mostly used to differentiate between vectorized operations that use SIMD instructions and scalar operations that don't. -* **Lane:** A single element position within a vector is called a lane. If you have `N` lanes available then they're numbered from `0` to `N-1` when referring to them, again like an array. The biggest difference between an array element and a vector lane is that in general is *relatively costly* to access an individual lane value. On most architectures, the vector has to be pushed out of the SIMD register onto the stack, then an individual lane is accessed while it's on the stack (and possibly the stack value is read back into a register). For this reason, when working with SIMD you should avoid reading or writing the value of an individual lane during hot loops. +* **Lane:** A single element position within a vector is called a lane. If you have `N` lanes available then they're numbered from `0` to `N-1` when referring to them, again like an array. The biggest difference between an array element and a vector lane is that in general it is *relatively costly* to access an individual lane value. On most architectures, the vector has to be pushed out of the SIMD register onto the stack, then an individual lane is accessed while it's on the stack (and possibly the stack value is read back into a register). For this reason, when working with SIMD you should avoid reading or writing the value of an individual lane during hot loops. * **Bit Widths:** When talking about SIMD, the bit widths used are the bit size of the vectors involved, *not* the individual elements. So "128-bit SIMD" has 128-bit vectors, and that might be `f32x4`, `i32x4`, `i16x8`, or other variations. While 128-bit SIMD is the most common, there's also 64-bit, 256-bit, and even 512-bit on the newest CPUs. diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index 75d152ae7f0e..4ef32bfa60b5 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -1,8 +1,6 @@ //! Code taken from the `packed_simd` crate. //! Run this code with `cargo test --example dot_product`. -#![feature(array_chunks)] -#![feature(slice_as_chunks)] // Add these imports to use the stdsimd library #![feature(portable_simd)] use core_simd::simd::prelude::*; @@ -33,7 +31,7 @@ pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 { } // We now move on to the SIMD implementations: notice the following constructs: -// `array_chunks::<4>`: mapping this over the vector will let use construct SIMD vectors +// `as_chunks::<4>`: mapping this over the vector will let us construct SIMD vectors // `f32x4::from_array`: construct the SIMD vector from a slice // `(a * b).reduce_sum()`: Multiply both f32x4 vectors together, and then reduce them. // This approach essentially uses SIMD to produce a vector of length N/4 of all the products, @@ -42,9 +40,11 @@ pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 { pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); // TODO handle remainder when a.len() % 4 != 0 - a.array_chunks::<4>() + a.as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .map(|(a, b)| (a * b).reduce_sum()) .sum() } @@ -60,9 +60,11 @@ pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 { pub fn dot_prod_simd_1(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); // TODO handle remainder when a.len() % 4 != 0 - a.array_chunks::<4>() + a.as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .fold(f32x4::splat(0.0), |acc, zipped| acc + zipped.0 * zipped.1) .reduce_sum() } @@ -74,9 +76,11 @@ pub fn dot_prod_simd_2(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); // TODO handle remainder when a.len() % 4 != 0 let mut res = f32x4::splat(0.0); - a.array_chunks::<4>() + a.as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .for_each(|(a, b)| { res = a.mul_add(b, res); }); @@ -113,9 +117,11 @@ pub fn dot_prod_simd_3(a: &[f32], b: &[f32]) -> f32 { // next example. pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 { let mut sum = a - .array_chunks::<4>() + .as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .map(|(a, b)| a * b) .fold(f32x4::splat(0.0), std::ops::Add::add) .reduce_sum(); @@ -131,9 +137,11 @@ pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 { // This version allocates a single `XMM` register for accumulation, and the folds don't allocate on top of that. // Notice the use of `mul_add`, which can do a multiply and an add operation ber iteration. pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 { - a.array_chunks::<4>() + a.as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .fold(f32x4::splat(0.), |acc, (a, b)| a.mul_add(b, acc)) .reduce_sum() } diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs index bad86414401d..ad2eea9153e0 100644 --- a/crates/core_simd/examples/matrix_inversion.rs +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -1,7 +1,7 @@ //! 4x4 matrix inverse // Code ported from the `packed_simd` crate // Run this code with `cargo test --example matrix_inversion` -#![feature(array_chunks, portable_simd)] +#![feature(portable_simd)] use core_simd::simd::prelude::*; // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^) diff --git a/crates/core_simd/src/fmt.rs b/crates/core_simd/src/fmt.rs index 3a540f5a0490..90c520e75bb3 100644 --- a/crates/core_simd/src/fmt.rs +++ b/crates/core_simd/src/fmt.rs @@ -1,9 +1,8 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{Simd, SimdElement}; use core::fmt; impl fmt::Debug for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + fmt::Debug, { /// A `Simd` has a debug format like the one for `[T]`: diff --git a/crates/core_simd/src/iter.rs b/crates/core_simd/src/iter.rs index b3732fd74d5f..fdc458efeda4 100644 --- a/crates/core_simd/src/iter.rs +++ b/crates/core_simd/src/iter.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::Simd; use core::{ iter::{Product, Sum}, ops::{Add, Mul}, @@ -7,8 +7,6 @@ use core::{ macro_rules! impl_traits { { $type:ty } => { impl Sum for Simd<$type, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn sum>(iter: I) -> Self { @@ -17,8 +15,6 @@ macro_rules! impl_traits { } impl Product for Simd<$type, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn product>(iter: I) -> Self { @@ -27,8 +23,6 @@ macro_rules! impl_traits { } impl<'a, const N: usize> Sum<&'a Self> for Simd<$type, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn sum>(iter: I) -> Self { @@ -37,8 +31,6 @@ macro_rules! impl_traits { } impl<'a, const N: usize> Product<&'a Self> for Simd<$type, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn product>(iter: I) -> Self { diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs deleted file mode 100644 index bbdfd5f5f3ed..000000000000 --- a/crates/core_simd/src/lane_count.rs +++ /dev/null @@ -1,40 +0,0 @@ -mod sealed { - pub trait Sealed {} -} -use sealed::Sealed; - -/// Specifies the number of lanes in a SIMD vector as a type. -pub struct LaneCount; - -impl LaneCount { - /// The number of bytes in a bitmask with this many lanes. - pub const BITMASK_LEN: usize = N.div_ceil(8); -} - -/// Statically guarantees that a lane count is marked as supported. -/// -/// This trait is *sealed*: the list of implementors below is total. -/// Users do not have the ability to mark additional `LaneCount` values as supported. -/// Only SIMD vectors with supported lane counts are constructable. -pub trait SupportedLaneCount: Sealed { - #[doc(hidden)] - type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>; -} - -impl Sealed for LaneCount {} - -macro_rules! supported_lane_count { - ($($lanes:literal),+) => { - $( - impl SupportedLaneCount for LaneCount<$lanes> { - type BitMask = [u8; ($lanes + 7) / 8]; - } - )+ - }; -} - -supported_lane_count!( - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64 -); diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 717b882b64ba..fe26d99b9194 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -9,7 +9,8 @@ simd_ffi, staged_api, prelude_import, - ptr_metadata + ptr_metadata, + rustc_attrs )] #![cfg_attr( all( @@ -30,10 +31,6 @@ any(target_arch = "powerpc", target_arch = "powerpc64"), feature(stdarch_powerpc) )] -#![cfg_attr( - all(target_arch = "x86_64", target_feature = "avx512f"), - feature(stdarch_x86_avx512) -)] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny( unsafe_op_in_unsafe_fn, @@ -41,7 +38,7 @@ clippy::undocumented_unsafe_blocks )] #![doc(test(attr(deny(warnings))))] -#![allow(internal_features)] +#![allow(internal_features, clippy::repr_packed_without_abi)] #![unstable(feature = "portable_simd", issue = "86656")] //! Portable SIMD module. diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 19d45f4d3b31..3e2209556b66 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -2,20 +2,33 @@ //! Types representing #![allow(non_camel_case_types)] -#[cfg_attr( - not(all(target_arch = "x86_64", target_feature = "avx512f")), - path = "masks/full_masks.rs" -)] -#[cfg_attr( - all(target_arch = "x86_64", target_feature = "avx512f"), - path = "masks/bitmask.rs" -)] -mod mask_impl; - -use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; +use crate::simd::{Select, Simd, SimdCast, SimdElement}; use core::cmp::Ordering; use core::{fmt, mem}; +pub(crate) trait FixEndianness { + fn fix_endianness(self) -> Self; +} + +macro_rules! impl_fix_endianness { + { $($int:ty),* } => { + $( + impl FixEndianness for $int { + #[inline(always)] + fn fix_endianness(self) -> Self { + if cfg!(target_endian = "big") { + <$int>::reverse_bits(self) + } else { + self + } + } + } + )* + } +} + +impl_fix_endianness! { u8, u16, u32, u64 } + mod sealed { use super::*; @@ -28,7 +41,6 @@ mod sealed { pub trait Sealed { fn valid(values: Simd) -> bool where - LaneCount: SupportedLaneCount, Self: SimdElement; fn eq(self, other: Self) -> bool; @@ -56,8 +68,6 @@ macro_rules! impl_element { impl Sealed for $ty { #[inline] fn valid(value: Simd) -> bool - where - LaneCount: SupportedLaneCount, { // We can't use `Simd` directly, because `Simd`'s functions call this function and // we will end up with an infinite loop. @@ -108,23 +118,19 @@ impl_element! { isize, usize } /// The layout of this type is unspecified, and may change between platforms /// and/or Rust versions, and code should not assume that it is equivalent to /// `[T; N]`. +/// +/// `N` cannot be 0 and may be at most 64. This limit may be increased in +/// the future. #[repr(transparent)] -pub struct Mask(mask_impl::Mask) +pub struct Mask(Simd) where - T: MaskElement, - LaneCount: SupportedLaneCount; + T: MaskElement; -impl Copy for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} +impl Copy for Mask where T: MaskElement {} impl Clone for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn clone(&self) -> Self { @@ -135,12 +141,12 @@ where impl Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { /// Constructs a mask by setting all elements to the given value. #[inline] - pub fn splat(value: bool) -> Self { - Self(mask_impl::Mask::splat(value)) + #[rustc_const_unstable(feature = "portable_simd", issue = "86656")] + pub const fn splat(value: bool) -> Self { + Self(Simd::splat(if value { T::TRUE } else { T::FALSE })) } /// Converts an array of bools to a SIMD mask. @@ -156,7 +162,7 @@ where let bytes: [u8; N] = mem::transmute_copy(&array); let bools: Simd = core::intrinsics::simd::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); - Mask::from_int_unchecked(core::intrinsics::simd::simd_cast(bools)) + Mask::from_simd_unchecked(core::intrinsics::simd::simd_cast(bools)) } } @@ -174,7 +180,7 @@ where // This would be hypothetically valid as an "in-place" transmute, // but these are "dependently-sized" types, so copy elision it is! unsafe { - let mut bytes: Simd = core::intrinsics::simd::simd_cast(self.to_int()); + let mut bytes: Simd = core::intrinsics::simd::simd_cast(self.to_simd()); bytes &= Simd::splat(1i8); mem::transmute_copy(&bytes) } @@ -187,12 +193,12 @@ where /// All elements must be either 0 or -1. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub unsafe fn from_int_unchecked(value: Simd) -> Self { + pub unsafe fn from_simd_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant unsafe { core::intrinsics::assume(::valid(value)); - Self(mask_impl::Mask::from_int_unchecked(value)) } + Self(value) } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 @@ -203,25 +209,26 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] #[track_caller] - pub fn from_int(value: Simd) -> Self { + pub fn from_simd(value: Simd) -> Self { assert!(T::valid(value), "all values must be either 0 or -1",); // Safety: the validity has been checked - unsafe { Self::from_int_unchecked(value) } + unsafe { Self::from_simd_unchecked(value) } } /// Converts the mask to a vector of integers, where 0 represents `false` and -1 /// represents `true`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_int(self) -> Simd { - self.0.to_int() + pub fn to_simd(self) -> Simd { + self.0 } /// Converts the mask to a mask of any other element size. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn cast(self) -> Mask { - Mask(self.0.convert()) + // Safety: mask elements are integers + unsafe { Mask(core::intrinsics::simd::simd_as(self.0)) } } /// Tests the value of the specified element. @@ -232,7 +239,7 @@ where #[must_use = "method returns a new bool and does not mutate the original value"] pub unsafe fn test_unchecked(&self, index: usize) -> bool { // Safety: the caller must confirm this invariant - unsafe { self.0.test_unchecked(index) } + unsafe { T::eq(*self.0.as_array().get_unchecked(index), T::TRUE) } } /// Tests the value of the specified element. @@ -243,9 +250,7 @@ where #[must_use = "method returns a new bool and does not mutate the original value"] #[track_caller] pub fn test(&self, index: usize) -> bool { - assert!(index < N, "element index out of range"); - // Safety: the element index has been checked - unsafe { self.test_unchecked(index) } + T::eq(self.0[index], T::TRUE) } /// Sets the value of the specified element. @@ -256,7 +261,7 @@ where pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) { // Safety: the caller must confirm this invariant unsafe { - self.0.set_unchecked(index, value); + *self.0.as_mut_array().get_unchecked_mut(index) = if value { T::TRUE } else { T::FALSE } } } @@ -267,35 +272,65 @@ where #[inline] #[track_caller] pub fn set(&mut self, index: usize, value: bool) { - assert!(index < N, "element index out of range"); - // Safety: the element index has been checked - unsafe { - self.set_unchecked(index, value); - } + self.0[index] = if value { T::TRUE } else { T::FALSE } } /// Returns true if any element is set, or false otherwise. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { - self.0.any() + // Safety: `self` is a mask vector + unsafe { core::intrinsics::simd::simd_reduce_any(self.0) } } /// Returns true if all elements are set, or false otherwise. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub fn all(self) -> bool { - self.0.all() + // Safety: `self` is a mask vector + unsafe { core::intrinsics::simd::simd_reduce_all(self.0) } } /// Creates a bitmask from a mask. /// /// Each bit is set if the corresponding element in the mask is `true`. - /// If the mask contains more than 64 elements, the bitmask is truncated to the first 64. #[inline] #[must_use = "method returns a new integer and does not mutate the original value"] pub fn to_bitmask(self) -> u64 { - self.0.to_bitmask_integer() + const { + assert!(N <= 64, "number of elements can't be greater than 64"); + } + + #[inline] + unsafe fn to_bitmask_impl( + mask: Mask, + ) -> U + where + T: MaskElement, + { + let resized = mask.resize::(false); + + // Safety: `resized` is an integer vector with length M, which must match T + let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized.0) }; + + // LLVM assumes bit order should match endianness + bitmask.fix_endianness() + } + + // TODO modify simd_bitmask to zero-extend output, making this unnecessary + if N <= 8 { + // Safety: bitmask matches length + unsafe { to_bitmask_impl::(self) as u64 } + } else if N <= 16 { + // Safety: bitmask matches length + unsafe { to_bitmask_impl::(self) as u64 } + } else if N <= 32 { + // Safety: bitmask matches length + unsafe { to_bitmask_impl::(self) as u64 } + } else { + // Safety: bitmask matches length + unsafe { to_bitmask_impl::(self) } + } } /// Creates a mask from a bitmask. @@ -305,7 +340,7 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask(bitmask: u64) -> Self { - Self(mask_impl::Mask::from_bitmask_integer(bitmask)) + Self(bitmask.select(Simd::splat(T::TRUE), Simd::splat(T::FALSE))) } /// Finds the index of the first set element. @@ -351,7 +386,7 @@ where // Safety: the input and output are integer vectors let index: Simd = unsafe { core::intrinsics::simd::simd_cast(index) }; - let masked_index = self.select(index, Self::splat(true).to_int()); + let masked_index = self.select(index, Self::splat(true).to_simd()); // Safety: the input and output are integer vectors let masked_index: Simd = @@ -376,7 +411,6 @@ where impl From<[bool; N]> for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn from(array: [bool; N]) -> Self { @@ -387,7 +421,6 @@ where impl From> for [bool; N] where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn from(vector: Mask) -> Self { @@ -398,7 +431,6 @@ where impl Default for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn default() -> Self { @@ -409,7 +441,6 @@ where impl PartialEq for Mask where T: MaskElement + PartialEq, - LaneCount: SupportedLaneCount, { #[inline] fn eq(&self, other: &Self) -> bool { @@ -420,7 +451,6 @@ where impl PartialOrd for Mask where T: MaskElement + PartialOrd, - LaneCount: SupportedLaneCount, { #[inline] fn partial_cmp(&self, other: &Self) -> Option { @@ -431,7 +461,6 @@ where impl fmt::Debug for Mask where T: MaskElement + fmt::Debug, - LaneCount: SupportedLaneCount, { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -444,19 +473,18 @@ where impl core::ops::BitAnd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] fn bitand(self, rhs: Self) -> Self { - Self(self.0 & rhs.0) + // Safety: `self` is an integer vector + unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) } } } impl core::ops::BitAnd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -468,7 +496,6 @@ where impl core::ops::BitAnd> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Mask; #[inline] @@ -480,19 +507,18 @@ where impl core::ops::BitOr for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] fn bitor(self, rhs: Self) -> Self { - Self(self.0 | rhs.0) + // Safety: `self` is an integer vector + unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) } } } impl core::ops::BitOr for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -504,7 +530,6 @@ where impl core::ops::BitOr> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Mask; #[inline] @@ -516,19 +541,18 @@ where impl core::ops::BitXor for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] fn bitxor(self, rhs: Self) -> Self::Output { - Self(self.0 ^ rhs.0) + // Safety: `self` is an integer vector + unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) } } } impl core::ops::BitXor for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -540,7 +564,6 @@ where impl core::ops::BitXor> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Mask; #[inline] @@ -552,30 +575,27 @@ where impl core::ops::Not for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Mask; #[inline] fn not(self) -> Self::Output { - Self(!self.0) + Self::splat(true) ^ self } } impl core::ops::BitAndAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitand_assign(&mut self, rhs: Self) { - self.0 = self.0 & rhs.0; + *self = *self & rhs; } } impl core::ops::BitAndAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitand_assign(&mut self, rhs: bool) { @@ -586,18 +606,16 @@ where impl core::ops::BitOrAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitor_assign(&mut self, rhs: Self) { - self.0 = self.0 | rhs.0; + *self = *self | rhs; } } impl core::ops::BitOrAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitor_assign(&mut self, rhs: bool) { @@ -608,18 +626,16 @@ where impl core::ops::BitXorAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitxor_assign(&mut self, rhs: Self) { - self.0 = self.0 ^ rhs.0; + *self = *self ^ rhs; } } impl core::ops::BitXorAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitxor_assign(&mut self, rhs: bool) { @@ -631,8 +647,6 @@ macro_rules! impl_from { { $from:ty => $($to:ty),* } => { $( impl From> for Mask<$to, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn from(value: Mask<$from, N>) -> Self { diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs deleted file mode 100644 index 32d37b553392..000000000000 --- a/crates/core_simd/src/masks/bitmask.rs +++ /dev/null @@ -1,228 +0,0 @@ -#![allow(unused_imports)] -use super::MaskElement; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; -use core::marker::PhantomData; - -/// A mask where each lane is represented by a single bit. -#[repr(transparent)] -pub(crate) struct Mask( - as SupportedLaneCount>::BitMask, - PhantomData, -) -where - T: MaskElement, - LaneCount: SupportedLaneCount; - -impl Copy for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} - -impl Clone for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn clone(&self) -> Self { - *self - } -} - -impl PartialEq for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn eq(&self, other: &Self) -> bool { - self.0.as_ref() == other.0.as_ref() - } -} - -impl PartialOrd for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - self.0.as_ref().partial_cmp(other.0.as_ref()) - } -} - -impl Eq for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} - -impl Ord for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn cmp(&self, other: &Self) -> core::cmp::Ordering { - self.0.as_ref().cmp(other.0.as_ref()) - } -} - -impl Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) fn splat(value: bool) -> Self { - let mut mask = as SupportedLaneCount>::BitMask::default(); - if value { - mask.as_mut().fill(u8::MAX) - } else { - mask.as_mut().fill(u8::MIN) - } - if N % 8 > 0 { - *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8); - } - Self(mask, PhantomData) - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool { - (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0 - } - - #[inline] - pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { - unsafe { - self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8) - } - } - - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub(crate) fn to_int(self) -> Simd { - unsafe { - core::intrinsics::simd::simd_select_bitmask( - self.0, - Simd::splat(T::TRUE), - Simd::splat(T::FALSE), - ) - } - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) unsafe fn from_int_unchecked(value: Simd) -> Self { - unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) } - } - - #[inline] - pub(crate) fn to_bitmask_integer(self) -> u64 { - let mut bitmask = [0u8; 8]; - bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref()); - u64::from_ne_bytes(bitmask) - } - - #[inline] - pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self { - let mut bytes = as SupportedLaneCount>::BitMask::default(); - let len = bytes.as_mut().len(); - bytes - .as_mut() - .copy_from_slice(&bitmask.to_ne_bytes()[..len]); - Self(bytes, PhantomData) - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) fn convert(self) -> Mask - where - U: MaskElement, - { - // Safety: bitmask layout does not depend on the element width - unsafe { core::mem::transmute_copy(&self) } - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) fn any(self) -> bool { - self != Self::splat(false) - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) fn all(self) -> bool { - self == Self::splat(true) - } -} - -impl core::ops::BitAnd for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, - as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, -{ - type Output = Self; - #[inline] - fn bitand(mut self, rhs: Self) -> Self { - for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { - *l &= r; - } - self - } -} - -impl core::ops::BitOr for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, - as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, -{ - type Output = Self; - #[inline] - fn bitor(mut self, rhs: Self) -> Self { - for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { - *l |= r; - } - self - } -} - -impl core::ops::BitXor for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn bitxor(mut self, rhs: Self) -> Self::Output { - for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { - *l ^= r; - } - self - } -} - -impl core::ops::Not for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn not(mut self) -> Self::Output { - for x in self.0.as_mut() { - *x = !*x; - } - if N % 8 > 0 { - *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8); - } - self - } -} diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs deleted file mode 100644 index 4e98db4070a9..000000000000 --- a/crates/core_simd/src/masks/full_masks.rs +++ /dev/null @@ -1,296 +0,0 @@ -//! Masks that take up full SIMD vector registers. - -use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount}; - -#[repr(transparent)] -pub(crate) struct Mask(Simd) -where - T: MaskElement, - LaneCount: SupportedLaneCount; - -impl Copy for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} - -impl Clone for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn clone(&self) -> Self { - *self - } -} - -impl PartialEq for Mask -where - T: MaskElement + PartialEq, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn eq(&self, other: &Self) -> bool { - self.0.eq(&other.0) - } -} - -impl PartialOrd for Mask -where - T: MaskElement + PartialOrd, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - self.0.partial_cmp(&other.0) - } -} - -impl Eq for Mask -where - T: MaskElement + Eq, - LaneCount: SupportedLaneCount, -{ -} - -impl Ord for Mask -where - T: MaskElement + Ord, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn cmp(&self, other: &Self) -> core::cmp::Ordering { - self.0.cmp(&other.0) - } -} - -// Used for bitmask bit order workaround -pub(crate) trait ReverseBits { - // Reverse the least significant `n` bits of `self`. - // (Remaining bits must be 0.) - fn reverse_bits(self, n: usize) -> Self; -} - -macro_rules! impl_reverse_bits { - { $($int:ty),* } => { - $( - impl ReverseBits for $int { - #[inline(always)] - fn reverse_bits(self, n: usize) -> Self { - let rev = <$int>::reverse_bits(self); - let bitsize = size_of::<$int>() * 8; - if n < bitsize { - // Shift things back to the right - rev >> (bitsize - n) - } else { - rev - } - } - } - )* - } -} - -impl_reverse_bits! { u8, u16, u32, u64 } - -impl Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) fn splat(value: bool) -> Self { - Self(Simd::splat(if value { T::TRUE } else { T::FALSE })) - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool { - T::eq(self.0[lane], T::TRUE) - } - - #[inline] - pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { - self.0[lane] = if value { T::TRUE } else { T::FALSE } - } - - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub(crate) fn to_int(self) -> Simd { - self.0 - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) unsafe fn from_int_unchecked(value: Simd) -> Self { - Self(value) - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) fn convert(self) -> Mask - where - U: MaskElement, - { - // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type. - unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) } - } - - #[inline] - unsafe fn to_bitmask_impl(self) -> U - where - LaneCount: SupportedLaneCount, - { - let resized = self.to_int().resize::(T::FALSE); - - // Safety: `resized` is an integer vector with length M, which must match T - let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized) }; - - // LLVM assumes bit order should match endianness - if cfg!(target_endian = "big") { - bitmask.reverse_bits(M) - } else { - bitmask - } - } - - #[inline] - unsafe fn from_bitmask_impl(bitmask: U) -> Self - where - LaneCount: SupportedLaneCount, - { - // LLVM assumes bit order should match endianness - let bitmask = if cfg!(target_endian = "big") { - bitmask.reverse_bits(M) - } else { - bitmask - }; - - // SAFETY: `mask` is the correct bitmask type for a u64 bitmask - let mask: Simd = unsafe { - core::intrinsics::simd::simd_select_bitmask( - bitmask, - Simd::::splat(T::TRUE), - Simd::::splat(T::FALSE), - ) - }; - - // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE` - unsafe { Self::from_int_unchecked(mask.resize::(T::FALSE)) } - } - - #[inline] - pub(crate) fn to_bitmask_integer(self) -> u64 { - // TODO modify simd_bitmask to zero-extend output, making this unnecessary - if N <= 8 { - // Safety: bitmask matches length - unsafe { self.to_bitmask_impl::() as u64 } - } else if N <= 16 { - // Safety: bitmask matches length - unsafe { self.to_bitmask_impl::() as u64 } - } else if N <= 32 { - // Safety: bitmask matches length - unsafe { self.to_bitmask_impl::() as u64 } - } else { - // Safety: bitmask matches length - unsafe { self.to_bitmask_impl::() } - } - } - - #[inline] - pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self { - // TODO modify simd_bitmask_select to truncate input, making this unnecessary - if N <= 8 { - // Safety: bitmask matches length - unsafe { Self::from_bitmask_impl::(bitmask as u8) } - } else if N <= 16 { - // Safety: bitmask matches length - unsafe { Self::from_bitmask_impl::(bitmask as u16) } - } else if N <= 32 { - // Safety: bitmask matches length - unsafe { Self::from_bitmask_impl::(bitmask as u32) } - } else { - // Safety: bitmask matches length - unsafe { Self::from_bitmask_impl::(bitmask) } - } - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) fn any(self) -> bool { - // Safety: use `self` as an integer vector - unsafe { core::intrinsics::simd::simd_reduce_any(self.to_int()) } - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) fn all(self) -> bool { - // Safety: use `self` as an integer vector - unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) } - } -} - -impl From> for Simd -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn from(value: Mask) -> Self { - value.0 - } -} - -impl core::ops::BitAnd for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn bitand(self, rhs: Self) -> Self { - // Safety: `self` is an integer vector - unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) } - } -} - -impl core::ops::BitOr for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn bitor(self, rhs: Self) -> Self { - // Safety: `self` is an integer vector - unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) } - } -} - -impl core::ops::BitXor for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn bitxor(self, rhs: Self) -> Self { - // Safety: `self` is an integer vector - unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) } - } -} - -impl core::ops::Not for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn not(self) -> Self::Output { - Self::splat(true) ^ self - } -} diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 45b1a0f97514..5f635d80a178 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -5,7 +5,6 @@ mod alias; mod cast; mod fmt; mod iter; -mod lane_count; mod masks; mod ops; mod select; @@ -27,8 +26,8 @@ pub mod simd { pub use crate::core_simd::alias::*; pub use crate::core_simd::cast::*; - pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; pub use crate::core_simd::masks::*; + pub use crate::core_simd::select::*; pub use crate::core_simd::swizzle::*; pub use crate::core_simd::to_bytes::ToBytes; pub use crate::core_simd::vector::*; diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index f36e8d01a73b..eb6601f73483 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq}; +use crate::simd::{Select, Simd, SimdElement, cmp::SimdPartialEq}; use core::ops::{Add, Mul}; use core::ops::{BitAnd, BitOr, BitXor}; use core::ops::{Div, Rem, Sub}; @@ -12,7 +12,6 @@ mod unary; impl core::ops::Index for Simd where T: SimdElement, - LaneCount: SupportedLaneCount, I: core::slice::SliceIndex<[T]>, { type Output = I::Output; @@ -25,7 +24,6 @@ where impl core::ops::IndexMut for Simd where T: SimdElement, - LaneCount: SupportedLaneCount, I: core::slice::SliceIndex<[T]>, { #[inline] @@ -130,7 +128,6 @@ macro_rules! for_base_types { impl $op for Simd<$scalar, N> where $scalar: SimdElement, - LaneCount: SupportedLaneCount, { type Output = $out; diff --git a/crates/core_simd/src/ops/assign.rs b/crates/core_simd/src/ops/assign.rs index d21d867de26d..c1830c35df77 100644 --- a/crates/core_simd/src/ops/assign.rs +++ b/crates/core_simd/src/ops/assign.rs @@ -21,7 +21,6 @@ macro_rules! assign_ops { where Self: $trait, T: SimdElement, - LaneCount: SupportedLaneCount, { #[inline] fn $assign_call(&mut self, rhs: U) { diff --git a/crates/core_simd/src/ops/deref.rs b/crates/core_simd/src/ops/deref.rs index 913cbbe977c4..360b83c40346 100644 --- a/crates/core_simd/src/ops/deref.rs +++ b/crates/core_simd/src/ops/deref.rs @@ -13,7 +13,6 @@ macro_rules! deref_lhs { where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, { type Output = Simd; @@ -33,7 +32,6 @@ macro_rules! deref_rhs { where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, { type Output = Simd; @@ -64,7 +62,6 @@ macro_rules! deref_ops { where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, { type Output = $simd; diff --git a/crates/core_simd/src/ops/shift_scalar.rs b/crates/core_simd/src/ops/shift_scalar.rs index f5115a5a5e93..7ca83dc40f61 100644 --- a/crates/core_simd/src/ops/shift_scalar.rs +++ b/crates/core_simd/src/ops/shift_scalar.rs @@ -1,13 +1,11 @@ // Shift operations uniquely typically only have a scalar on the right-hand side. // Here, we implement shifts for scalar RHS arguments. -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::Simd; macro_rules! impl_splatted_shifts { { impl $trait:ident :: $trait_fn:ident for $ty:ty } => { impl core::ops::$trait<$ty> for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -17,8 +15,6 @@ macro_rules! impl_splatted_shifts { } impl core::ops::$trait<&$ty> for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -28,8 +24,6 @@ macro_rules! impl_splatted_shifts { } impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Output = Simd<$ty, N>; #[inline] @@ -39,8 +33,6 @@ macro_rules! impl_splatted_shifts { } impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Output = Simd<$ty, N>; #[inline] diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs index 412a5b801171..e1c06167f979 100644 --- a/crates/core_simd/src/ops/unary.rs +++ b/crates/core_simd/src/ops/unary.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{Simd, SimdElement}; use core::ops::{Neg, Not}; // unary ops macro_rules! neg { @@ -6,7 +6,6 @@ macro_rules! neg { $(impl Neg for Simd<$scalar, N> where $scalar: SimdElement, - LaneCount: SupportedLaneCount, { type Output = Self; @@ -40,7 +39,6 @@ macro_rules! not { $(impl Not for Simd<$scalar, N> where $scalar: SimdElement, - LaneCount: SupportedLaneCount, { type Output = Self; diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index f33aa261a928..404f54d8f382 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -1,54 +1,155 @@ -use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{FixEndianness, Mask, MaskElement, Simd, SimdElement}; -impl Mask +/// Choose elements from two vectors using a mask. +/// +/// For each element in the mask, choose the corresponding element from `true_values` if +/// that element mask is true, and `false_values` if that element mask is false. +/// +/// If the mask is `u64`, it's treated as a bitmask with the least significant bit +/// corresponding to the first element. +/// +/// # Examples +/// +/// ## Selecting values from `Simd` +/// ``` +/// # #![feature(portable_simd)] +/// # #[cfg(feature = "as_crate")] use core_simd::simd; +/// # #[cfg(not(feature = "as_crate"))] use core::simd; +/// # use simd::{Simd, Mask, Select}; +/// let a = Simd::from_array([0, 1, 2, 3]); +/// let b = Simd::from_array([4, 5, 6, 7]); +/// let mask = Mask::::from_array([true, false, false, true]); +/// let c = mask.select(a, b); +/// assert_eq!(c.to_array(), [0, 5, 6, 3]); +/// ``` +/// +/// ## Selecting values from `Mask` +/// ``` +/// # #![feature(portable_simd)] +/// # #[cfg(feature = "as_crate")] use core_simd::simd; +/// # #[cfg(not(feature = "as_crate"))] use core::simd; +/// # use simd::{Mask, Select}; +/// let a = Mask::::from_array([true, true, false, false]); +/// let b = Mask::::from_array([false, false, true, true]); +/// let mask = Mask::::from_array([true, false, false, true]); +/// let c = mask.select(a, b); +/// assert_eq!(c.to_array(), [true, false, true, false]); +/// ``` +/// +/// ## Selecting with a bitmask +/// ``` +/// # #![feature(portable_simd)] +/// # #[cfg(feature = "as_crate")] use core_simd::simd; +/// # #[cfg(not(feature = "as_crate"))] use core::simd; +/// # use simd::{Mask, Select}; +/// let a = Mask::::from_array([true, true, false, false]); +/// let b = Mask::::from_array([false, false, true, true]); +/// let mask = 0b1001; +/// let c = mask.select(a, b); +/// assert_eq!(c.to_array(), [true, false, true, false]); +/// ``` +pub trait Select { + /// Choose elements + fn select(self, true_values: T, false_values: T) -> T; +} + +impl Select> for Mask where - T: MaskElement, - LaneCount: SupportedLaneCount, + T: SimdElement, + U: MaskElement, { - /// Choose elements from two vectors. - /// - /// For each element in the mask, choose the corresponding element from `true_values` if - /// that element mask is true, and `false_values` if that element mask is false. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::{Simd, Mask}; - /// let a = Simd::from_array([0, 1, 2, 3]); - /// let b = Simd::from_array([4, 5, 6, 7]); - /// let mask = Mask::from_array([true, false, false, true]); - /// let c = mask.select(a, b); - /// assert_eq!(c.to_array(), [0, 5, 6, 3]); - /// ``` #[inline] - #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn select(self, true_values: Simd, false_values: Simd) -> Simd - where - U: SimdElement, - { - // Safety: The mask has been cast to a vector of integers, - // and the operands to select between are vectors of the same type and length. - unsafe { core::intrinsics::simd::simd_select(self.to_int(), true_values, false_values) } - } - - /// Choose elements from two masks. - /// - /// For each element in the mask, choose the corresponding element from `true_values` if - /// that element mask is true, and `false_values` if that element mask is false. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Mask; - /// let a = Mask::::from_array([true, true, false, false]); - /// let b = Mask::::from_array([false, false, true, true]); - /// let mask = Mask::::from_array([true, false, false, true]); - /// let c = mask.select_mask(a, b); - /// assert_eq!(c.to_array(), [true, false, true, false]); - /// ``` - #[inline] - #[must_use = "method returns a new mask and does not mutate the original inputs"] - pub fn select_mask(self, true_values: Self, false_values: Self) -> Self { - self & true_values | !self & false_values + fn select(self, true_values: Simd, false_values: Simd) -> Simd { + // Safety: + // simd_as between masks is always safe (they're vectors of ints). + // simd_select uses a mask that matches the width and number of elements + unsafe { + let mask: Simd = core::intrinsics::simd::simd_as(self.to_simd()); + core::intrinsics::simd::simd_select(mask, true_values, false_values) + } + } +} + +impl Select> for u64 +where + T: SimdElement, +{ + #[inline] + fn select(self, true_values: Simd, false_values: Simd) -> Simd { + const { + assert!(N <= 64, "number of elements can't be greater than 64"); + } + + #[inline] + unsafe fn select_impl( + bitmask: U, + true_values: Simd, + false_values: Simd, + ) -> Simd + where + T: SimdElement, + { + let default = true_values[0]; + let true_values = true_values.resize::(default); + let false_values = false_values.resize::(default); + + // LLVM assumes bit order should match endianness + let bitmask = bitmask.fix_endianness(); + + // Safety: the caller guarantees that the size of U matches M + let selected = unsafe { + core::intrinsics::simd::simd_select_bitmask(bitmask, true_values, false_values) + }; + + selected.resize::(default) + } + + // TODO modify simd_bitmask_select to truncate input, making this unnecessary + if N <= 8 { + let bitmask = self as u8; + // Safety: bitmask matches length + unsafe { select_impl::(bitmask, true_values, false_values) } + } else if N <= 16 { + let bitmask = self as u16; + // Safety: bitmask matches length + unsafe { select_impl::(bitmask, true_values, false_values) } + } else if N <= 32 { + let bitmask = self as u32; + // Safety: bitmask matches length + unsafe { select_impl::(bitmask, true_values, false_values) } + } else { + let bitmask = self; + // Safety: bitmask matches length + unsafe { select_impl::(bitmask, true_values, false_values) } + } + } +} + +impl Select> for Mask +where + T: MaskElement, + U: MaskElement, +{ + #[inline] + fn select(self, true_values: Mask, false_values: Mask) -> Mask { + let selected: Simd = + Select::select(self, true_values.to_simd(), false_values.to_simd()); + + // Safety: all values come from masks + unsafe { Mask::from_simd_unchecked(selected) } + } +} + +impl Select> for u64 +where + T: MaskElement, +{ + #[inline] + fn select(self, true_values: Mask, false_values: Mask) -> Mask { + let selected: Simd = + Select::select(self, true_values.to_simd(), false_values.to_simd()); + + // Safety: all values come from masks + unsafe { Mask::from_simd_unchecked(selected) } } } diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs index 2312ba401fa7..d553d6c040c9 100644 --- a/crates/core_simd/src/simd/cmp/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -1,5 +1,5 @@ use crate::simd::{ - LaneCount, Mask, Simd, SimdElement, SupportedLaneCount, + Mask, Simd, SimdElement, ptr::{SimdConstPtr, SimdMutPtr}, }; @@ -21,8 +21,6 @@ macro_rules! impl_number { { $($number:ty),* } => { $( impl SimdPartialEq for Simd<$number, N> - where - LaneCount: SupportedLaneCount, { type Mask = Mask<<$number as SimdElement>::Mask, N>; @@ -30,14 +28,14 @@ macro_rules! impl_number { fn simd_eq(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_eq(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_eq(self, other)) } } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ne(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ne(self, other)) } } } )* @@ -50,8 +48,6 @@ macro_rules! impl_mask { { $($integer:ty),* } => { $( impl SimdPartialEq for Mask<$integer, N> - where - LaneCount: SupportedLaneCount, { type Mask = Self; @@ -59,14 +55,14 @@ macro_rules! impl_mask { fn simd_eq(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_eq(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_eq(self.to_simd(), other.to_simd())) } } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ne(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_ne(self.to_simd(), other.to_simd())) } } } )* @@ -75,10 +71,7 @@ macro_rules! impl_mask { impl_mask! { i8, i16, i32, i64, isize } -impl SimdPartialEq for Simd<*const T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdPartialEq for Simd<*const T, N> { type Mask = Mask; #[inline] @@ -92,10 +85,7 @@ where } } -impl SimdPartialEq for Simd<*mut T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdPartialEq for Simd<*mut T, N> { type Mask = Mask; #[inline] diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs index e813e7613032..5672fbbf54ca 100644 --- a/crates/core_simd/src/simd/cmp/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -1,5 +1,5 @@ use crate::simd::{ - LaneCount, Mask, Simd, SupportedLaneCount, + Mask, Select, Simd, cmp::SimdPartialEq, ptr::{SimdConstPtr, SimdMutPtr}, }; @@ -49,41 +49,37 @@ macro_rules! impl_integer { { $($integer:ty),* } => { $( impl SimdPartialOrd for Simd<$integer, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_lt(self, other)) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_le(self, other)) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_gt(self, other)) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ge(self, other)) } } } impl SimdOrd for Simd<$integer, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_max(self, other: Self) -> Self { @@ -115,35 +111,33 @@ macro_rules! impl_float { { $($float:ty),* } => { $( impl SimdPartialOrd for Simd<$float, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_lt(self, other)) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_le(self, other)) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_gt(self, other)) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ge(self, other)) } } } )* @@ -156,50 +150,46 @@ macro_rules! impl_mask { { $($integer:ty),* } => { $( impl SimdPartialOrd for Mask<$integer, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_lt(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_lt(self.to_simd(), other.to_simd())) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_le(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_le(self.to_simd(), other.to_simd())) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_gt(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_gt(self.to_simd(), other.to_simd())) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ge(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_ge(self.to_simd(), other.to_simd())) } } } impl SimdOrd for Mask<$integer, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_max(self, other: Self) -> Self { - self.simd_gt(other).select_mask(other, self) + self.simd_gt(other).select(other, self) } #[inline] fn simd_min(self, other: Self) -> Self { - self.simd_lt(other).select_mask(other, self) + self.simd_lt(other).select(other, self) } #[inline] @@ -218,10 +208,7 @@ macro_rules! impl_mask { impl_mask! { i8, i16, i32, i64, isize } -impl SimdPartialOrd for Simd<*const T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdPartialOrd for Simd<*const T, N> { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { self.addr().simd_lt(other.addr()) @@ -243,10 +230,7 @@ where } } -impl SimdOrd for Simd<*const T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdOrd for Simd<*const T, N> { #[inline] fn simd_max(self, other: Self) -> Self { self.simd_lt(other).select(other, self) @@ -268,10 +252,7 @@ where } } -impl SimdPartialOrd for Simd<*mut T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdPartialOrd for Simd<*mut T, N> { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { self.addr().simd_lt(other.addr()) @@ -293,10 +274,7 @@ where } } -impl SimdOrd for Simd<*mut T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdOrd for Simd<*mut T, N> { #[inline] fn simd_max(self, other: Self) -> Self { self.simd_lt(other).select(other, self) diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index b5972c47373b..efd7c2469512 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -1,6 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, + Mask, Select, Simd, SimdCast, SimdElement, cmp::{SimdPartialEq, SimdPartialOrd}, }; @@ -240,15 +240,9 @@ pub trait SimdFloat: Copy + Sealed { macro_rules! impl_trait { { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => { $( - impl Sealed for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, - { - } + impl Sealed for Simd<$ty, N> {} impl SimdFloat for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Mask = Mask<<$mask_ty as SimdElement>::Mask, N>; type Scalar = $ty; diff --git a/crates/core_simd/src/simd/num/int.rs b/crates/core_simd/src/simd/num/int.rs index e7253313f036..eee54d396880 100644 --- a/crates/core_simd/src/simd/num/int.rs +++ b/crates/core_simd/src/simd/num/int.rs @@ -1,7 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, cmp::SimdOrd, - cmp::SimdPartialOrd, num::SimdUint, + Mask, Select, Simd, SimdCast, SimdElement, cmp::SimdOrd, cmp::SimdPartialOrd, num::SimdUint, }; /// Operations on SIMD vectors of signed integers. @@ -242,16 +241,9 @@ pub trait SimdInt: Copy + Sealed { macro_rules! impl_trait { { $($ty:ident ($unsigned:ident)),* } => { $( - impl Sealed for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, - { - } + impl Sealed for Simd<$ty, N> {} - impl SimdInt for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, - { + impl SimdInt for Simd<$ty, N> { type Mask = Mask<<$ty as SimdElement>::Mask, N>; type Scalar = $ty; type Unsigned = Simd<$unsigned, N>; diff --git a/crates/core_simd/src/simd/num/uint.rs b/crates/core_simd/src/simd/num/uint.rs index e3ba8658bd80..606107a1f06f 100644 --- a/crates/core_simd/src/simd/num/uint.rs +++ b/crates/core_simd/src/simd/num/uint.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount, cmp::SimdOrd}; +use crate::simd::{Simd, SimdCast, SimdElement, cmp::SimdOrd}; /// Operations on SIMD vectors of unsigned integers. pub trait SimdUint: Copy + Sealed { @@ -124,15 +124,9 @@ pub trait SimdUint: Copy + Sealed { macro_rules! impl_trait { { $($ty:ident ($signed:ident)),* } => { $( - impl Sealed for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, - { - } + impl Sealed for Simd<$ty, N> {} impl SimdUint for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Scalar = $ty; type Cast = Simd; diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs index 36452e7ae920..7ef9dc21373e 100644 --- a/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint}; +use crate::simd::{Mask, Simd, cmp::SimdPartialEq, num::SimdUint}; /// Operations on SIMD vectors of constant pointers. pub trait SimdConstPtr: Copy + Sealed { @@ -88,12 +88,9 @@ pub trait SimdConstPtr: Copy + Sealed { fn wrapping_sub(self, count: Self::Usize) -> Self; } -impl Sealed for Simd<*const T, N> where LaneCount: SupportedLaneCount {} +impl Sealed for Simd<*const T, N> {} -impl SimdConstPtr for Simd<*const T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdConstPtr for Simd<*const T, N> { type Usize = Simd; type Isize = Simd; type CastPtr = Simd<*const U, N>; diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs index c644f390c20a..3b9b75ddf566 100644 --- a/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint}; +use crate::simd::{Mask, Simd, cmp::SimdPartialEq, num::SimdUint}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { @@ -85,12 +85,9 @@ pub trait SimdMutPtr: Copy + Sealed { fn wrapping_sub(self, count: Self::Usize) -> Self; } -impl Sealed for Simd<*mut T, N> where LaneCount: SupportedLaneCount {} +impl Sealed for Simd<*mut T, N> {} -impl SimdMutPtr for Simd<*mut T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdMutPtr for Simd<*mut T, N> { type Usize = Simd; type Isize = Simd; type CastPtr = Simd<*mut U, N>; diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index dbdd6ef40eba..02dcd71356dd 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{Mask, MaskElement, Simd, SimdElement}; /// Constructs a new SIMD vector by copying elements from selected elements in other vectors. /// @@ -82,8 +82,6 @@ pub trait Swizzle { fn swizzle(vector: Simd) -> Simd where T: SimdElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, { // Safety: `vector` is a vector, and the index is a const vector of u32. unsafe { @@ -122,8 +120,6 @@ pub trait Swizzle { fn concat_swizzle(first: Simd, second: Simd) -> Simd where T: SimdElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, { // Safety: `first` and `second` are vectors, and the index is a const vector of u32. unsafe { @@ -161,11 +157,9 @@ pub trait Swizzle { fn swizzle_mask(mask: Mask) -> Mask where T: MaskElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, { // SAFETY: all elements of this mask come from another mask - unsafe { Mask::from_int_unchecked(Self::swizzle(mask.to_int())) } + unsafe { Mask::from_simd_unchecked(Self::swizzle(mask.to_simd())) } } /// Creates a new mask from the elements of `first` and `second`. @@ -177,18 +171,17 @@ pub trait Swizzle { fn concat_swizzle_mask(first: Mask, second: Mask) -> Mask where T: MaskElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, { // SAFETY: all elements of this mask come from another mask - unsafe { Mask::from_int_unchecked(Self::concat_swizzle(first.to_int(), second.to_int())) } + unsafe { + Mask::from_simd_unchecked(Self::concat_swizzle(first.to_simd(), second.to_simd())) + } } } impl Simd where T: SimdElement, - LaneCount: SupportedLaneCount, { /// Reverse the order of the elements in the vector. #[inline] @@ -462,10 +455,7 @@ where /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn resize(self, value: T) -> Simd - where - LaneCount: SupportedLaneCount, - { + pub fn resize(self, value: T) -> Simd { struct Resize; impl Swizzle for Resize { const INDEX: [usize; M] = const { @@ -493,10 +483,7 @@ where /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn extract(self) -> Simd - where - LaneCount: SupportedLaneCount, - { + pub fn extract(self) -> Simd { struct Extract; impl Swizzle for Extract { const INDEX: [usize; LEN] = const { @@ -517,14 +504,13 @@ where impl Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { /// Reverse the order of the elements in the mask. #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn reverse(self) -> Self { // Safety: swizzles are safe for masks - unsafe { Self::from_int_unchecked(self.to_int().reverse()) } + unsafe { Self::from_simd_unchecked(self.to_simd().reverse()) } } /// Rotates the mask such that the first `OFFSET` elements of the slice move to the end @@ -534,7 +520,7 @@ where #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn rotate_elements_left(self) -> Self { // Safety: swizzles are safe for masks - unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_left::()) } + unsafe { Self::from_simd_unchecked(self.to_simd().rotate_elements_left::()) } } /// Rotates the mask such that the first `self.len() - OFFSET` elements of the mask move to @@ -544,7 +530,7 @@ where #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn rotate_elements_right(self) -> Self { // Safety: swizzles are safe for masks - unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_right::()) } + unsafe { Self::from_simd_unchecked(self.to_simd().rotate_elements_right::()) } } /// Shifts the mask elements to the left by `OFFSET`, filling in with @@ -554,7 +540,7 @@ where pub fn shift_elements_left(self, padding: bool) -> Self { // Safety: swizzles are safe for masks unsafe { - Self::from_int_unchecked(self.to_int().shift_elements_left::(if padding { + Self::from_simd_unchecked(self.to_simd().shift_elements_left::(if padding { T::TRUE } else { T::FALSE @@ -569,7 +555,7 @@ where pub fn shift_elements_right(self, padding: bool) -> Self { // Safety: swizzles are safe for masks unsafe { - Self::from_int_unchecked(self.to_int().shift_elements_right::(if padding { + Self::from_simd_unchecked(self.to_simd().shift_elements_right::(if padding { T::TRUE } else { T::FALSE @@ -598,9 +584,9 @@ where #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn interleave(self, other: Self) -> (Self, Self) { - let (lo, hi) = self.to_int().interleave(other.to_int()); + let (lo, hi) = self.to_simd().interleave(other.to_simd()); // Safety: swizzles are safe for masks - unsafe { (Self::from_int_unchecked(lo), Self::from_int_unchecked(hi)) } + unsafe { (Self::from_simd_unchecked(lo), Self::from_simd_unchecked(hi)) } } /// Deinterleave two masks. @@ -627,12 +613,12 @@ where #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn deinterleave(self, other: Self) -> (Self, Self) { - let (even, odd) = self.to_int().deinterleave(other.to_int()); + let (even, odd) = self.to_simd().deinterleave(other.to_simd()); // Safety: swizzles are safe for masks unsafe { ( - Self::from_int_unchecked(even), - Self::from_int_unchecked(odd), + Self::from_simd_unchecked(even), + Self::from_simd_unchecked(odd), ) } } @@ -653,13 +639,10 @@ where /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn resize(self, value: bool) -> Mask - where - LaneCount: SupportedLaneCount, - { + pub fn resize(self, value: bool) -> Mask { // Safety: swizzles are safe for masks unsafe { - Mask::::from_int_unchecked(self.to_int().resize::(if value { + Mask::::from_simd_unchecked(self.to_simd().resize::(if value { T::TRUE } else { T::FALSE @@ -679,11 +662,8 @@ where /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn extract(self) -> Mask - where - LaneCount: SupportedLaneCount, - { + pub fn extract(self) -> Mask { // Safety: swizzles are safe for masks - unsafe { Mask::::from_int_unchecked(self.to_int().extract::()) } + unsafe { Mask::::from_simd_unchecked(self.to_simd().extract::()) } } } diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index 773bd028bae0..ae0b174973da 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -1,10 +1,7 @@ -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::Simd; use core::mem; -impl Simd -where - LaneCount: SupportedLaneCount, -{ +impl Simd { /// Swizzle a vector of bytes according to the index vector. /// Indices within range select the appropriate byte. /// Indices "out of bounds" instead select 0. @@ -139,7 +136,7 @@ unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd, idxs: Simd) -> S #[inline] #[allow(clippy::let_and_return)] unsafe fn avx2_pshufb(bytes: Simd, idxs: Simd) -> Simd { - use crate::simd::cmp::SimdPartialOrd; + use crate::simd::{Select, cmp::SimdPartialOrd}; #[cfg(target_arch = "x86")] use core::arch::x86; #[cfg(target_arch = "x86_64")] @@ -184,10 +181,7 @@ unsafe fn transize( f: unsafe fn(T, T) -> T, a: Simd, b: Simd, -) -> Simd -where - LaneCount: SupportedLaneCount, -{ +) -> Simd { // SAFETY: Same obligation to use this function as to use mem::transmute_copy. unsafe { mem::transmute_copy(&f(mem::transmute_copy(&a), mem::transmute_copy(&b))) } } @@ -196,11 +190,8 @@ where #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[allow(unused)] #[inline(always)] -fn zeroing_idxs(idxs: Simd) -> Simd -where - LaneCount: SupportedLaneCount, -{ - use crate::simd::cmp::SimdPartialOrd; +fn zeroing_idxs(idxs: Simd) -> Simd { + use crate::simd::{Select, cmp::SimdPartialOrd}; idxs.simd_lt(Simd::splat(N as u8)) .select(idxs, Simd::splat(u8::MAX)) } diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index fee2cc06c5b0..1fd285e457db 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -1,12 +1,12 @@ use crate::simd::{ - LaneCount, Simd, SimdElement, SupportedLaneCount, + Simd, SimdElement, num::{SimdFloat, SimdInt, SimdUint}, }; mod sealed { use super::*; pub trait Sealed {} - impl Sealed for Simd where LaneCount: SupportedLaneCount {} + impl Sealed for Simd {} } use sealed::Sealed; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index f40031f8c4da..5b3a689f3611 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,5 +1,7 @@ +use core::intrinsics::simd::SimdAlign; + use crate::simd::{ - LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, + Mask, MaskElement, cmp::SimdPartialOrd, num::SimdUint, ptr::{SimdConstPtr, SimdMutPtr}, @@ -51,6 +53,8 @@ use crate::simd::{ /// Thus it is sound to [`transmute`] `Simd` to `[T; N]` and should optimize to "zero cost", /// but the reverse transmutation may require a copy the compiler cannot simply elide. /// +/// `N` cannot be 0 and may be at most 64. This limit may be increased in the future. +/// /// # ABI "Features" /// Due to Rust's safety guarantees, `Simd` is currently passed and returned via memory, /// not SIMD registers, except as an optimization. Using `#[inline]` on functions that accept @@ -100,14 +104,13 @@ use crate::simd::{ // avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also // causes rustc to emit illegal LLVM IR in some cases. #[repr(simd, packed)] +#[rustc_simd_monomorphize_lane_limit = "64"] pub struct Simd([T; N]) where - LaneCount: SupportedLaneCount, T: SimdElement; impl Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { /// Number of elements in this vector. @@ -146,30 +149,8 @@ where #[inline] #[rustc_const_unstable(feature = "portable_simd", issue = "86656")] pub const fn splat(value: T) -> Self { - const fn splat_const(value: T) -> Simd - where - T: SimdElement, - LaneCount: SupportedLaneCount, - { - Simd::from_array([value; N]) - } - - fn splat_rt(value: T) -> Simd - where - T: SimdElement, - LaneCount: SupportedLaneCount, - { - // This is preferred over `[value; N]`, since it's explicitly a splat: - // https://github.com/rust-lang/rust/issues/97804 - struct Splat; - impl Swizzle for Splat { - const INDEX: [usize; N] = [0; N]; - } - - Splat::swizzle::(Simd::::from([value])) - } - - core::intrinsics::const_eval_select((value,), splat_const, splat_rt) + // SAFETY: T is a SimdElement, and the item type of Self. + unsafe { core::intrinsics::simd::simd_splat(value) } } /// Returns an array reference containing the entire SIMD vector. @@ -195,7 +176,7 @@ where /// Returns a mutable array reference containing the entire SIMD vector. #[inline] - pub fn as_mut_array(&mut self) -> &mut [T; N] { + pub const fn as_mut_array(&mut self) -> &mut [T; N] { // SAFETY: `Simd` is just an overaligned `[T; N]` with // potential padding at the end, so pointer casting to a // `&mut [T; N]` is safe. @@ -324,7 +305,7 @@ where /// ``` #[inline] #[track_caller] - pub fn copy_to_slice(self, slice: &mut [T]) { + pub const fn copy_to_slice(self, slice: &mut [T]) { assert!( slice.len() >= Self::LEN, "slice length must be at least the number of elements" @@ -465,7 +446,7 @@ where /// value from `or` is passed through. /// /// # Safety - /// Enabled `ptr` elements must be safe to read as if by `std::ptr::read`. + /// Enabled `ptr` elements must be safe to read as if by `core::ptr::read`. #[must_use] #[inline] pub unsafe fn load_select_ptr( @@ -475,12 +456,11 @@ where ) -> Self { // SAFETY: The safety of reading elements through `ptr` is ensured by the caller. unsafe { - core::intrinsics::simd::simd_masked_load::< - _, - _, - _, - { core::intrinsics::simd::SimdAlign::Element }, - >(enable.to_int(), ptr, or) + core::intrinsics::simd::simd_masked_load::<_, _, _, { SimdAlign::Element }>( + enable.to_simd(), + ptr, + or, + ) } } @@ -659,7 +639,7 @@ where or: Self, ) -> Self { // Safety: The caller is responsible for upholding all invariants - unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) } + unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_simd()) } } /// Conditionally write contiguous elements to `slice`. The `enable` mask controls @@ -731,12 +711,11 @@ where pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<::Mask, N>) { // SAFETY: The safety of writing elements through `ptr` is ensured by the caller. unsafe { - core::intrinsics::simd::simd_masked_store::< - _, - _, - _, - { core::intrinsics::simd::SimdAlign::Element }, - >(enable.to_int(), ptr, self) + core::intrinsics::simd::simd_masked_store::<_, _, _, { SimdAlign::Element }>( + enable.to_simd(), + ptr, + self, + ) } } @@ -896,20 +875,14 @@ where #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask) { // Safety: The caller is responsible for upholding all invariants - unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_int()) } + unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_simd()) } } } -impl Copy for Simd -where - LaneCount: SupportedLaneCount, - T: SimdElement, -{ -} +impl Copy for Simd where T: SimdElement {} impl Clone for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -920,7 +893,6 @@ where impl Default for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + Default, { #[inline] @@ -931,7 +903,6 @@ where impl PartialEq for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + PartialEq, { #[inline] @@ -940,7 +911,7 @@ where let mask = unsafe { let tfvec: Simd<::Mask, N> = core::intrinsics::simd::simd_eq(*self, *other); - Mask::from_int_unchecked(tfvec) + Mask::from_simd_unchecked(tfvec) }; // Two vectors are equal if all elements are equal when compared elementwise @@ -954,7 +925,7 @@ where let mask = unsafe { let tfvec: Simd<::Mask, N> = core::intrinsics::simd::simd_ne(*self, *other); - Mask::from_int_unchecked(tfvec) + Mask::from_simd_unchecked(tfvec) }; // Two vectors are non-equal if any elements are non-equal when compared elementwise @@ -965,7 +936,6 @@ where /// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead. impl PartialOrd for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + PartialOrd, { #[inline] @@ -975,17 +945,11 @@ where } } -impl Eq for Simd -where - LaneCount: SupportedLaneCount, - T: SimdElement + Eq, -{ -} +impl Eq for Simd where T: SimdElement + Eq {} /// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead. impl Ord for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + Ord, { #[inline] @@ -997,7 +961,6 @@ where impl core::hash::Hash for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + core::hash::Hash, { #[inline] @@ -1012,7 +975,6 @@ where // array references impl AsRef<[T; N]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1023,7 +985,6 @@ where impl AsMut<[T; N]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1035,7 +996,6 @@ where // slice references impl AsRef<[T]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1046,7 +1006,6 @@ where impl AsMut<[T]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1058,7 +1017,6 @@ where // vector/array conversion impl From<[T; N]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1069,7 +1027,6 @@ where impl From> for [T; N] where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1080,7 +1037,6 @@ where impl TryFrom<&[T]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { type Error = core::array::TryFromSliceError; @@ -1093,7 +1049,6 @@ where impl TryFrom<&mut [T]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { type Error = core::array::TryFromSliceError; @@ -1231,10 +1186,7 @@ where } #[inline] -fn lane_indices() -> Simd -where - LaneCount: SupportedLaneCount, -{ +fn lane_indices() -> Simd { #![allow(clippy::needless_range_loop)] let mut index = [0; N]; for i in 0..N { @@ -1246,7 +1198,6 @@ where #[inline] fn mask_up_to(len: usize) -> Mask where - LaneCount: SupportedLaneCount, M: MaskElement, { let index = lane_indices::(); diff --git a/crates/core_simd/src/vendor/loongarch64.rs b/crates/core_simd/src/vendor/loongarch64.rs index 1290bc166b2b..1f84cdb971ec 100644 --- a/crates/core_simd/src/vendor/loongarch64.rs +++ b/crates/core_simd/src/vendor/loongarch64.rs @@ -1,31 +1,26 @@ use crate::simd::*; use core::arch::loongarch64::*; -from_transmute! { unsafe u8x16 => v16u8 } -from_transmute! { unsafe u8x32 => v32u8 } -from_transmute! { unsafe i8x16 => v16i8 } -from_transmute! { unsafe i8x32 => v32i8 } +from_transmute! { unsafe u8x16 => m128i } +from_transmute! { unsafe u8x32 => m256i } +from_transmute! { unsafe i8x16 => m128i } +from_transmute! { unsafe i8x32 => m256i } -from_transmute! { unsafe u16x8 => v8u16 } -from_transmute! { unsafe u16x16 => v16u16 } -from_transmute! { unsafe i16x8 => v8i16 } -from_transmute! { unsafe i16x16 => v16i16 } +from_transmute! { unsafe u16x8 => m128i } +from_transmute! { unsafe u16x16 => m256i } +from_transmute! { unsafe i16x8 => m128i } +from_transmute! { unsafe i16x16 => m256i } -from_transmute! { unsafe u32x4 => v4u32 } -from_transmute! { unsafe u32x8 => v8u32 } -from_transmute! { unsafe i32x4 => v4i32 } -from_transmute! { unsafe i32x8 => v8i32 } -from_transmute! { unsafe f32x4 => v4f32 } -from_transmute! { unsafe f32x8 => v8f32 } +from_transmute! { unsafe u32x4 => m128i } +from_transmute! { unsafe u32x8 => m256i } +from_transmute! { unsafe i32x4 => m128i } +from_transmute! { unsafe i32x8 => m256i } +from_transmute! { unsafe f32x4 => m128 } +from_transmute! { unsafe f32x8 => m256 } -from_transmute! { unsafe u64x2 => v2u64 } -from_transmute! { unsafe u64x4 => v4u64 } -from_transmute! { unsafe i64x2 => v2i64 } -from_transmute! { unsafe i64x4 => v4i64 } -from_transmute! { unsafe f64x2 => v2f64 } -from_transmute! { unsafe f64x4 => v4f64 } - -from_transmute! { unsafe usizex2 => v2u64 } -from_transmute! { unsafe usizex4 => v4u64 } -from_transmute! { unsafe isizex2 => v2i64 } -from_transmute! { unsafe isizex4 => v4i64 } +from_transmute! { unsafe u64x2 => m128i } +from_transmute! { unsafe u64x4 => m256i } +from_transmute! { unsafe i64x2 => m128i } +from_transmute! { unsafe i64x4 => m256i } +from_transmute! { unsafe f64x2 => m128d } +from_transmute! { unsafe f64x4 => m256d } diff --git a/crates/core_simd/src/vendor/wasm32.rs b/crates/core_simd/src/vendor/wasm32.rs index ef3baf885b0f..1fdb2bc86d34 100644 --- a/crates/core_simd/src/vendor/wasm32.rs +++ b/crates/core_simd/src/vendor/wasm32.rs @@ -14,17 +14,3 @@ from_transmute! { unsafe f32x4 => v128 } from_transmute! { unsafe u64x2 => v128 } from_transmute! { unsafe i64x2 => v128 } from_transmute! { unsafe f64x2 => v128 } - -#[cfg(target_pointer_width = "32")] -mod p32 { - use super::*; - from_transmute! { unsafe usizex4 => v128 } - from_transmute! { unsafe isizex4 => v128 } -} - -#[cfg(target_pointer_width = "64")] -mod p64 { - use super::*; - from_transmute! { unsafe usizex2 => v128 } - from_transmute! { unsafe isizex2 => v128 } -} diff --git a/crates/core_simd/src/vendor/x86.rs b/crates/core_simd/src/vendor/x86.rs index 66aaf90eef59..eae42e6fd0d0 100644 --- a/crates/core_simd/src/vendor/x86.rs +++ b/crates/core_simd/src/vendor/x86.rs @@ -39,25 +39,3 @@ from_transmute! { unsafe i64x8 => __m512i } from_transmute! { unsafe f64x2 => __m128d } from_transmute! { unsafe f64x4 => __m256d } from_transmute! { unsafe f64x8 => __m512d } - -#[cfg(target_pointer_width = "32")] -mod p32 { - use super::*; - from_transmute! { unsafe usizex4 => __m128i } - from_transmute! { unsafe usizex8 => __m256i } - from_transmute! { unsafe Simd => __m512i } - from_transmute! { unsafe isizex4 => __m128i } - from_transmute! { unsafe isizex8 => __m256i } - from_transmute! { unsafe Simd => __m512i } -} - -#[cfg(target_pointer_width = "64")] -mod p64 { - use super::*; - from_transmute! { unsafe usizex2 => __m128i } - from_transmute! { unsafe usizex4 => __m256i } - from_transmute! { unsafe usizex8 => __m512i } - from_transmute! { unsafe isizex2 => __m128i } - from_transmute! { unsafe isizex4 => __m256i } - from_transmute! { unsafe isizex8 => __m512i } -} diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 48786d02440b..53fb2367b605 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -65,9 +65,9 @@ macro_rules! test_mask_api { fn roundtrip_int_conversion() { let values = [true, false, false, true, false, false, true, false]; let mask = Mask::<$type, 8>::from_array(values); - let int = mask.to_int(); + let int = mask.to_simd(); assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]); - assert_eq!(Mask::<$type, 8>::from_int(int), mask); + assert_eq!(Mask::<$type, 8>::from_simd(int), mask); } #[test] diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 148aa5f9f177..b269efc9b1d7 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -11,7 +11,7 @@ use core_simd::simd; use core::intrinsics::simd as intrinsics; -use simd::{LaneCount, Simd, SupportedLaneCount}; +use simd::Simd; #[cfg(feature = "as_crate")] mod experimental { @@ -66,28 +66,43 @@ pub trait StdFloat: Sealed + Sized { /// Produces a vector where every element has the sine of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn sin(self) -> Self; + fn sin(self) -> Self { + unsafe { intrinsics::simd_fsin(self) } + } /// Produces a vector where every element has the cosine of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn cos(self) -> Self; + fn cos(self) -> Self { + unsafe { intrinsics::simd_fcos(self) } + } /// Produces a vector where every element has the exponential (base e) of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn exp(self) -> Self; + fn exp(self) -> Self { + unsafe { intrinsics::simd_fexp(self) } + } /// Produces a vector where every element has the exponential (base 2) of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn exp2(self) -> Self; + fn exp2(self) -> Self { + unsafe { intrinsics::simd_fexp2(self) } + } /// Produces a vector where every element has the natural logarithm of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn ln(self) -> Self; + fn ln(self) -> Self { + unsafe { intrinsics::simd_flog(self) } + } /// Produces a vector where every element has the logarithm with respect to an arbitrary /// in the equivalently-indexed elements in `self` and `base`. @@ -99,13 +114,19 @@ pub trait StdFloat: Sealed + Sized { /// Produces a vector where every element has the base-2 logarithm of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn log2(self) -> Self; + fn log2(self) -> Self { + unsafe { intrinsics::simd_flog2(self) } + } /// Produces a vector where every element has the base-10 logarithm of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn log10(self) -> Self; + fn log10(self) -> Self { + unsafe { intrinsics::simd_flog10(self) } + } /// Returns the smallest integer greater than or equal to each element. #[must_use = "method returns a new vector and does not mutate the original value"] @@ -140,68 +161,19 @@ pub trait StdFloat: Sealed + Sized { fn fract(self) -> Self; } -impl Sealed for Simd where LaneCount: SupportedLaneCount {} -impl Sealed for Simd where LaneCount: SupportedLaneCount {} +impl Sealed for Simd {} +impl Sealed for Simd {} -macro_rules! impl_float { - { - $($fn:ident: $intrinsic:ident,)* - } => { - impl StdFloat for Simd - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn fract(self) -> Self { - self - self.trunc() - } - - $( - #[inline] - fn $fn(self) -> Self { - unsafe { intrinsics::$intrinsic(self) } - } - )* - } - - impl StdFloat for Simd - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn fract(self) -> Self { - self - self.trunc() - } - - $( - #[inline] - fn $fn(self) -> Self { - // https://github.com/llvm/llvm-project/issues/83729 - #[cfg(target_arch = "aarch64")] - { - let mut ln = Self::splat(0f64); - for i in 0..N { - ln[i] = self[i].$fn() - } - ln - } - - #[cfg(not(target_arch = "aarch64"))] - { - unsafe { intrinsics::$intrinsic(self) } - } - } - )* - } +impl StdFloat for Simd { + #[inline] + fn fract(self) -> Self { + self - self.trunc() } } -impl_float! { - sin: simd_fsin, - cos: simd_fcos, - exp: simd_fexp, - exp2: simd_fexp2, - ln: simd_flog, - log2: simd_flog2, - log10: simd_flog10, +impl StdFloat for Simd { + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } } diff --git a/crates/std_float/tests/float.rs b/crates/std_float/tests/float.rs index c66c968f8c66..c608ba49564e 100644 --- a/crates/std_float/tests/float.rs +++ b/crates/std_float/tests/float.rs @@ -16,15 +16,33 @@ macro_rules! unary_test { } } -macro_rules! binary_test { +macro_rules! unary_approx_test { { $scalar:tt, $($func:tt),+ } => { test_helpers::test_lanes! { $( fn $func() { - test_helpers::test_binary_elementwise( + test_helpers::test_unary_elementwise_approx( + &core_simd::simd::Simd::<$scalar, LANES>::$func, + &$scalar::$func, + &|_| true, + 8, + ) + } + )* + } + } +} + +macro_rules! binary_approx_test { + { $scalar:tt, $($func:tt),+ } => { + test_helpers::test_lanes! { + $( + fn $func() { + test_helpers::test_binary_elementwise_approx( &core_simd::simd::Simd::<$scalar, LANES>::$func, &$scalar::$func, &|_, _| true, + 16, ) } )* @@ -53,10 +71,13 @@ macro_rules! impl_tests { mod $scalar { use std_float::StdFloat; - unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc } - binary_test! { $scalar, log } + unary_test! { $scalar, sqrt, ceil, floor, round, trunc } ternary_test! { $scalar, mul_add } + // https://github.com/rust-lang/miri/issues/3555 + unary_approx_test! { $scalar, sin, cos, exp, exp2, ln, log2, log10 } + binary_approx_test! { $scalar, log } + test_helpers::test_lanes! { fn fract() { test_helpers::test_unary_elementwise_flush_subnormals( diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml index a5359b9abc84..408bb04c7aa4 100644 --- a/crates/test_helpers/Cargo.toml +++ b/crates/test_helpers/Cargo.toml @@ -6,3 +6,4 @@ publish = false [dependencies] proptest = { version = "0.10", default-features = false, features = ["alloc"] } +float-cmp = "0.10" diff --git a/crates/test_helpers/src/approxeq.rs b/crates/test_helpers/src/approxeq.rs new file mode 100644 index 000000000000..57b43a16bc6f --- /dev/null +++ b/crates/test_helpers/src/approxeq.rs @@ -0,0 +1,110 @@ +//! Compare numeric types approximately. + +use float_cmp::Ulps; + +pub trait ApproxEq { + fn approxeq(&self, other: &Self, _ulps: i64) -> bool; + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result; +} + +impl ApproxEq for bool { + fn approxeq(&self, other: &Self, _ulps: i64) -> bool { + self == other + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?}", self) + } +} + +macro_rules! impl_integer_approxeq { + { $($type:ty),* } => { + $( + impl ApproxEq for $type { + fn approxeq(&self, other: &Self, _ulps: i64) -> bool { + self == other + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?} ({:x})", self, self) + } + } + )* + }; +} + +impl_integer_approxeq! { u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize } + +macro_rules! impl_float_approxeq { + { $($type:ty),* } => { + $( + impl ApproxEq for $type { + fn approxeq(&self, other: &Self, ulps: i64) -> bool { + if self.is_nan() && other.is_nan() { + true + } else { + (self.ulps(other) as i64).abs() <= ulps + } + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?} ({:x})", self, self.to_bits()) + } + } + )* + }; +} + +impl_float_approxeq! { f32, f64 } + +impl ApproxEq for [T; N] { + fn approxeq(&self, other: &Self, ulps: i64) -> bool { + self.iter() + .zip(other.iter()) + .fold(true, |value, (left, right)| { + value && left.approxeq(right, ulps) + }) + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + #[repr(transparent)] + struct Wrapper<'a, T: ApproxEq>(&'a T); + + impl core::fmt::Debug for Wrapper<'_, T> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + self.0.fmt(f) + } + } + + f.debug_list() + .entries(self.iter().map(|x| Wrapper(x))) + .finish() + } +} + +#[doc(hidden)] +pub struct ApproxEqWrapper<'a, T>(pub &'a T, pub i64); + +impl PartialEq for ApproxEqWrapper<'_, T> { + fn eq(&self, other: &T) -> bool { + self.0.approxeq(other, self.1) + } +} + +impl core::fmt::Debug for ApproxEqWrapper<'_, T> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + self.0.fmt(f) + } +} + +#[macro_export] +macro_rules! prop_assert_approxeq { + { $a:expr, $b:expr, $ulps:expr $(,)? } => { + { + use $crate::approxeq::ApproxEqWrapper; + let a = $a; + let b = $b; + proptest::prop_assert_eq!(ApproxEqWrapper(&a, $ulps), b); + } + }; +} diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 197c920e11ea..eb3d3f68bc2e 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -12,6 +12,9 @@ pub mod wasm; #[macro_use] pub mod biteq; +#[macro_use] +pub mod approxeq; + pub mod subnormals; use subnormals::FlushSubnormals; @@ -185,6 +188,41 @@ pub fn test_unary_elementwise( + fv: &dyn Fn(Vector) -> VectorResult, + fs: &dyn Fn(Scalar) -> ScalarResult, + check: &dyn Fn([Scalar; LANES]) -> bool, + ulps: i64, +) where + Scalar: Copy + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + approxeq::ApproxEq + core::fmt::Debug + DefaultStrategy, + Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + test_1(&|x: [Scalar; LANES]| { + proptest::prop_assume!(check(x)); + let result_1: [ScalarResult; LANES] = fv(x.into()).into(); + let result_2: [ScalarResult; LANES] = x + .iter() + .copied() + .map(fs) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_approxeq!(result_1, result_2, ulps); + Ok(()) + }); +} + /// Test a unary vector function against a unary scalar function, applied elementwise. /// /// Where subnormals are flushed, use approximate equality. @@ -290,6 +328,44 @@ pub fn test_binary_elementwise< }); } +/// Test a binary vector function against a binary scalar function, applied elementwise. +pub fn test_binary_elementwise_approx< + Scalar1, + Scalar2, + ScalarResult, + Vector1, + Vector2, + VectorResult, + const LANES: usize, +>( + fv: &dyn Fn(Vector1, Vector2) -> VectorResult, + fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult, + check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool, + ulps: i64, +) where + Scalar1: Copy + core::fmt::Debug + DefaultStrategy, + Scalar2: Copy + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + approxeq::ApproxEq + core::fmt::Debug + DefaultStrategy, + Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, + Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| { + proptest::prop_assume!(check(x, y)); + let result_1: [ScalarResult; LANES] = fv(x.into(), y.into()).into(); + let result_2: [ScalarResult; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| fs(x, y)) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_approxeq!(result_1, result_2, ulps); + Ok(()) + }); +} + /// Test a binary vector function against a binary scalar function, applied elementwise. /// /// Where subnormals are flushed, use approximate equality. @@ -528,8 +604,6 @@ macro_rules! test_lanes { use super::*; fn implementation() - where - core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount, $body #[cfg(target_arch = "wasm32")] @@ -628,8 +702,6 @@ macro_rules! test_lanes_panic { use super::*; fn implementation() - where - core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount, $body // test some odd and even non-power-of-2 lengths on miri diff --git a/rust-toolchain.toml b/rust-toolchain.toml index d17c6d2e8894..639d07df7337 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2025-01-16" +channel = "nightly-2026-01-26" components = ["rustfmt", "clippy", "miri", "rust-src"] From e33d70223e372bd93d76934242617e413a4cdcbb Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 18 Mar 2026 15:04:47 +0100 Subject: [PATCH 2/9] simd_fmin/fmax: make semantics and name consistent with scalar intrinsics --- crates/core_simd/src/simd/num/float.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index efd7c2469512..175cbce4f58b 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -385,13 +385,13 @@ macro_rules! impl_trait { #[inline] fn simd_min(self, other: Self) -> Self { // Safety: `self` and `other` are float vectors - unsafe { core::intrinsics::simd::simd_fmin(self, other) } + unsafe { core::intrinsics::simd::simd_minimum_number_nsz(self, other) } } #[inline] fn simd_max(self, other: Self) -> Self { // Safety: `self` and `other` are floating point vectors - unsafe { core::intrinsics::simd::simd_fmax(self, other) } + unsafe { core::intrinsics::simd::simd_maximum_number_nsz(self, other) } } #[inline] From a52ff55f3888ea0bb15155b318a9ff99b3d436c5 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 11 Apr 2026 20:28:28 +0200 Subject: [PATCH 3/9] Merge commit '0557e3478104037c76c2e5be7ea21e56ebbaff6e' into sync-from-portable-simd-2026-04-11 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- Cargo.lock | 430 ++++++++++++++++--------- Cargo.toml | 5 + beginners-guide.md | 2 +- crates/core_simd/Cargo.toml | 8 +- crates/core_simd/src/alias.rs | 10 + crates/core_simd/src/cast.rs | 3 + crates/core_simd/src/lib.rs | 6 +- crates/core_simd/src/masks.rs | 26 +- crates/core_simd/src/ops.rs | 2 +- crates/core_simd/src/ops/unary.rs | 2 + crates/core_simd/src/simd/cmp/eq.rs | 2 +- crates/core_simd/src/simd/cmp/ord.rs | 2 +- crates/core_simd/src/simd/num/float.rs | 2 +- crates/core_simd/src/simd/prelude.rs | 2 +- crates/core_simd/src/vector.rs | 18 +- crates/core_simd/src/vendor.rs | 3 + crates/core_simd/src/vendor/hexagon.rs | 40 +++ crates/core_simd/tests/f16_ops.rs | 10 + crates/core_simd/tests/masks.rs | 13 + crates/core_simd/tests/round.rs | 8 + crates/std_float/src/lib.rs | 17 + crates/std_float/tests/float.rs | 4 +- crates/test_helpers/Cargo.toml | 2 +- crates/test_helpers/src/biteq.rs | 2 +- crates/test_helpers/src/lib.rs | 5 +- crates/test_helpers/src/subnormals.rs | 2 +- rust-toolchain.toml | 2 +- triagebot.toml | 44 +++ 29 files changed, 475 insertions(+), 199 deletions(-) create mode 100644 crates/core_simd/src/vendor/hexagon.rs create mode 100644 crates/core_simd/tests/f16_ops.rs create mode 100644 triagebot.toml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 31422b793450..5d354305e56d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,7 +10,7 @@ For a given vector math operation on TxN, please add tests for interactions with - [ ] 0 -For a given vector math operation on TxN where T is a float, please add tests for test interactions with: +For a given vector math operation on TxN where T is a float, please add tests for interactions with: - [ ] a really large number, larger than the mantissa - [ ] a really small "subnormal" number - [ ] NaN diff --git a/Cargo.lock b/Cargo.lock index 5a5f0d8907ae..c3b950bd5069 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -10,41 +21,43 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" -version = "1.3.2" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] -name = "byteorder" -version = "1.5.0" +name = "cast" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.33" +version = "1.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" dependencies = [ + "find-msvc-tools", "shlex", ] [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "core_simd" version = "0.1.0" dependencies = [ + "getrandom", "proptest", "std_float", "test_helpers", @@ -52,6 +65,12 @@ dependencies = [ "wasm-bindgen-test", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "float-cmp" version = "0.10.0" @@ -62,31 +81,98 @@ dependencies = [ ] [[package]] -name = "js-sys" -version = "0.3.77" +name = "futures-core" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "js-sys" +version = "0.3.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995" +dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] [[package]] -name = "log" -version = "0.4.27" +name = "libc" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "minicov" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b" +checksum = "4869b6a491569605d66d3952bcdf03df789e5b536e5f0cf7758a7f08a55ae24d" dependencies = [ "cc", "walkdir", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -94,13 +180,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "ppv-lite86" @@ -113,52 +212,58 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "proptest" -version = "0.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e6c80c1139113c28ee4670dc50cc42915228b51f56a9e407f0ec60f966646f" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" dependencies = [ "bitflags", - "byteorder", "num-traits", "rand", "rand_chacha", "rand_xorshift", + "regex-syntax", + "unarray", ] [[package]] name = "quote" -version = "1.0.40" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] [[package]] -name = "rand" -version = "0.7.3" +name = "r-efi" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", "rand_core", - "rand_hc", ] [[package]] name = "rand_chacha" -version = "0.2.2" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core", @@ -166,28 +271,28 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.5.1" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ - "rand_core", + "getrandom", ] [[package]] name = "rand_xorshift" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77d416b86801d23dde1aa643023b775c3a462efc0ed96443add11546cdf1dca8" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" dependencies = [ "rand_core", ] +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "rustversion" version = "1.0.22" @@ -203,12 +308,61 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "std_float" version = "0.1.0" @@ -221,9 +375,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.106" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -239,10 +393,16 @@ dependencies = [ ] [[package]] -name = "unicode-ident" -version = "1.0.18" +name = "unarray" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "walkdir" @@ -255,49 +415,42 @@ dependencies = [ ] [[package]] -name = "wasm-bindgen" -version = "0.2.100" +name = "wasip2" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "2d1faf851e778dfa54db7cd438b70758eba9755cb47403f3496edd7c8fc212f0" dependencies = [ - "cfg-if", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -305,44 +458,53 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.50" +version = "0.3.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3" +checksum = "1138411301a026d6662dc44e7076a74dbaa76a369312275eea5dee4d7dc68c7c" dependencies = [ + "async-trait", + "cast", "js-sys", + "libm", "minicov", + "nu-ansi-term", + "num-traits", + "oorandom", + "serde", + "serde_json", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", + "wasm-bindgen-test-shared", ] [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.50" +version = "0.3.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" +checksum = "186ddfe8383ba7ae7927bae3bb7343fd1f03ba2dbaf1474410f0d831131c269b" dependencies = [ "proc-macro2", "quote", @@ -350,113 +512,63 @@ dependencies = [ ] [[package]] -name = "web-sys" -version = "0.3.77" +name = "wasm-bindgen-test-shared" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" -dependencies = [ - "js-sys", - "wasm-bindgen", -] +checksum = "f032e076ceb8d36d5921c6cef5bf447f2ca2bbd5439ce1683d68d1c99cc2be16" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ "windows-sys", ] +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-targets", + "windows-link", ] [[package]] -name = "windows-targets" -version = "0.52.6" +name = "wit-bindgen" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", "syn", ] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 21d4584a9f4d..883140bae3f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,8 @@ opt-level = 2 [profile.test.package.test_helpers] opt-level = 2 + +[workspace.dependencies.proptest] +version = "1.11" +default-features = false +features = ["alloc", "f16"] diff --git a/beginners-guide.md b/beginners-guide.md index 4250a18315a6..c56873ea4b8d 100644 --- a/beginners-guide.md +++ b/beginners-guide.md @@ -56,7 +56,7 @@ The list notes the bit widths available at each feature level, though the operat ### Selecting Additional Target Features -If you want to enable support for a target feature within your build, generally you should use a [target-feature](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html#target-feature) setting within you `RUSTFLAGS` setting. +If you want to enable support for a target feature within your build, generally you should use a [target-feature](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html#target-feature) setting within your `RUSTFLAGS` setting. If you know that you're targeting a specific CPU you can instead use the [target-cpu](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html#target-cpu) flag and the compiler will enable the correct set of features for that CPU. diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index 537ce459c07c..6e576084ecfb 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -18,9 +18,13 @@ wasm-bindgen = "0.2" wasm-bindgen-test = "0.3" [dev-dependencies.proptest] -version = "0.10" +workspace = true + +# Enable the `wasm_js` feature so that getrandom works on wasm32-unknown-unknown. +[dev-dependencies.getrandom] +version = "0.3.4" default-features = false -features = ["alloc"] +features = ["wasm_js"] [dev-dependencies.test_helpers] path = "../test_helpers" diff --git a/crates/core_simd/src/alias.rs b/crates/core_simd/src/alias.rs index 23f121c46197..6dcfcb660c26 100644 --- a/crates/core_simd/src/alias.rs +++ b/crates/core_simd/src/alias.rs @@ -153,6 +153,16 @@ alias! { usizex64 64 } + f16 = { + f16x1 1 + f16x2 2 + f16x4 4 + f16x8 8 + f16x16 16 + f16x32 32 + f16x64 64 + } + f32 = { f32x1 1 f32x2 2 diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs index 1c3592f80757..69dc7ba50d58 100644 --- a/crates/core_simd/src/cast.rs +++ b/crates/core_simd/src/cast.rs @@ -44,6 +44,9 @@ impl SimdCast for u64 {} unsafe impl Sealed for usize {} impl SimdCast for usize {} // Safety: primitive number types can be cast to other primitive number types +unsafe impl Sealed for f16 {} +impl SimdCast for f16 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl Sealed for f32 {} impl SimdCast for f32 {} // Safety: primitive number types can be cast to other primitive number types diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index fe26d99b9194..413a886f6c5b 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,17 +1,16 @@ #![no_std] #![feature( - const_eval_select, convert_float_to_int, + f16, core_intrinsics, decl_macro, - intra_doc_pointers, repr_simd, - simd_ffi, staged_api, prelude_import, ptr_metadata, rustc_attrs )] +#![cfg_attr(doc, feature(intra_doc_pointers))] #![cfg_attr( all( any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",), @@ -31,6 +30,7 @@ any(target_arch = "powerpc", target_arch = "powerpc64"), feature(stdarch_powerpc) )] +#![cfg_attr(target_arch = "hexagon", feature(stdarch_hexagon))] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny( unsafe_op_in_unsafe_fn, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 3e2209556b66..a5334afbe5f8 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -371,22 +371,20 @@ where // * perform _unsigned_ reduce-min // * check if the result is -1 or an index - let index = Simd::from_array( - const { - let mut index = [0; N]; - let mut i = 0; - while i < N { - index[i] = i; - i += 1; - } - index - }, - ); + let index: Simd = const { + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = i; + i += 1; + } + // Safety: the input and output are integer vectors + unsafe { core::intrinsics::simd::simd_cast(Simd::from_array(index)) } + }; // Safety: the input and output are integer vectors - let index: Simd = unsafe { core::intrinsics::simd::simd_cast(index) }; - - let masked_index = self.select(index, Self::splat(true).to_simd()); + let masked_index: Simd = + unsafe { core::intrinsics::simd::simd_or((!self).to_simd(), index) }; // Safety: the input and output are integer vectors let masked_index: Simd = diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index eb6601f73483..c0a06ed46512 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -245,7 +245,7 @@ for_base_ops! { // We don't need any special precautions here: // Floats always accept arithmetic ops, but may become NaN. for_base_ops! { - T = (f32, f64); + T = (f16, f32, f64); type Lhs = Simd; type Rhs = Simd; type Output = Self; diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs index e1c06167f979..af7aa8a823d9 100644 --- a/crates/core_simd/src/ops/unary.rs +++ b/crates/core_simd/src/ops/unary.rs @@ -19,6 +19,8 @@ macro_rules! neg { } neg! { + impl Neg for Simd + impl Neg for Simd impl Neg for Simd diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs index d553d6c040c9..76836404cbc4 100644 --- a/crates/core_simd/src/simd/cmp/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -42,7 +42,7 @@ macro_rules! impl_number { } } -impl_number! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize } +impl_number! { f16, f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize } macro_rules! impl_mask { { $($integer:ty),* } => { diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs index 5672fbbf54ca..5a4e74c753b5 100644 --- a/crates/core_simd/src/simd/cmp/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -144,7 +144,7 @@ macro_rules! impl_float { } } -impl_float! { f32, f64 } +impl_float! { f16, f32, f64 } macro_rules! impl_mask { { $($integer:ty),* } => { diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index 175cbce4f58b..510f4c9eea39 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -444,4 +444,4 @@ macro_rules! impl_trait { } } -impl_trait! { f32 { bits: u32, mask: i32 }, f64 { bits: u64, mask: i64 } } +impl_trait! { f16 { bits: u16, mask: i16 }, f32 { bits: u32, mask: i32 }, f64 { bits: u64, mask: i64 } } diff --git a/crates/core_simd/src/simd/prelude.rs b/crates/core_simd/src/simd/prelude.rs index e5d7a2aeb73d..6e93f16e10b1 100644 --- a/crates/core_simd/src/simd/prelude.rs +++ b/crates/core_simd/src/simd/prelude.rs @@ -7,7 +7,7 @@ #[doc(no_inline)] pub use super::{ - Mask, Simd, + Mask, Select, Simd, ToBytes, cmp::{SimdOrd, SimdPartialEq, SimdPartialOrd}, num::{SimdFloat, SimdInt, SimdUint}, ptr::{SimdConstPtr, SimdMutPtr}, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 5b3a689f3611..fbef69f267aa 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -363,7 +363,7 @@ where /// corresponding element in `enable` is `true`. /// /// When the element is disabled or out of bounds for the slice, that memory location - /// is not accessed and the corresponding value from `or` is passed through. + /// is not accessed and the default value for the element type is returned. /// /// # Examples /// ``` @@ -371,12 +371,11 @@ where /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; /// # use simd::{Simd, Mask}; - /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; - /// let enable = Mask::from_array([true, true, false, true]); - /// let or = Simd::from_array([-5, -4, -3, -2]); + /// let vec: Vec = vec![10, 11, 12]; + /// let enable = Mask::from_array([false, true, true, true]); /// - /// let result = Simd::load_select(&vec, enable, or); - /// assert_eq!(result, Simd::from_array([10, 11, -3, 13])); + /// let result = Simd::load_select_or_default(&vec, enable); + /// assert_eq!(result, Simd::from_array([0, 11, 12, 0])); /// ``` #[must_use] #[inline] @@ -1147,6 +1146,13 @@ unsafe impl SimdElement for isize { type Mask = isize; } +impl Sealed for f16 {} + +// Safety: f16 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for f16 { + type Mask = i16; +} + impl Sealed for f32 {} // Safety: f32 is a valid SIMD element type, and is supported by this API diff --git a/crates/core_simd/src/vendor.rs b/crates/core_simd/src/vendor.rs index 57536e4fc77d..6b3c640c2f7c 100644 --- a/crates/core_simd/src/vendor.rs +++ b/crates/core_simd/src/vendor.rs @@ -32,3 +32,6 @@ mod powerpc; #[cfg(target_arch = "loongarch64")] mod loongarch64; + +#[cfg(target_arch = "hexagon")] +mod hexagon; diff --git a/crates/core_simd/src/vendor/hexagon.rs b/crates/core_simd/src/vendor/hexagon.rs new file mode 100644 index 000000000000..2b8ea55fde65 --- /dev/null +++ b/crates/core_simd/src/vendor/hexagon.rs @@ -0,0 +1,40 @@ +//! Conversions to Hexagon HVX SIMD types. + +use crate::simd::*; + +// HVX 128-byte mode (1024-bit vectors) +// Enable with: -C target-feature=+hvx-length128b +#[cfg(target_feature = "hvx-length128b")] +mod hvx_128b { + use super::*; + use core::arch::hexagon::v128::HvxVector; + + // Full vectors (1024-bit) map to HvxVector + from_transmute! { unsafe u16x64 => HvxVector } + from_transmute! { unsafe i16x64 => HvxVector } + from_transmute! { unsafe u32x32 => HvxVector } + from_transmute! { unsafe i32x32 => HvxVector } + from_transmute! { unsafe u64x16 => HvxVector } + from_transmute! { unsafe i64x16 => HvxVector } + + // FIXME: u8x128/i8x128 don't exist in portable-simd (max lane count is 64) + // u8x64/i8x64 are only 512-bit (half of HvxVector in 128B mode) +} + +// HVX 64-byte mode (512-bit vectors) +// Default when hvx-length128b is not specified +#[cfg(not(target_feature = "hvx-length128b"))] +mod hvx_64b { + use super::*; + use core::arch::hexagon::v64::HvxVector; + + // Full vectors (512-bit) map to HvxVector + from_transmute! { unsafe u8x64 => HvxVector } + from_transmute! { unsafe i8x64 => HvxVector } + from_transmute! { unsafe u16x32 => HvxVector } + from_transmute! { unsafe i16x32 => HvxVector } + from_transmute! { unsafe u32x16 => HvxVector } + from_transmute! { unsafe i32x16 => HvxVector } + from_transmute! { unsafe u64x8 => HvxVector } + from_transmute! { unsafe i64x8 => HvxVector } +} diff --git a/crates/core_simd/tests/f16_ops.rs b/crates/core_simd/tests/f16_ops.rs new file mode 100644 index 000000000000..f89bdf4738f8 --- /dev/null +++ b/crates/core_simd/tests/f16_ops.rs @@ -0,0 +1,10 @@ +#![feature(portable_simd)] +#![feature(f16)] + +#[macro_use] +mod ops_macros; + +// FIXME: some f16 operations cause rustc to hang on wasm simd +// https://github.com/llvm/llvm-project/issues/189251 +#[cfg(not(all(target_arch = "wasm32", target_feature = "simd128")))] +impl_float_tests! { f16, i16 } diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 53fb2367b605..98a74be8e395 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -133,6 +133,19 @@ macro_rules! test_mask_api { cast_impl::(); cast_impl::(); } + + #[test] + fn first_set() { + for bitmask in 0..=u8::MAX { + let mask = Mask::<$type, 8>::from_bitmask(bitmask as u64); + let expected = if bitmask == 0 { + None + } else { + Some(bitmask.trailing_zeros() as usize) + }; + assert_eq!(mask.first_set(), expected); + } + } } } } diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 4c1ac3c36f89..95b17f415822 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -42,6 +42,14 @@ macro_rules! float_rounding_test { ) } + fn round_ties_even() { + test_helpers::test_unary_elementwise( + &Vector::::round_ties_even, + &Scalar::round_ties_even, + &|_| true, + ) + } + fn fract() { test_helpers::test_unary_elementwise_flush_subnormals( &Vector::::fract, diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index b269efc9b1d7..ff3525452231 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -2,6 +2,7 @@ feature = "as_crate", feature(core_intrinsics), feature(portable_simd), + feature(f16), allow(internal_features) )] #[cfg(not(feature = "as_crate"))] @@ -156,14 +157,30 @@ pub trait StdFloat: Sealed + Sized { unsafe { intrinsics::simd_trunc(self) } } + /// Rounds each element to the nearest integer-valued float. + /// Ties are resolved by rounding to the number with an even least significant digit. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn round_ties_even(self) -> Self { + unsafe { intrinsics::simd_round_ties_even(self) } + } + /// Returns the floating point's fractional value, with its integer part removed. #[must_use = "method returns a new vector and does not mutate the original value"] fn fract(self) -> Self; } +impl Sealed for Simd {} impl Sealed for Simd {} impl Sealed for Simd {} +impl StdFloat for Simd { + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } +} + impl StdFloat for Simd { #[inline] fn fract(self) -> Self { diff --git a/crates/std_float/tests/float.rs b/crates/std_float/tests/float.rs index c608ba49564e..0fa5da3dca50 100644 --- a/crates/std_float/tests/float.rs +++ b/crates/std_float/tests/float.rs @@ -25,7 +25,7 @@ macro_rules! unary_approx_test { &core_simd::simd::Simd::<$scalar, LANES>::$func, &$scalar::$func, &|_| true, - 8, + 16, ) } )* @@ -71,7 +71,7 @@ macro_rules! impl_tests { mod $scalar { use std_float::StdFloat; - unary_test! { $scalar, sqrt, ceil, floor, round, trunc } + unary_test! { $scalar, sqrt, ceil, floor, round, trunc, round_ties_even } ternary_test! { $scalar, mul_add } // https://github.com/rust-lang/miri/issues/3555 diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml index 408bb04c7aa4..da7ef7bd9945 100644 --- a/crates/test_helpers/Cargo.toml +++ b/crates/test_helpers/Cargo.toml @@ -5,5 +5,5 @@ edition = "2021" publish = false [dependencies] -proptest = { version = "0.10", default-features = false, features = ["alloc"] } +proptest = { workspace = true, features = ["alloc", "std"] } float-cmp = "0.10" diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs index cbc20cda0d62..36761e37dea7 100644 --- a/crates/test_helpers/src/biteq.rs +++ b/crates/test_helpers/src/biteq.rs @@ -53,7 +53,7 @@ macro_rules! impl_float_biteq { }; } -impl_float_biteq! { f32, f64 } +impl_float_biteq! { f16, f32, f64 } impl BitEq for *const T { fn biteq(&self, other: &Self) -> bool { diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index eb3d3f68bc2e..82adb06d8a9d 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -1,7 +1,7 @@ -#![feature(powerpc_target_feature)] +#![feature(f16)] #![cfg_attr( any(target_arch = "powerpc", target_arch = "powerpc64"), - feature(stdarch_powerpc) + feature(powerpc_target_feature, stdarch_powerpc) )] pub mod array; @@ -47,6 +47,7 @@ impl_num! { u16 } impl_num! { u32 } impl_num! { u64 } impl_num! { usize } +impl_num! { f16 } impl_num! { f32 } impl_num! { f64 } diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs index b5f19ba47b81..44dfbb3d6c95 100644 --- a/crates/test_helpers/src/subnormals.rs +++ b/crates/test_helpers/src/subnormals.rs @@ -39,7 +39,7 @@ macro_rules! impl_else { } } -impl_float! { f32, f64 } +impl_float! { f16, f32, f64 } impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } /// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs. diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 639d07df7337..6a58e59fb93e 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2026-01-26" +channel = "nightly-2026-03-18" components = ["rustfmt", "clippy", "miri", "rust-src"] diff --git a/triagebot.toml b/triagebot.toml new file mode 100644 index 000000000000..43048b5e4514 --- /dev/null +++ b/triagebot.toml @@ -0,0 +1,44 @@ +## See for documentation +## of these features. + +# Allow users to use labels commands. +# Documentation at: https://forge.rust-lang.org/triagebot/labeling.html +[relabel] +allow-unauthenticated = [ + "A-*", + "C-*", + "E-*", + "F-*", + "I-*", + "ISA-*", + "O-*", +] + +# Allow users to assign 'r?` someone to an issue or PR. +# Documentation at: https://forge.rust-lang.org/triagebot/issue-assignment.html +[assign] +warn_non_default_branch = true + +# Warns when a PR contains merge commits +# Documentation at: https://forge.rust-lang.org/triagebot/no-merge.html +[no-merges] +exclude_titles = ["Sync from"] + +# Canonicalize issue numbers to avoid closing the wrong issue +# when commits are included in upstream sync, as well as warning links in commits. +# Documentation at: https://forge.rust-lang.org/triagebot/issue-links.html +[issue-links] +check-commits = "uncanonicalized" + +# Enable issue transfers within the org +# Documentation at: https://forge.rust-lang.org/triagebot/transfer.html +[transfer] + +# Enable comments linking to triagebot range-diff when a PR is rebased +# onto a different base commit +# Documentation at: https://forge.rust-lang.org/triagebot/range-diff.html +[range-diff] + +# Add link to the review body to review changes since posting it. +# Documentation at: https://forge.rust-lang.org/triagebot/review-changes-since.html +[review-changes-since] From 81e7d7d99ed1f2890f8e2a32291a27d7088e094e Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 12 Apr 2026 01:43:23 +0200 Subject: [PATCH 4/9] improve how `proptest` errors are displayed (#517) --- crates/test_helpers/src/lib.rs | 39 +++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 82adb06d8a9d..ce3680ac2c30 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -122,12 +122,23 @@ pub fn make_runner() -> proptest::test_runner::TestRunner { proptest::test_runner::TestRunner::new(proptest::test_runner::Config::with_cases(4)) } +#[track_caller] +fn unwrap_test_error( + x: Result>, +) -> T { + // Using the `Display` instance of the error is much more readable. + match x { + Ok(v) => v, + Err(e) => panic!("{e}"), + } +} + /// Test a function that takes a single value. pub fn test_1( f: &dyn Fn(A) -> proptest::test_runner::TestCaseResult, ) { let mut runner = make_runner(); - runner.run(&A::default_strategy(), f).unwrap(); + unwrap_test_error(runner.run(&A::default_strategy(), f)) } /// Test a function that takes two values. @@ -135,11 +146,11 @@ pub fn test_2 proptest::test_runner::TestCaseResult, ) { let mut runner = make_runner(); - runner - .run(&(A::default_strategy(), B::default_strategy()), |(a, b)| { + unwrap_test_error( + runner.run(&(A::default_strategy(), B::default_strategy()), |(a, b)| { f(a, b) - }) - .unwrap(); + }), + ) } /// Test a function that takes two values. @@ -151,16 +162,14 @@ pub fn test_3< f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult, ) { let mut runner = make_runner(); - runner - .run( - &( - A::default_strategy(), - B::default_strategy(), - C::default_strategy(), - ), - |(a, b, c)| f(a, b, c), - ) - .unwrap(); + unwrap_test_error(runner.run( + &( + A::default_strategy(), + B::default_strategy(), + C::default_strategy(), + ), + |(a, b, c)| f(a, b, c), + )); } /// Test a unary vector function against a unary scalar function, applied elementwise. From 3bd3fffa7fdcab25692afb45776a1d5fbad620df Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 16 Apr 2026 17:47:51 +0200 Subject: [PATCH 5/9] float reduce_max/min: fix SNaN treatment (#515) --- crates/core_simd/src/simd/num/float.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index 9f27e527f00f..1fa7991920fa 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -430,14 +430,14 @@ macro_rules! impl_trait { #[inline] fn reduce_max(self) -> Self::Scalar { - // Safety: `self` is a float vector - unsafe { core::intrinsics::simd::simd_reduce_max(self) } + // LLVM has no intrinsic we can use here + // (https://github.com/llvm/llvm-project/issues/185827). + self.as_array().iter().copied().fold(Self::Scalar::NAN, Self::Scalar::max) } #[inline] fn reduce_min(self) -> Self::Scalar { - // Safety: `self` is a float vector - unsafe { core::intrinsics::simd::simd_reduce_min(self) } + self.as_array().iter().copied().fold(Self::Scalar::NAN, Self::Scalar::min) } } )* From f7893ba153232b3fbf52e2b03e1c1f4407d000ee Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 20 Apr 2026 17:00:51 +0200 Subject: [PATCH 6/9] add `f16` types to the prelude (#521) --- crates/core_simd/src/simd/prelude.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/core_simd/src/simd/prelude.rs b/crates/core_simd/src/simd/prelude.rs index 6e93f16e10b1..51b8def3d6ee 100644 --- a/crates/core_simd/src/simd/prelude.rs +++ b/crates/core_simd/src/simd/prelude.rs @@ -14,6 +14,10 @@ pub use super::{ simd_swizzle, }; +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{f16x1, f16x2, f16x4, f16x8, f16x16, f16x32, f16x64}; + #[rustfmt::skip] #[doc(no_inline)] pub use super::{f32x1, f32x2, f32x4, f32x8, f32x16, f32x32, f32x64}; From 9447da6f88b2f6cf22cbf7d01b3cc19978967001 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 23 Apr 2026 15:04:48 -0400 Subject: [PATCH 7/9] Add bound check hint to mask first_set (#523) --- crates/core_simd/src/masks.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index a5334afbe5f8..cb5d54020f7f 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -400,7 +400,15 @@ where if min_index.eq(T::TRUE) { None } else { - Some(min_index.to_usize()) + let min_index = min_index.to_usize(); + + // Allow eliminating bounds checks when using the index + // Safety: the index can't exceed the number of elements in the vector + unsafe { + core::hint::assert_unchecked(min_index < N); + } + + Some(min_index) } } } From 82ee1e04c63f893668962e5abc2696ea9b2ce00a Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 12 Apr 2026 23:47:43 +0200 Subject: [PATCH 8/9] use larger ulps for `log` because it uses 2 inexact operations --- crates/std_float/tests/float.rs | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/crates/std_float/tests/float.rs b/crates/std_float/tests/float.rs index 0fa5da3dca50..f97e1123c852 100644 --- a/crates/std_float/tests/float.rs +++ b/crates/std_float/tests/float.rs @@ -33,23 +33,6 @@ macro_rules! unary_approx_test { } } -macro_rules! binary_approx_test { - { $scalar:tt, $($func:tt),+ } => { - test_helpers::test_lanes! { - $( - fn $func() { - test_helpers::test_binary_elementwise_approx( - &core_simd::simd::Simd::<$scalar, LANES>::$func, - &$scalar::$func, - &|_, _| true, - 16, - ) - } - )* - } - } -} - macro_rules! ternary_test { { $scalar:tt, $($func:tt),+ } => { test_helpers::test_lanes! { @@ -76,7 +59,19 @@ macro_rules! impl_tests { // https://github.com/rust-lang/miri/issues/3555 unary_approx_test! { $scalar, sin, cos, exp, exp2, ln, log2, log10 } - binary_approx_test! { $scalar, log } + + // The implementation of log is a.ln() / b.ln(), so there are 2 inexact operations, + // hence a larger ulps is needed. + test_helpers::test_lanes! { + fn log() { + test_helpers::test_binary_elementwise_approx( + &core_simd::simd::Simd::<$scalar, LANES>::log, + &$scalar::log, + &|_, _| true, + 32, + ) + } + } test_helpers::test_lanes! { fn fract() { From b1042604758be230eb352551451695ce923d3251 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 11 Apr 2026 20:33:22 +0200 Subject: [PATCH 9/9] bump toolchain to `nightly-2026-04-28` --- rust-toolchain.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 6a58e59fb93e..27d2dd6efbbb 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2026-03-18" +channel = "nightly-2026-04-28" components = ["rustfmt", "clippy", "miri", "rust-src"]