Wasm32: Rust should not spill v128 function params into shadow stack
Rust supports the v128 type from the WebAssembly spec. It is exposed in Rust as the core::arch::wasm32::v128 type.
WebAssembly treats it as a primitive value type, so it can be passed and returned by value, e.g. (func $add7_to_v128 (;0;) (type 0) (param v128) (result v128)). This allows the engine's backend to later generate efficient machine code that keeps such values in, for example, xmm registers.
C & C++
C does exactly that:
#include <wasm_simd128.h>
// Exported as "exact_eq_128"; takes both 128-bit vectors by value.
// Computes a & ~b and returns true when no bit of that result is set.
// NOTE(review): despite the name, this checks that every bit set in `a` is
// also set in `b`, not full equality; it does not affect the ABI being shown.
bool exact_eq_128(v128_t a, v128_t b)
__attribute__((export_name("exact_eq_128")))
{
// and_res = a & ~b
v128_t and_res = wasm_v128_andnot(a, b);
// any_true is nonzero if any bit of and_res is set, so negate it.
return !wasm_v128_any_true(and_res);
}
// Exported as "add7_to_v128"; takes and returns a 128-bit vector by value.
// Adds 7 to each of the sixteen 8-bit lanes of `a`.
v128_t add7_to_v128(v128_t a)
__attribute__((export_name("add7_to_v128")))
{
// Vector with the value 7 in every 8-bit lane.
v128_t b = wasm_u8x16_splat(7);
// Lane-wise 8-bit addition.
return wasm_i8x16_add(a, b);
}
Compile and print:
clang -std=c23 -msimd128 -O3 --target=wasm32-unknown-unknown \
-nostdlib '-Wl,--no-entry' -o c_v128.wasm .\c_v128.c
wasm-tools print .\c_v128.wasm
(func $exact_eq_128 (;0;) (type 0) (param v128 v128) (result i32)
local.get 0 ;; Load value from parameter to stack
local.get 1 ;; Load value from parameter to stack
v128.andnot
v128.any_true
i32.eqz
)
(func $add7_to_v128 (;1;) (type 1) (param v128) (result v128)
local.get 0
v128.const i32x4 0x07070707 0x07070707 0x07070707 0x07070707
i8x16.add
;; Implicitly return value from top of the stack
)
Rust
Unfortunately, Rust passes v128 arguments and v128 return values indirectly, through pointers into the shadow stack, instead of passing them by value:
use core::arch::wasm32::*;
/// Returns `true` when `a & !b` has no bits set.
///
/// Uses the default Rust ABI; both `v128` parameters are declared by value.
///
/// NOTE(review): despite the name, this checks that every bit set in `a` is
/// also set in `b`, not full equality; it does not affect the ABI being shown.
#[unsafe(export_name = "exact_eq_128")]
pub fn exact_eq_128(a: v128, b: v128)->bool {
// and_res = a & !b
let and_res = v128_andnot(a, b);
// `v128_any_true` is true if any bit is set, so negate it.
!v128_any_true(and_res)
}
/// Adds 7 to each of the sixteen 8-bit lanes of `a` and returns the result.
///
/// Uses the default Rust ABI; the `v128` parameter and return value are
/// declared by value.
#[unsafe(export_name = "add7_to_v128")]
pub fn add7_to_v128(a: v128)->v128 {
// Vector with the value 7 in every 8-bit lane.
let b = u8x16_splat(7);
// Lane-wise 8-bit addition.
let c = u8x16_add(a, b);
c
}
Command:
rustc --version
rustc 1.95.0 (59807616e 2026-04-14)
rustc --edition=2024 -Ctarget-feature=+simd128 -Copt-level=3 \
--crate-type=cdylib -Clto=thin --target=wasm32-unknown-unknown \
.\eq_v128.rs -o eq_v128_rs.wasm
wasm-tools strip .\eq_v128_rs.wasm > eq_v128_rs_s.wasm
wasm-tools print .\eq_v128_rs_s.wasm
Output:
(func $add7_to_v128 (;0;) (type 0) (param i32 i32)
local.get 0 ;; Get pointer to place to return on shadow stack
local.get 1 ;; Get pointer to first actual parameter in shadow stack
v128.load ;; Load parameter from shadow stack
v128.const i32x4 0x07070707 0x07070707 0x07070707 0x07070707
i8x16.add
v128.store ;; Store value we return to caller provided space
)
(func $exact_eq_128 (;1;) (type 1) (param i32 i32) (result i32)
local.get 0
v128.load ;; Load first param from shadow stack
local.get 1
v128.load ;; Load second param from shadow stack
v128.andnot
v128.any_true
i32.eqz
)
Passing values through the shadow stack is bad because the engine's backend cannot really optimize those accesses: from its perspective the shadow stack is just ordinary linear memory without any context. There is no aliasing information, so it must assume that memory can be modified at any time, and it cannot turn the loads and stores back into register values.
Wasmtime cannot optimize such code: Compiler Explorer
So, this should be changed.
Workaround
Until this behaviour changes, it can be circumvented by marking all functions that accept or return v128 as extern (i.e. giving them the C ABI instead of the default Rust ABI):
#[unsafe(export_name = "exact_eq_128")]
pub extern fn exact_eq_128(a: v128, b: v128)->bool {
let and_res = v128_andnot(a, b);
!v128_any_true(and_res)
}
#[unsafe(export_name = "add7_to_v128")]
pub extern fn add7_to_v128(a: v128)->v128 {
let b = u8x16_splat(7);
let c = u8x16_add(a, b);
c
}
(func $add7_to_v128 (;0;) (type 0) (param v128) (result v128)
local.get 0
v128.const i32x4 0x07070707 0x07070707 0x07070707 0x07070707
i8x16.add
)
(func $exact_eq_128 (;1;) (type 1) (param v128 v128) (result i32)
local.get 0
local.get 1
v128.andnot
v128.any_true
i32.eqz
)
Discussion in the ATmosphere