Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 6 additions & 56 deletions crates/core_arch/src/powerpc/altivec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ unsafe extern "unadjusted" {
}

#[macro_use]
mod sealed {
pub(crate) mod sealed {
use super::*;

#[unstable(feature = "stdarch_powerpc", issue = "111145")]
Expand Down Expand Up @@ -728,40 +728,8 @@ mod sealed {
unsafe fn vec_xl(self, a: isize) -> Self::Result;
}

// Generates a free-standing vector load function `$fun` and a matching
// `VectorXl` implementation for `*const $ty`.
//
// Invocation shape: `impl_vec_xl! { <fn name> <instr> / <instr on power9> <element type> }`.
//   $fun     - name of the generated loader function
//   $notpwr9 - instruction `assert_instr` expects under test when
//              `power9-altivec` is NOT enabled
//   $pwr9    - instruction `assert_instr` expects under test when
//              `power9-altivec` IS enabled
//   $ty      - element type; the vector type is looked up with `t_t_l!($ty)`
//
// The expansion uses `mem`, `VectorXl`, `t_t_l!` and `assert_instr` by their
// unqualified names, so they have to be in scope where the macro is invoked.
macro_rules! impl_vec_xl {
($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => {
#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(
all(test, not(target_feature = "power9-altivec")),
assert_instr($notpwr9)
)]
#[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))]
pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) {
// `a` is applied after casting to `*const u8`, i.e. it is a byte offset,
// not an element index.
let addr = (b as *const u8).offset(a);

let mut r = mem::MaybeUninit::uninit();

// Fill the whole vector (size of `t_t_l!($ty)` in bytes) from `addr`,
// copying through `u8` pointers.
crate::ptr::copy_nonoverlapping(
addr,
r.as_mut_ptr() as *mut u8,
mem::size_of::<t_t_l!($ty)>(),
);

// Every byte of `r` was written by the copy above.
r.assume_init()
}

#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl VectorXl for *const $ty {
type Result = t_t_l!($ty);
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_xl(self, a: isize) -> Self::Result {
// Note the swapped argument order: the free function takes the
// offset first and the pointer second.
$fun(a, self)
}
}
};
}
// Use the impl_vec_xl macro from macros module
use crate::core_arch::powerpc::macros::impl_vec_xl;

impl_vec_xl! { vec_xl_i8 lxvd2x / lxv i8 }
impl_vec_xl! { vec_xl_u8 lxvd2x / lxv u8 }
Expand Down Expand Up @@ -3030,7 +2998,7 @@ mod sealed {
#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vsldoi, UIMM4 = 1))]
unsafe fn vsldoi<const UIMM4: i32>(
pub(crate) unsafe fn vsldoi<const UIMM4: i32>(
a: vector_unsigned_char,
b: vector_unsigned_char,
) -> vector_unsigned_char {
Expand Down Expand Up @@ -3082,9 +3050,9 @@ mod sealed {

// TODO: collapse the two once generic_const_exprs are usable.
#[inline]
#[target_feature(enable = "altivec")]
#[target_feature(enable = "vsx")]
#[cfg_attr(test, assert_instr(xxsldwi, UIMM2 = 1))]
unsafe fn xxsldwi<const UIMM2: i32>(
pub(crate) unsafe fn xxsldwi<const UIMM2: i32>(
a: vector_unsigned_char,
b: vector_unsigned_char,
) -> vector_unsigned_char {
Expand Down Expand Up @@ -3134,24 +3102,6 @@ mod sealed {
}
}

// Implements `VectorSld` for every vector type in the comma-separated list.
//
// Both methods reinterpret `self` and `b` with `transmute`, delegate to the
// const-generic helper, and transmute the helper's result back to `Self`:
//   vec_sld::<UIMM4>  -> vsldoi::<UIMM4>
//   vec_sldw::<UIMM2> -> xxsldwi::<UIMM2>
//
// `VectorSld`, `vsldoi`, `xxsldwi` and `transmute` are named unqualified, so
// they have to be in scope where the macro is invoked.
macro_rules! impl_vec_sld {
($($ty:ident),+) => { $(
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl VectorSld for $ty {
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_sld<const UIMM4: i32>(self, b: Self) -> Self {
transmute(vsldoi::<UIMM4>(transmute(self), transmute(b)))
}
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_sldw<const UIMM2: i32>(self, b: Self) -> Self {
transmute(xxsldwi::<UIMM2>(transmute(self), transmute(b)))
}
}
)+ };
}

impl_vec_sld! { vector_bool_char, vector_signed_char, vector_unsigned_char }
impl_vec_sld! { vector_bool_short, vector_signed_short, vector_unsigned_short }
impl_vec_sld! { vector_bool_int, vector_signed_int, vector_unsigned_int }
Expand Down
59 changes: 59 additions & 0 deletions crates/core_arch/src/powerpc/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ macro_rules! t_t_l {
(f32) => {
vector_float
};
(f64) => {
vector_double
};
}

macro_rules! t_t_s {
Expand Down Expand Up @@ -274,7 +277,63 @@ macro_rules! t_b {
};
}

// Implements `VectorSld` for every vector type in the comma-separated list.
//
// Both methods reinterpret `self` and `b` with `transmute`, delegate to the
// const-generic helper, and transmute the helper's result back to `Self`:
//   vec_sld::<UIMM4>  -> vsldoi::<UIMM4>   (enabled under `altivec`)
//   vec_sldw::<UIMM2> -> xxsldwi::<UIMM2>  (enabled under `vsx`)
//
// `VectorSld`, `vsldoi`, `xxsldwi` and `transmute` are named unqualified, so
// they have to be in scope where the macro is invoked.
macro_rules! impl_vec_sld {
($($ty:ident),+) => { $(
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl VectorSld for $ty {
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_sld<const UIMM4: i32>(self, b: Self) -> Self {
transmute(vsldoi::<UIMM4>(transmute(self), transmute(b)))
}
#[inline]
// Unlike `vec_sld`, the word-shift variant is gated on `vsx`.
#[target_feature(enable = "vsx")]
unsafe fn vec_sldw<const UIMM2: i32>(self, b: Self) -> Self {
transmute(xxsldwi::<UIMM2>(transmute(self), transmute(b)))
}
}
)+ };
}

// Generates a free-standing vector load function `$fun` and a matching
// `VectorXl` implementation for `*const $ty`, both enabled under `vsx`.
//
// Invocation shape: `impl_vec_xl! { <fn name> <instr> / <instr on power9> <element type> }`.
//   $fun     - name of the generated loader function
//   $notpwr9 - instruction `assert_instr` expects under test when
//              `power9-altivec` is NOT enabled
//   $pwr9    - instruction `assert_instr` expects under test when
//              `power9-altivec` IS enabled
//   $ty      - element type; the vector type is looked up with `t_t_l!($ty)`
//
// The expansion uses `mem`, `VectorXl`, `t_t_l!` and `assert_instr` by their
// unqualified names, so they have to be in scope where the macro is invoked.
macro_rules! impl_vec_xl {
($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => {
#[inline]
#[target_feature(enable = "vsx")]
#[unstable(feature = "stdarch_powerpc", issue = "111145")]
#[cfg_attr(
all(test, not(target_feature = "power9-altivec")),
assert_instr($notpwr9)
)]
#[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))]
pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) {
// `a` is applied after casting to `*const u8`, i.e. it is a byte offset,
// not an element index.
let addr = (b as *const u8).offset(a);

let mut r = mem::MaybeUninit::uninit();

// Fill the whole vector (size of `t_t_l!($ty)` in bytes) from `addr`,
// copying through `u8` pointers.
crate::ptr::copy_nonoverlapping(
addr,
r.as_mut_ptr() as *mut u8,
mem::size_of::<t_t_l!($ty)>(),
);

// Every byte of `r` was written by the copy above.
r.assume_init()
}

#[unstable(feature = "stdarch_powerpc", issue = "111145")]
impl VectorXl for *const $ty {
type Result = t_t_l!($ty);
#[inline]
#[target_feature(enable = "vsx")]
unsafe fn vec_xl(self, a: isize) -> Self::Result {
// Note the swapped argument order: the free function takes the
// offset first and the pointer second.
$fun(a, self)
}
}
};
}

pub(crate) use impl_vec_sld;
pub(crate) use impl_vec_trait;
pub(crate) use impl_vec_xl;
pub(crate) use s_t_l;
pub(crate) use t_b;
pub(crate) use t_t_l;
Expand Down
82 changes: 81 additions & 1 deletion crates/core_arch/src/powerpc/vsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@

#![allow(non_camel_case_types)]

use super::macros::*;
use crate::core_arch::powerpc::*;
use crate::core_arch::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::mem::transmute;
use crate::mem::{self, transmute};

types! {
#![unstable(feature = "stdarch_powerpc", issue = "111145")]
Expand Down Expand Up @@ -171,6 +172,14 @@ mod sealed {
vec_mergeeo! { vector_unsigned_int, mergee, mergeo }
vec_mergeeo! { vector_bool_int, mergee, mergeo }
vec_mergeeo! { vector_float, mergee, mergeo }

// Implement vec_xl for f64 (vector_double).
use crate::core_arch::powerpc::altivec::sealed::VectorXl;
impl_vec_xl! { vec_xl_f64 lxvd2x / lxv f64 }

// Implement vec_sld for vector_double.
use crate::core_arch::powerpc::altivec::sealed::{VectorSld, vsldoi, xxsldwi};
impl_vec_sld! { vector_double }
}

/// Vector permute.
Expand Down Expand Up @@ -255,4 +264,75 @@ mod tests {
test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]}
test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]}
test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]}

#[simd_test(enable = "altivec")]
fn test_vec_sld_f64x2() {
    use crate::core_arch::powerpc::altivec::sealed::VectorSld;

    let lhs = vector_double::from(f64x2::from_array([1.0, 2.0]));
    let rhs = vector_double::from(f64x2::from_array([3.0, 4.0]));

    // An 8-byte shift moves the pair by exactly one f64 lane, so whole lanes
    // come out intact. Which lanes depends on byte order:
    //   little-endian -> [rhs[1], lhs[0]] = [4.0, 1.0]
    //   big-endian    -> [lhs[1], rhs[0]] = [2.0, 3.0]
    let expected = if cfg!(target_endian = "little") {
        f64x2::from_array([4.0, 1.0])
    } else {
        f64x2::from_array([2.0, 3.0])
    };

    let shifted: f64x2 = unsafe { transmute(lhs.vec_sld::<8>(rhs)) };
    assert_eq!(shifted, expected);
}

#[simd_test(enable = "vsx")]
fn test_vec_sldw_f64x2() {
    use crate::core_arch::powerpc::altivec::sealed::VectorSld;

    let lhs = vector_double::from(f64x2::from_array([1.0, 2.0]));
    let rhs = vector_double::from(f64x2::from_array([3.0, 4.0]));

    // `vec_sldw` counts in 4-byte words, so a shift of one word should match
    // `vec_sld` with a 4-byte shift. Four bytes is half an f64, which means
    // the resulting lanes are made of mixed halves; rather than spelling out
    // those bit patterns per endianness, the two operations are compared
    // against each other.
    let (by_words, by_bytes): (f64x2, f64x2) = unsafe {
        (
            transmute(lhs.vec_sldw::<1>(rhs)),
            transmute(lhs.vec_sld::<4>(rhs)),
        )
    };
    assert_eq!(by_words, by_bytes);
}

#[simd_test(enable = "vsx")]
fn test_vec_xl_f64() {
    let pat = [1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];

    // (byte offset into `pat`, lanes expected at that offset).
    // Each f64 is 8 bytes, so offsets 0 / 16 / 32 start at elements 0 / 2 / 4.
    let cases: [(isize, [f64; 2]); 3] = [
        (0, [1.0, 2.0]),
        (16, [3.0, 4.0]),
        (32, [5.0, 6.0]),
    ];

    for &(byte_offset, lanes) in &cases {
        let loaded: f64x2 = unsafe {
            let raw = vec_xl(byte_offset, pat.as_ptr());
            transmute(raw)
        };
        assert_eq!(loaded.as_array()[0], lanes[0]);
        assert_eq!(loaded.as_array()[1], lanes[1]);
    }
}
}
Loading