Add TVector4 SIMD operators.

This commit is contained in:
scorpioblood 2024-05-30 21:25:55 +02:00
parent d25345aa07
commit 6126c7d5b0
7 changed files with 180 additions and 104 deletions

View File

@ -6,6 +6,9 @@
namespace Phanes::Core::Math::Detail namespace Phanes::Core::Math::Detail
{ {
// Primary template of the TVector4 construction helper.
// It is intentionally empty: the static map() overloads that actually fill a vector
// are supplied by specializations selected through the boolean parameter A.
template<RealType T, bool A>
struct construct_vec4 {};
template<RealType T, bool A> template<RealType T, bool A>
struct compute_vec4_add {}; struct compute_vec4_add {};
@ -31,6 +34,61 @@ namespace Phanes::Core::Math::Detail
struct compute_vec4_dec {}; struct compute_vec4_dec {};
// Scalar (non-SIMD) specialization of the TVector4 construction helper.
// Every map() overload fills the four named components (x, y, z, w) of v1.
template<RealType T>
struct construct_vec4<T, false>
{
    // Copies all four components of v2 into v1.
    static constexpr void map(Phanes::Core::Math::TVector4<T, false>& v1, const TVector4<T, false>& v2)
    {
        map(v1, v2.x, v2.y, v2.z, v2.w);
    }

    // Sets every component of v1 to the same scalar s.
    static constexpr void map(Phanes::Core::Math::TVector4<T, false>& v1, T s)
    {
        map(v1, s, s, s, s);
    }

    // Sets the components of v1 from four individual values.
    static constexpr void map(Phanes::Core::Math::TVector4<T, false>& v1, T x, T y, T z, T w)
    {
        v1.x = x;
        v1.y = y;
        v1.z = z;
        v1.w = w;
    }

    // Disabled overload taking two 2D vectors (first -> x/y, second -> z/w).
    // It appeared twice, verbatim, in the commented-out section; one copy is kept here.
    /*static constexpr void map(Phanes::Core::Math::TVector4<T, false>& v1, const Phanes::Core::Math::TVector2<T, false>& v2, const Phanes::Core::Math::TVector2<T, false>& v3)
    {
        v1.x = v2.x;
        v1.y = v2.y;
        v1.z = v3.x;
        v1.w = v3.y;
    }*/

    // Reads four consecutive values starting at comp and stores them in x, y, z, w (in that order).
    static constexpr void map(Phanes::Core::Math::TVector4<T, false>& v1, const T* comp)
    {
        v1.x = comp[0];
        v1.y = comp[1];
        v1.z = comp[2];
        v1.w = comp[3];
    }
};
template<RealType T> template<RealType T>
struct compute_vec4_add<T, false> struct compute_vec4_add<T, false>
{ {

View File

@ -0,0 +1,30 @@
#pragma once
#include "Core/public/Math/Boilerplate.h"
namespace Phanes::Core::Math::SIMD
{
// Fixed-size array wrapper used to conveniently align arrays.
// The struct is aligned to sizeof(T) * 4 bytes, independent of the element count L.
template<typename T, size_t L>
struct alignas(sizeof(T) * 4) AlignedVec
{
public:
    // Element storage; as the first member it begins at the aligned start of the object.
    T data[L];

    // Copies the first L elements from n_aligned_data into the aligned storage.
    // The copy is element-wise, so the source needs no special alignment;
    // it must point to at least L readable elements.
    AlignedVec(const T* n_aligned_data)
    {
        for (size_t i = 0; i < L; ++i)
        {
            data[i] = n_aligned_data[i];
        }
    }

    // Returns a read-only pointer to the first stored element.
    // Const-qualified so it can be called on const instances as well as mutable ones.
    const T* Get() const
    {
        return data;
    }
};
}

View File

@ -31,7 +31,7 @@ namespace Phanes::Core::Math::SIMD
template<typename T, size_t L, bool IsAligned> template<typename T, size_t L, bool IsAligned>
struct use_simd struct use_simd
{ {
bool value = false; static const bool value = false;
}; };
@ -40,37 +40,37 @@ namespace Phanes::Core::Math::SIMD
template<> template<>
struct use_simd<float, 4, true> struct use_simd<float, 4, true>
{ {
bool value = true && (P_SSE__ || P_NEON__); static const bool value = true && (P_SSE__ || P_NEON__);
}; };
template<> template<>
struct use_simd<float, 3, true> struct use_simd<float, 3, true>
{ {
bool value = true && (P_SSE__ || P_NEON__); static const bool value = true && (P_SSE__ || P_NEON__);
}; };
template<> template<>
struct use_simd<int, 4, true> struct use_simd<int, 4, true>
{ {
bool value = true && (P_SSE__ || P_NEON__); static const bool value = true && (P_SSE__ || P_NEON__);
}; };
template<> template<>
struct use_simd<int, 3, true> struct use_simd<int, 3, true>
{ {
bool value = true && (P_SSE__ || P_NEON__); static const bool value = true && (P_SSE__ || P_NEON__);
}; };
template<> template<>
struct use_simd<unsigned int, 4, true> struct use_simd<unsigned int, 4, true>
{ {
bool value = true && (P_SSE__ || P_NEON__); static const bool value = true && (P_SSE__ || P_NEON__);
}; };
template<> template<>
struct use_simd<unsigned int, 3, true> struct use_simd<unsigned int, 3, true>
{ {
bool value = true && (P_SSE__ || P_NEON__); static const bool value = true && (P_SSE__ || P_NEON__);
}; };
// SSE // SSE
@ -78,19 +78,19 @@ namespace Phanes::Core::Math::SIMD
template<> template<>
struct use_simd<double, 2, true> struct use_simd<double, 2, true>
{ {
bool value = true && P_SSE__; static const bool value = true && P_SSE__;
}; };
template<> template<>
struct use_simd<Phanes::Core::Types::int64, 2, true> struct use_simd<Phanes::Core::Types::int64, 2, true>
{ {
bool value = true && P_SSE__; static const bool value = true && P_SSE__;
}; };
template<> template<>
struct use_simd<Phanes::Core::Types::uint64, 2, true> struct use_simd<Phanes::Core::Types::uint64, 2, true>
{ {
bool value = true && P_SSE__; static const bool value = true && P_SSE__;
}; };
@ -100,19 +100,19 @@ namespace Phanes::Core::Math::SIMD
template<> template<>
struct use_simd<double, 4, true> struct use_simd<double, 4, true>
{ {
bool value = true && P_AVX__; static const bool value = true && P_AVX__;
}; };
template<> template<>
struct use_simd<double, 3, true> struct use_simd<double, 3, true>
{ {
bool value = true && P_AVX__; static const bool value = true && P_AVX__;
}; };
template<> template<>
struct use_simd<float, 8, true> struct use_simd<float, 8, true>
{ {
bool value = true && P_AVX__; static const bool value = true && P_AVX__;
}; };
@ -121,37 +121,37 @@ namespace Phanes::Core::Math::SIMD
template<> template<>
struct use_simd<Phanes::Core::Types::int64, 4, true> struct use_simd<Phanes::Core::Types::int64, 4, true>
{ {
bool value = true && P_AVX2__; static const bool value = true && P_AVX2__;
}; };
template<> template<>
struct use_simd<Phanes::Core::Types::int64, 3, true> struct use_simd<Phanes::Core::Types::int64, 3, true>
{ {
bool value = true && P_AVX2__; static const bool value = true && P_AVX2__;
}; };
template<> template<>
struct use_simd<Phanes::Core::Types::uint64, 4, true> struct use_simd<Phanes::Core::Types::uint64, 4, true>
{ {
bool value = true && P_AVX2__; static const bool value = true && P_AVX2__;
}; };
template<> template<>
struct use_simd<Phanes::Core::Types::uint64, 3, true> struct use_simd<Phanes::Core::Types::uint64, 3, true>
{ {
bool value = true && P_AVX2__; static const bool value = true && P_AVX2__;
}; };
template<> template<>
struct use_simd<int, 8, true> struct use_simd<int, 8, true>
{ {
bool value = true && P_AVX2__; static const bool value = true && P_AVX2__;
}; };
template<> template<>
struct use_simd<unsigned int, 8, true> struct use_simd<unsigned int, 8, true>
{ {
bool value = true && P_AVX2__; static const bool value = true && P_AVX2__;
}; };
} }

View File

@ -6,6 +6,7 @@
#include "Core/public/Math/Boilerplate.h" #include "Core/public/Math/Boilerplate.h"
#include "Core/public/Math/MathCommon.hpp" #include "Core/public/Math/MathCommon.hpp"
#include <iostream> #include <iostream>
// -> For IntelliSense // -> For IntelliSense
@ -17,7 +18,7 @@
// ========== // // ========== //
Phanes::Core::Types::Vec4f32Reg p_vec4_abs(const Phanes::Core::Types::Vec4f32Reg& v) Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg& v)
{ {
return _mm_and_ps(v, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF))); return _mm_and_ps(v, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)));
} }
@ -34,6 +35,47 @@ namespace Phanes::Core::Math::Detail
{ {
// Template class has already been defined and is included through: Storage.h -> Vector4.hpp -> SIMDIntrinsics.h -> PhanesVectorMathSEE.hpp // Template class has already been defined and is included through: Storage.h -> Vector4.hpp -> SIMDIntrinsics.h -> PhanesVectorMathSEE.hpp
// SSE specialization of the TVector4 construction helper for float vectors.
// Every overload writes the packed register member `comp` of v1. All overloads use the
// same lane order: the first component (x / s[0]) ends up in the lowest lane.
template<>
struct construct_vec4<float, true>
{
    // Copies v2 into v1 by copying the packed register itself, which keeps the lane order
    // intact (re-packing the components through _mm_set_ps(x, y, z, w) would reverse them).
    static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& v1, const TVector4<float, true>& v2)
    {
        v1.comp = v2.comp;
    }

    // Broadcasts the scalar s to all four lanes.
    static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& v1, float s)
    {
        v1.comp = _mm_set_ps1(s);
    }

    // Packs four individual values into the register.
    // _mm_set_ps takes its arguments from the highest lane down to the lowest, so the
    // components are passed in reverse to place x in lane 0 - the same layout the
    // pointer overload below produces with _mm_loadu_ps.
    static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& v1, float x, float y, float z, float w)
    {
        v1.comp = _mm_set_ps(w, z, y, x);
    }

    // TODO: the overload taking two TVector2 values (first -> x/y, second -> z/w) is still
    // disabled. When it is enabled it has to pack in the same reversed argument order:
    //     v1.comp = _mm_set_ps(v3.y, v3.x, v2.y, v2.x);

    // Loads four consecutive floats starting at s with an unaligned load (s[0] -> lane 0).
    static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& v1, const float* s)
    {
        v1.comp = _mm_loadu_ps(s);
    }
};
template<> template<>
struct compute_vec4_add<float, true> struct compute_vec4_add<float, true>
{ {

View File

@ -5,7 +5,7 @@
#include "Core/public/Math/MathTypes.h" #include "Core/public/Math/MathTypes.h"
namespace Phanes::Core::SIMD namespace Phanes::Core::Math::SIMD
{ {
template<size_t L, typename T, bool UseSimd> template<size_t L, typename T, bool UseSimd>
struct Storage; struct Storage;

View File

@ -24,22 +24,22 @@ namespace Phanes::Core::Math
/// <summary> /// <summary>
/// X component of vector /// X component of vector
/// </summary> /// </summary>
T x; Real x;
/// <summary> /// <summary>
/// Y component of vector /// Y component of vector
/// </summary> /// </summary>
T y; Real y;
/// <summary> /// <summary>
/// Z component of vector /// Z component of vector
/// </summary> /// </summary>
T z; Real z;
/// <summary> /// <summary>
/// W component of vector /// W component of vector
/// </summary> /// </summary>
T w; Real w;
}; };
/// <summary> /// <summary>
@ -47,8 +47,8 @@ namespace Phanes::Core::Math
/// </summary> /// </summary>
union union
{ {
typename Phanes::Core::SIMD::Storage<4, T, IsAlgined>::type comp; typename Phanes::Core::Math::SIMD::Storage<4, Real, SIMD::use_simd<Real, 4, IsAlgined>::value>::type comp;
typename Phanes::Core::SIMD::Storage<4, T, IsAlgined>::type data; typename Phanes::Core::Math::SIMD::Storage<4, Real, SIMD::use_simd<Real, 4, IsAlgined>::value>::type data;
}; };
}; };

View File

@ -15,44 +15,35 @@
namespace Phanes::Core::Math namespace Phanes::Core::Math
{ {
template<RealType T, bool A> template<RealType T, bool A>
TVector4<T, A>::TVector4(const TVector4<Real, A>& v) : TVector4<T, A>::TVector4(const TVector4<Real, A>& v)
x(v.x), {
y(v.y), Detail::construct_vec4<T, SIMD::use_simd<T, 4, A>::value>::map(*this, v);
z(v.z), }
w(v.w)
{}
template<RealType T, bool A> template<RealType T, bool A>
TVector4<T, A>::TVector4(Real _x, Real _y, Real _z, Real _w) : TVector4<T, A>::TVector4(Real _x, Real _y, Real _z, Real _w)
x(_x), {
y(_y), Detail::construct_vec4<T, SIMD::use_simd<T, 4, A>::value>::map(*this, _x, _y, _z, _w);
z(_z), }
w(_w)
{}
template<RealType T, bool A> template<RealType T, bool A>
Phanes::Core::Math::TVector4<T, A>::TVector4(Real s) : Phanes::Core::Math::TVector4<T, A>::TVector4(Real s)
x(s), {
y(s), Detail::construct_vec4<T, SIMD::use_simd<T, 4, A>::value>::map(*this, s);
z(s), }
w(s)
{}
template<RealType T, bool A> template<RealType T, bool A>
Phanes::Core::Math::TVector4<T, A>::TVector4(const TVector2<Real>& v1, const TVector2<Real>& v2) : Phanes::Core::Math::TVector4<T, A>::TVector4(const TVector2<Real>& v1, const TVector2<Real>& v2)
x(v1.x), {
y(v1.y), Detail::construct_vec4<T, SIMD::use_simd<T, 4, A>::value>::map(*this, v1, v2);
z(v2.x), }
w(v2.y)
{}
template<RealType T, bool A> template<RealType T, bool A>
Phanes::Core::Math::TVector4<T, A>::TVector4(const Real* comp) : Phanes::Core::Math::TVector4<T, A>::TVector4(const Real* comp)
x(comp[0]), {
y(comp[1]), Detail::construct_vec4<T, SIMD::use_simd<T, 4, A>::value>::map(*this, comp);
z(comp[2]), }
w(comp[3])
{}
template<RealType T, bool A> template<RealType T, bool A>
TVector4<T, A> operator+=(TVector4<T, A>& v1, const TVector4<T, A>& v2) TVector4<T, A> operator+=(TVector4<T, A>& v1, const TVector4<T, A>& v2)
@ -177,13 +168,13 @@ namespace Phanes::Core::Math
// Comparison // Comparison
template<RealType T, bool A> template<RealType T, bool A>
TVector4<T, A> operator==(const TVector4<T, A>& v1, const TVector4<T, A>& v2) bool operator==(const TVector4<T, A>& v1, const TVector4<T, A>& v2)
{ {
return Detail::compute_vec4_eq<T, A>::map(v1, v2); return Detail::compute_vec4_eq<T, A>::map(v1, v2);
} }
template<RealType T, bool A> template<RealType T, bool A>
TVector4<T, A> operator!=(const TVector4<T, A>& v1, const TVector4<T, A>& v2) bool operator!=(const TVector4<T, A>& v1, const TVector4<T, A>& v2)
{ {
return Detail::compute_vec4_ieq<T, A>::map(v1, v2); return Detail::compute_vec4_ieq<T, A>::map(v1, v2);
} }
@ -226,49 +217,4 @@ namespace Phanes::Core::Math
{ {
return --v1; return --v1;
} }
// SIMD constructor
// Copy-constructor specialization for TVector4<float, true>: fills comp with a single
// _mm_load_ps from the address of v reinterpreted as a pointer to float.
// NOTE(review): relies on the four components being stored at the very start of v and on
// v being suitably aligned for _mm_load_ps - confirm against the class layout.
template<>
TVector4<float, true>::TVector4(const TVector4<float, true>& v)
{
this->comp = _mm_load_ps(reinterpret_cast<const float*>(&v));
}
// Component-wise constructor specialization for TVector4<float, true>.
// The initializer list stores the arguments in x, y, z, w; the body then loads comp
// from the address of x.
// NOTE(review): presumably x, y, z, w are contiguous and aligned for _mm_load_ps;
// if they share storage with comp the load re-reads the values just written - TODO confirm.
template<>
TVector4<float, true>::TVector4(float _x, float _y, float _z, float _w) :
x(_x),
y(_y),
z(_z),
w(_w)
{
this->comp = _mm_load_ps(reinterpret_cast<float*>(&this->x));
}
// Scalar constructor specialization for TVector4<float, true>.
// Passes the address of the by-value parameter s to _mm_load_ps1, which replicates
// that single float into every lane of comp.
template<>
TVector4<float, true>::TVector4(float s)
{
this->comp = _mm_load_ps1(&s);
}
// Constructor specialization for TVector4<float, true> taking two 2D vectors.
// v1 supplies x and y, v2 supplies z and w (via the initializer list); the body then
// loads comp from the address of x.
// NOTE(review): presumably x, y, z, w are contiguous and aligned for _mm_load_ps - TODO confirm.
template<>
TVector4<float, true>::TVector4(const TVector2<float>& v1, const TVector2<float>& v2) :
x(v1.x),
y(v1.y),
z(v2.x),
w(v2.y)
{
this->comp = _mm_load_ps(reinterpret_cast<float*>(&this->x));
}
// Constructor specialization for TVector4<float, true> taking a pointer to components.
// Reads comp[0..3] into x, y, z, w, then loads the member comp from the address of x.
// Note that the parameter is also named comp: inside the initializer list the name refers
// to the pointer parameter, while the body reaches the member through this->comp.
// NOTE(review): presumably x, y, z, w are contiguous and aligned for _mm_load_ps - TODO confirm.
template<>
TVector4<float, true>::TVector4(const float* comp) :
x(comp[0]),
y(comp[1]),
z(comp[2]),
w(comp[3])
{
this->comp = _mm_load_ps(reinterpret_cast<float*>(&this->x));
}
} }