SIMD improvements.

This commit is contained in:
scorpioblood 2024-06-03 21:45:38 +02:00
parent 17e1e61ae5
commit 202baf29f5
11 changed files with 834 additions and 405 deletions

View File

@ -10,7 +10,7 @@
#else
#include <type_traits>
#include <memory>
#endif

View File

@ -0,0 +1,211 @@
#pragma once
#include "Core/public/Math/Boilerplate.h"
namespace Phanes::Core::Math::Detail
{
// Primary templates for the TVector3 helper structs.
// T is the scalar type; S is the boolean that callers pass as
// SIMD::use_simd<T, 3, A>::value. The primary templates are empty:
// only their specializations provide a static map() function.

// Construction (copy, scalar, components, raw pointer).
template<RealType T, bool S>
struct construct_vec3 {};
// Component-wise addition.
template<RealType T, bool S>
struct compute_vec3_add {};
// Component-wise subtraction.
template<RealType T, bool S>
struct compute_vec3_sub {};
// Component-wise multiplication.
template<RealType T, bool S>
struct compute_vec3_mul {};
// Component-wise division.
template<RealType T, bool S>
struct compute_vec3_div {};
// Equality comparison.
template<RealType T, bool S>
struct compute_vec3_eq {};
// Inequality comparison.
template<RealType T, bool S>
struct compute_vec3_ieq {};
// Increment of every component.
template<RealType T, bool S>
struct compute_vec3_inc {};
// Decrement of every component.
template<RealType T, bool S>
struct compute_vec3_dec {};
/// Scalar (non-SIMD) construction helpers for TVector3.
/// Every overload writes x, y and z and sets the fourth component w to zero.
/// (Construction from a TVector2 plus a scalar is handled by the
/// three-component overload; no dedicated overload is needed here.)
template<RealType T>
struct construct_vec3<T, false>
{
    /// Copies x, y and z from v2 into v1.
    static constexpr void map(Phanes::Core::Math::TVector3<T, false>& v1, const TVector3<T, false>& v2)
    {
        v1.x = v2.x;
        v1.y = v2.y;
        v1.z = v2.z;
        v1.w = static_cast<T>(0);
    }

    /// Sets x, y and z of v1 to the same scalar s.
    static constexpr void map(Phanes::Core::Math::TVector3<T, false>& v1, T s)
    {
        v1.x = s;
        v1.y = s;
        v1.z = s;
        v1.w = static_cast<T>(0);
    }

    /// Sets the components of v1 to x, y and z.
    static constexpr void map(Phanes::Core::Math::TVector3<T, false>& v1, T x, T y, T z)
    {
        v1.x = x;
        v1.y = y;
        v1.z = z;
        v1.w = static_cast<T>(0);
    }

    /// Reads comp[0], comp[1] and comp[2] into x, y and z.
    /// comp must point to at least three readable elements; it is not checked.
    static constexpr void map(Phanes::Core::Math::TVector3<T, false>& v1, const T* comp)
    {
        v1.x = comp[0];
        v1.y = comp[1];
        v1.z = comp[2];
        v1.w = static_cast<T>(0);
    }
};
template<RealType T>
struct compute_vec3_add<T, false>
{
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, const Phanes::Core::Math::TVector3<T, false>& v2)
{
r.x = v1.x + v2.x;
r.y = v1.y + v2.y;
r.z = v1.z + v2.z;
}
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, T s)
{
r.x = v1.x + s;
r.y = v1.y + s;
r.z = v1.z + s;
}
};
template<RealType T>
struct compute_vec3_sub<T, false>
{
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, const Phanes::Core::Math::TVector3<T, false>& v2)
{
r.x = v1.x - v2.x;
r.y = v1.y - v2.y;
r.z = v1.z - v2.z;
}
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, T s)
{
r.x = v1.x - s;
r.y = v1.y - s;
r.z = v1.z - s;
}
};
template<RealType T>
struct compute_vec3_mul<T, false>
{
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, const Phanes::Core::Math::TVector3<T, false>& v2)
{
r.x = v1.x * v2.x;
r.y = v1.y * v2.y;
r.z = v1.z * v2.z;
}
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, T s)
{
r.x = v1.x * s;
r.y = v1.y * s;
r.z = v1.z * s;
}
};
template<RealType T>
struct compute_vec3_div<T, false>
{
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, const Phanes::Core::Math::TVector3<T, false>& v2)
{
r.x = v1.x / v2.x;
r.y = v1.y / v2.y;
r.z = v1.z / v2.z;
}
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, T s)
{
s = (T)1.0 / s;
r.x = v1.x * s;
r.y = v1.y * s;
r.z = v1.z * s;
}
};
template<RealType T>
struct compute_vec3_eq<T, false>
{
static constexpr bool map(const Phanes::Core::Math::TVector3<T, false>& v1, const Phanes::Core::Math::TVector3<T, false>& v2)
{
return (Phanes::Core::Math::Abs(v1.x - v2.x) < P_FLT_INAC &&
Phanes::Core::Math::Abs(v1.y - v2.y) < P_FLT_INAC &&
Phanes::Core::Math::Abs(v1.z - v2.z) < P_FLT_INAC);
}
};
/// Scalar (non-SIMD) inequality test for TVector3.
template<RealType T>
struct compute_vec3_ieq<T, false>
{
    /// Returns true when at least one of x, y, z differs by P_FLT_INAC or more.
    ///
    /// Written as the exact negation of the equality test (which uses a strict
    /// "< P_FLT_INAC"), so that for any pair of vectors exactly one of
    /// "equal" / "not equal" holds. The previous "> P_FLT_INAC" form reported
    /// neither when a difference was exactly P_FLT_INAC or NaN.
    static constexpr bool map(const Phanes::Core::Math::TVector3<T, false>& v1, const Phanes::Core::Math::TVector3<T, false>& v2)
    {
        return !(Phanes::Core::Math::Abs(v1.x - v2.x) < P_FLT_INAC &&
                 Phanes::Core::Math::Abs(v1.y - v2.y) < P_FLT_INAC &&
                 Phanes::Core::Math::Abs(v1.z - v2.z) < P_FLT_INAC);
    }
};
template<RealType T>
struct compute_vec3_inc<T, false>
{
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1)
{
r.x = v1.x + 1;
r.y = v1.y + 1;
r.z = v1.z + 1;
}
};
template<RealType T>
struct compute_vec3_dec<T, false>
{
static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1)
{
r.x = v1.x - 1;
r.y = v1.y - 1;
r.z = v1.z - 1;
}
};
}

View File

@ -1,36 +1,34 @@
#pragma once
#include "Core/public/Math/Boilerplate.h"
#include "Core/public/Math/MathCommon.hpp"
#include "Core/public/Math/MathFwd.h"
namespace Phanes::Core::Math::Detail
{
template<RealType T, bool A>
template<RealType T, bool S>
struct construct_vec4 {};
template<RealType T, bool A>
template<RealType T, bool S>
struct compute_vec4_add {};
template<RealType T, bool A>
template<RealType T, bool S>
struct compute_vec4_sub {};
template<RealType T, bool A>
template<RealType T, bool S>
struct compute_vec4_mul {};
template<RealType T, bool A>
template<RealType T, bool S>
struct compute_vec4_div {};
template<RealType T, bool A>
template<RealType T, bool S>
struct compute_vec4_eq {};
template<RealType T, bool A>
template<RealType T, bool S>
struct compute_vec4_ieq {};
template<RealType T, bool A>
template<RealType T, bool S>
struct compute_vec4_inc {};
template<RealType T, bool A>
template<RealType T, bool S>
struct compute_vec4_dec {};

View File

@ -0,0 +1,4 @@
#pragma once
#include "Core/public/Math/Vector3.hpp"
#include "Core/public/Math/Vector4.hpp"

View File

@ -27,7 +27,6 @@ namespace Phanes::Core::Math {
template<RealType T> struct TColor;
template<RealType T> struct TLinearColor;
template<RealType T> struct TVector2;
template<RealType T> struct TVector3;
template<RealType T> struct TRay;
template<RealType T> struct TLine;
template<RealType T> struct TPlane;
@ -45,7 +44,8 @@ namespace Phanes::Core::Math {
template<IntType T> struct TIntPoint2;
template<IntType T> struct TIntPoint3;
template<IntType T> struct TIntPoint4;
template<RealType T, bool IsAligned> struct TVector4;
template<RealType T, bool A> struct TVector3;
template<RealType T, bool A> struct TVector4;
/**
* Specific instantiation of forward declarations.
@ -58,13 +58,6 @@ namespace Phanes::Core::Math {
typedef std::vector<Vector2> Vector2List;
typedef std::vector<Vector2d> Vector2Listd;
// TVector3
typedef TVector3<float> Vector3;
typedef TVector3<double> Vector3d;
typedef std::vector<Vector3> Vector3List;
typedef std::vector<Vector3d> Vector3Listd;
// TIntVector2

View File

@ -5,24 +5,79 @@
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
#include "Core/public/Math/Boilerplate.h"
#include "Core/public/Math/MathCommon.hpp"
#include <iostream>
// -> For IntelliSense
#include "Core/public/Math/Vector3.hpp"
#include "Core/public/Math/Vector4.hpp"
// ========== //
// Common //
// ========== //
namespace Phanes::Core::Math::SIMD
{
/// <summary>
/// Adds all four scalars of the vector.
/// </summary>
/// <param name="v">Vector</param>
/// <returns>Register whose lowest lane (bits 0..31) holds the sum; the upper lanes hold partial sums.</returns>
// Declared inline: this function is defined in a header, so without inline
// every translation unit including it would emit its own definition.
inline Phanes::Core::Types::Vec4f32Reg vec4_hadd(const Phanes::Core::Types::Vec4f32Reg v)
{
    // Duplicate the odd lanes: (v1, v1, v3, v3).
    const __m128 oddLanes = _mm_movehdup_ps(v);
    // Lane 0 = v0 + v1, lane 2 = v2 + v3.
    const __m128 pairSums = _mm_add_ps(v, oddLanes);
    // Move the upper pair down so lane 0 = v2 + v3.
    const __m128 upperPair = _mm_movehl_ps(pairSums, pairSums);
    return _mm_add_ss(pairSums, upperPair);
}
Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg& v)
/// <summary>
/// Adds all scalars of the vector.
/// </summary>
/// <param name="v">Vector</param>
/// <returns>Sum of components.</returns>
float vec4_hadd_cvtf32(const Phanes::Core::Types::Vec4f32Reg v)
{
__m128 shufl = _mm_movehdup_ps(v);
__m128 sum = _mm_add_ps(v, shufl);
shufl = _mm_movehl_ps(sum, sum);
sum = _mm_add_ss(sum, shufl);
return _mm_cvtss_f32(sum);
}
/// <summary>
/// Gets the absolute value of each scalar in the vector.
/// </summary>
/// <param name="v">Vector</param>
/// <returns>Vector with the sign bit of every component cleared.</returns>
// Declared inline: this function is defined in a header, so without inline
// every translation unit including it would emit its own definition.
inline Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg v)
{
    // 0x7FFFFFFF keeps every bit except the IEEE-754 sign bit.
    const __m128 signCleared = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    return _mm_and_ps(v, signCleared);
}
/// <summary>
/// Gets the dot product of the two vectors.
/// </summary>
/// <param name="v1">First vector</param>
/// <param name="v2">Second vector</param>
/// <returns>Register as returned by vec4_hadd for the component-wise product of v1 and v2.</returns>
// Declared inline: this function is defined in a header, so without inline
// every translation unit including it would emit its own definition.
inline Phanes::Core::Types::Vec4f32Reg vec4_dot(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
{
    const __m128 products = _mm_mul_ps(v1, v2);
    return vec4_hadd(products);
}
/// <summary>
/// Gets the dot product of the two vectors as a float.
/// </summary>
/// <param name="v1">First vector</param>
/// <param name="v2">Second vector</param>
/// <returns>Result of vec4_hadd_cvtf32 applied to the component-wise product of v1 and v2.</returns>
// Declared inline: this function is defined in a header, so without inline
// every translation unit including it would emit its own definition.
inline float vec4_dot_cvtf32(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
{
    const __m128 products = _mm_mul_ps(v1, v2);
    return vec4_hadd_cvtf32(products);
}
}
@ -41,7 +96,7 @@ namespace Phanes::Core::Math::Detail
{
/// Copies v2 into v1. _mm_setr_ps takes its arguments lowest lane first,
/// so x lands in lane 0 and w in lane 3.
static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& v1, const TVector4<float, true>& v2)
{
    // The stale _mm_set_ps store (reversed lane order) was overwritten
    // immediately and has been removed.
    v1.comp = _mm_setr_ps(v2.x, v2.y, v2.z, v2.w);
}
@ -52,7 +107,7 @@ namespace Phanes::Core::Math::Detail
/// Builds v1 from four scalars. _mm_setr_ps takes its arguments lowest lane
/// first, so x lands in lane 0 and w in lane 3.
static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& v1, float x, float y, float z, float w)
{
    // The stale _mm_set_ps store (reversed lane order) was overwritten
    // immediately and has been removed.
    v1.comp = _mm_setr_ps(x, y, z, w);
}
/*static constexpr void map(Phanes::Core::Math::TVector4<float, false>& v1, const Phanes::Core::Math::TVector2<float, false>& v2, const Phanes::Core::Math::TVector2<float, false>& v3)
@ -83,6 +138,11 @@ namespace Phanes::Core::Math::Detail
{
r.comp = _mm_add_ps(v1.comp, v2.comp);
}
/// r = v1 + s, with s added to all four lanes.
static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& r, const Phanes::Core::Math::TVector4<float, true>& v1, float s)
{
    const __m128 splat = _mm_set_ps1(s);
    r.comp = _mm_add_ps(v1.comp, splat);
}
};
template<>
@ -92,6 +152,11 @@ namespace Phanes::Core::Math::Detail
{
r.comp = _mm_sub_ps(v1.comp, v2.comp);
}
/// r = v1 - s, with s subtracted from all four lanes.
static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& r, const Phanes::Core::Math::TVector4<float, true>& v1, float s)
{
    const __m128 splat = _mm_set_ps1(s);
    r.comp = _mm_sub_ps(v1.comp, splat);
}
};
template<>
@ -101,6 +166,11 @@ namespace Phanes::Core::Math::Detail
{
r.comp = _mm_mul_ps(v1.comp, v2.comp);
}
/// r = v1 * s, with all four lanes scaled by s.
static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& r, const Phanes::Core::Math::TVector4<float, true>& v1, float s)
{
    const __m128 splat = _mm_set_ps1(s);
    r.comp = _mm_mul_ps(v1.comp, splat);
}
};
template<>
@ -110,6 +180,29 @@ namespace Phanes::Core::Math::Detail
{
r.comp = _mm_div_ps(v1.comp, v2.comp);
}
/// r = v1 / s, with all four lanes divided by s.
static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& r, const Phanes::Core::Math::TVector4<float, true>& v1, float s)
{
    const __m128 splat = _mm_set_ps1(s);
    r.comp = _mm_div_ps(v1.comp, splat);
}
};
/// SSE increment for TVector4<float, true>.
template<>
struct compute_vec4_inc<float, true>
{
    /// r = v1 with 1.0f added to all four lanes.
    static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& r, const Phanes::Core::Math::TVector4<float, true>& v1)
    {
        const __m128 ones = _mm_set_ps1(1.0f);
        r.comp = _mm_add_ps(v1.comp, ones);
    }
};
/// SSE decrement for TVector4<float, true>.
template<>
struct compute_vec4_dec<float, true>
{
    /// r = v1 with 1.0f subtracted from all four lanes.
    static FORCEINLINE void map(Phanes::Core::Math::TVector4<float, true>& r, const Phanes::Core::Math::TVector4<float, true>& v1)
    {
        const __m128 ones = _mm_set_ps1(1.0f);
        r.comp = _mm_sub_ps(v1.comp, ones);
    }
};
template<>
@ -133,5 +226,63 @@ namespace Phanes::Core::Math::Detail
return (r == 0xffffffff) ? true : false;
}
};
//// ============ //
//// TVector3 //
//// ============ //
/// SSE construction helpers for TVector3<float, true>.
/// x, y and z occupy lanes 0..2; every overload writes 0.0f to lane 3 so the
/// unused fourth lane is the same regardless of which constructor was used.
template<>
struct construct_vec3<float, true>
{
    /// Copies x, y and z from v2 into v1.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& v1, const TVector3<float, true>& v2)
    {
        v1.comp = _mm_setr_ps(v2.x, v2.y, v2.z, 0.0f);
    }

    /// Sets x, y and z of v1 to the same scalar s.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& v1, float s)
    {
        // A plain broadcast would also put s into lane 3, unlike the other
        // overloads and the scalar (non-SIMD) path, which zero it.
        v1.comp = _mm_setr_ps(s, s, s, 0.0f);
    }

    /// Sets the components of v1 to x, y and z.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& v1, float x, float y, float z)
    {
        v1.comp = _mm_setr_ps(x, y, z, 0.0f);
    }

    /// Reads s[0], s[1] and s[2] into x, y and z.
    /// s must point to at least three readable floats; it is not checked.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& v1, const float* s)
    {
        v1.comp = _mm_setr_ps(s[0], s[1], s[2], 0.0f);
    }
};
/// SSE increment for TVector3<float, true>.
template<>
struct compute_vec3_inc<float, true>
{
    /// r = v1 with 1.0f added to x, y and z; lane 3 is left unchanged.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& r, const Phanes::Core::Math::TVector3<float, true>& v1)
    {
        // _mm_setr_ps lists lanes lowest first: (x, y, z, w) = (1, 1, 1, 0).
        // _mm_set_ps lists them highest first, which added 0 to x and 1 to
        // the unused fourth lane.
        r.comp = _mm_add_ps(v1.comp, _mm_setr_ps(1.0f, 1.0f, 1.0f, 0.0f));
    }
};
/// SSE decrement for TVector3<float, true>.
template<>
struct compute_vec3_dec<float, true>
{
    /// r = v1 with 1.0f subtracted from x, y and z; lane 3 is left unchanged.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& r, const Phanes::Core::Math::TVector3<float, true>& v1)
    {
        // _mm_setr_ps lists lanes lowest first: (x, y, z, w) = (1, 1, 1, 0).
        // _mm_set_ps lists them highest first, which subtracted 0 from x and
        // 1 from the unused fourth lane.
        r.comp = _mm_sub_ps(v1.comp, _mm_setr_ps(1.0f, 1.0f, 1.0f, 0.0f));
    }
};
/// SSE addition for TVector3<float, true>.
/// The base class only offers map() overloads taking TVector4<float, true>,
/// which the TVector3 operators cannot call with TVector3 arguments. The base
/// (and its overloads) are kept for compatibility; TVector3 overloads are added.
template<> struct compute_vec3_add<float, true> : public compute_vec4_add<float, true>
{
    using compute_vec4_add<float, true>::map;

    /// r = v1 + v2 on all lanes.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& r, const Phanes::Core::Math::TVector3<float, true>& v1, const Phanes::Core::Math::TVector3<float, true>& v2)
    {
        r.comp = _mm_add_ps(v1.comp, v2.comp);
    }

    /// r = v1 + s on x, y and z; lane 3 receives +0 and is left unchanged.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& r, const Phanes::Core::Math::TVector3<float, true>& v1, float s)
    {
        r.comp = _mm_add_ps(v1.comp, _mm_setr_ps(s, s, s, 0.0f));
    }
};
}

View File

@ -17,7 +17,6 @@ namespace Phanes::Core::Math::SIMD
typedef struct type {
T data[L];
} type;
};
template<typename T>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,211 @@
#pragma once
#include "Core/public/Math/Boilerplate.h"
#include "Core/public/Math/Detail/Vector3Decl.inl"
#include "Core/public/Math/SIMD/SIMDIntrinsics.h"
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
namespace Phanes::Core::Math
{
/// Copy constructor: delegates to the construct_vec3 helper chosen by SIMD::use_simd.
template<RealType T, bool A>
TVector3<T, A>::TVector3(const TVector3<Real, A>& v)
{
    using Construct = Detail::construct_vec3<T, SIMD::use_simd<T, 3, A>::value>;
    Construct::map(*this, v);
}
/// Constructs the vector from three components.
template<RealType T, bool A>
TVector3<T, A>::TVector3(Real _x, Real _y, Real _z)
{
    using Construct = Detail::construct_vec3<T, SIMD::use_simd<T, 3, A>::value>;
    Construct::map(*this, _x, _y, _z);
}
/// Constructs the vector from a single scalar.
template<RealType T, bool A>
TVector3<T, A>::TVector3(Real s)
{
    using Construct = Detail::construct_vec3<T, SIMD::use_simd<T, 3, A>::value>;
    Construct::map(*this, s);
}
/// Constructs the vector from a 2D vector (x, y) and a scalar (z).
template<RealType T, bool A>
TVector3<T, A>::TVector3(const TVector2<Real>& v1, Real s)
{
    using Construct = Detail::construct_vec3<T, SIMD::use_simd<T, 3, A>::value>;
    // Reuses the three-component overload of the helper.
    Construct::map(*this, v1.x, v1.y, s);
}
/// Constructs the vector from a pointer to its components.
template<RealType T, bool A>
TVector3<T, A>::TVector3(const Real* comp)
{
    using Construct = Detail::construct_vec3<T, SIMD::use_simd<T, 3, A>::value>;
    Construct::map(*this, comp);
}
/// Adds v2 to v1 in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector3<T, A> operator+=(TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Add = Detail::compute_vec3_add<T, SIMD::use_simd<T, 3, A>::value>;
    Add::map(v1, v1, v2);
    return v1;
}
/// Adds the scalar s to v1 in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector3<T, A> operator+=(TVector3<T, A>& v1, T s)
{
    using Add = Detail::compute_vec3_add<T, SIMD::use_simd<T, 3, A>::value>;
    Add::map(v1, v1, s);
    return v1;
}
/// Subtracts v2 from v1 in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector3<T, A> operator-=(TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Sub = Detail::compute_vec3_sub<T, SIMD::use_simd<T, 3, A>::value>;
    Sub::map(v1, v1, v2);
    return v1;
}
/// Subtracts the scalar s from v1 in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector3<T, A> operator-=(TVector3<T, A>& v1, T s)
{
    using Sub = Detail::compute_vec3_sub<T, SIMD::use_simd<T, 3, A>::value>;
    Sub::map(v1, v1, s);
    return v1;
}
/// Multiplies v1 by v2 component-wise, in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector3<T, A> operator*=(TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Mul = Detail::compute_vec3_mul<T, SIMD::use_simd<T, 3, A>::value>;
    Mul::map(v1, v1, v2);
    return v1;
}
/// Scales v1 by the scalar s in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector3<T, A> operator*=(TVector3<T, A>& v1, T s)
{
    using Mul = Detail::compute_vec3_mul<T, SIMD::use_simd<T, 3, A>::value>;
    Mul::map(v1, v1, s);
    return v1;
}
/// Divides v1 by v2 component-wise, in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector3<T, A> operator/=(TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Div = Detail::compute_vec3_div<T, SIMD::use_simd<T, 3, A>::value>;
    Div::map(v1, v1, v2);
    return v1;
}
/// Divides v1 by the scalar s in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector3<T, A> operator/=(TVector3<T, A>& v1, T s)
{
    using Div = Detail::compute_vec3_div<T, SIMD::use_simd<T, 3, A>::value>;
    Div::map(v1, v1, s);
    return v1;
}
/// Returns v1 + v2 as a new vector.
/// Note: v1 is taken by non-const reference, so it must be a non-const lvalue.
template<RealType T, bool A>
TVector3<T, A> operator+(TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Add = Detail::compute_vec3_add<T, SIMD::use_simd<T, 3, A>::value>;
    TVector3<T, A> sum;
    Add::map(sum, v1, v2);
    return sum;
}
/// Returns v1 + s as a new vector.
/// Note: v1 is taken by non-const reference, so it must be a non-const lvalue.
template<RealType T, bool A>
TVector3<T, A> operator+(TVector3<T, A>& v1, T s)
{
    using Add = Detail::compute_vec3_add<T, SIMD::use_simd<T, 3, A>::value>;
    TVector3<T, A> sum;
    Add::map(sum, v1, s);
    return sum;
}
/// Returns v1 - v2 as a new vector.
/// Note: v1 is taken by non-const reference, so it must be a non-const lvalue.
template<RealType T, bool A>
TVector3<T, A> operator-(TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Sub = Detail::compute_vec3_sub<T, SIMD::use_simd<T, 3, A>::value>;
    TVector3<T, A> difference;
    Sub::map(difference, v1, v2);
    return difference;
}
/// Returns v1 - s as a new vector.
/// Note: v1 is taken by non-const reference, so it must be a non-const lvalue.
template<RealType T, bool A>
TVector3<T, A> operator-(TVector3<T, A>& v1, T s)
{
    using Sub = Detail::compute_vec3_sub<T, SIMD::use_simd<T, 3, A>::value>;
    TVector3<T, A> difference;
    Sub::map(difference, v1, s);
    return difference;
}
/// Returns the component-wise product v1 * v2 as a new vector.
/// Note: v1 is taken by non-const reference, so it must be a non-const lvalue.
template<RealType T, bool A>
TVector3<T, A> operator*(TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Mul = Detail::compute_vec3_mul<T, SIMD::use_simd<T, 3, A>::value>;
    TVector3<T, A> product;
    Mul::map(product, v1, v2);
    return product;
}
/// Returns v1 scaled by s as a new vector.
/// Note: v1 is taken by non-const reference, so it must be a non-const lvalue.
template<RealType T, bool A>
TVector3<T, A> operator*(TVector3<T, A>& v1, T s)
{
    using Mul = Detail::compute_vec3_mul<T, SIMD::use_simd<T, 3, A>::value>;
    TVector3<T, A> product;
    Mul::map(product, v1, s);
    return product;
}
/// Returns the component-wise quotient v1 / v2 as a new vector.
/// Note: v1 is taken by non-const reference, so it must be a non-const lvalue.
template<RealType T, bool A>
TVector3<T, A> operator/(TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Div = Detail::compute_vec3_div<T, SIMD::use_simd<T, 3, A>::value>;
    TVector3<T, A> quotient;
    Div::map(quotient, v1, v2);
    return quotient;
}
/// Returns v1 divided by the scalar s as a new vector.
/// Note: v1 is taken by non-const reference, so it must be a non-const lvalue.
template<RealType T, bool A>
TVector3<T, A> operator/(TVector3<T, A>& v1, T s)
{
    using Div = Detail::compute_vec3_div<T, SIMD::use_simd<T, 3, A>::value>;
    TVector3<T, A> quotient;
    Div::map(quotient, v1, s);
    return quotient;
}
// Comparison
/// Equality test; the result comes from the compute_vec3_eq helper.
template<RealType T, bool A>
bool operator==(const TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using Equal = Detail::compute_vec3_eq<T, SIMD::use_simd<T, 3, A>::value>;
    return Equal::map(v1, v2);
}
/// Inequality test; the result comes from the compute_vec3_ieq helper.
template<RealType T, bool A>
bool operator!=(const TVector3<T, A>& v1, const TVector3<T, A>& v2)
{
    using NotEqual = Detail::compute_vec3_ieq<T, SIMD::use_simd<T, 3, A>::value>;
    return NotEqual::map(v1, v2);
}
// Inc- / Decrement
/// Prefix increment: adds one to the components of v1 in place and returns v1.
template<RealType T, bool A>
TVector3<T, A>& operator++(TVector3<T, A>& v1)
{
    // compute_vec3_inc::map takes (result, source); v1 is both here.
    // The previous one-argument call matched no map() overload.
    Detail::compute_vec3_inc<T, SIMD::use_simd<T, 3, A>::value>::map(v1, v1);
    return v1;
}
/// Prefix decrement: subtracts one from the components of v1 in place and returns v1.
template<RealType T, bool A>
TVector3<T, A>& operator--(TVector3<T, A>& v1)
{
    // Uses the decrement helper (the increment helper was called here before)
    // and passes v1 as both result and source, as map() requires.
    Detail::compute_vec3_dec<T, SIMD::use_simd<T, 3, A>::value>::map(v1, v1);
    return v1;
}
/// Postfix increment.
/// NOTE(review): this forwards to the prefix form and returns a reference to
/// the already incremented vector, not a copy of the previous value.
template<RealType T, bool A>
TVector3<T, A>& operator++(TVector3<T, A>& v1, int)
{
    TVector3<T, A>& incremented = ++v1;
    return incremented;
}
/// Postfix decrement.
/// NOTE(review): this forwards to the prefix form and returns a reference to
/// the already decremented vector, not a copy of the previous value.
template<RealType T, bool A>
TVector3<T, A>& operator--(TVector3<T, A>& v1, int)
{
    TVector3<T, A>& decremented = --v1;
    return decremented;
}
}

View File

@ -7,6 +7,7 @@
#include "Core/public/Math/MathFwd.h"
#include "Core/public/Math/Vector2.hpp"
namespace Phanes::Core::Math
@ -14,7 +15,7 @@ namespace Phanes::Core::Math
/// 4D Vector defined with x, y, z, w.
/// Alignment allows for possible simd optimization.
template<RealType T, bool IsAlgined = false>
template<RealType T, bool IsAligned = false>
struct TVector4
{
public:
@ -48,8 +49,8 @@ namespace Phanes::Core::Math
/// </summary>
union
{
typename Phanes::Core::Math::SIMD::Storage<4, Real, SIMD::use_simd<Real, 4, IsAlgined>::value>::type comp;
typename Phanes::Core::Math::SIMD::Storage<4, Real, SIMD::use_simd<Real, 4, IsAlgined>::value>::type data;
typename SIMD::Storage<4, Real, SIMD::use_simd<T, 4, IsAligned>::value>::type comp;
typename SIMD::Storage<4, Real, SIMD::use_simd<T, 4, IsAligned>::value>::type data;
};
};
@ -58,7 +59,7 @@ namespace Phanes::Core::Math
TVector4() = default;
/// Copy constructor
TVector4(const TVector4<Real, IsAlgined>& v);
TVector4(const TVector4<Real, IsAligned>& v);
/// <summary>
/// Construct vector from one scalar.

View File

@ -5,13 +5,9 @@
#include "Core/public/Math/Detail/Vector4Decl.inl"
#include "Core/public/Math/SIMD/SIMDIntrinsics.h"
#include "Core/public/Math/Vector4.hpp"
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
#include <stdio.h>
namespace Phanes::Core::Math
{
template<RealType T, bool A>
@ -48,56 +44,56 @@ namespace Phanes::Core::Math
/// Adds v2 to v1 in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector4<T, A> operator+=(TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
    // Single dispatch on SIMD::use_simd; the stale call keyed on A alone
    // would have applied the addition a second time.
    Detail::compute_vec4_add<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1, v2);
    return v1;
}
/// Adds the scalar s to v1 in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector4<T, A> operator+=(TVector4<T, A>& v1, T s)
{
    // Single dispatch on SIMD::use_simd; the stale call keyed on A alone
    // would have applied the addition a second time.
    Detail::compute_vec4_add<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1, s);
    return v1;
}
/// Subtracts v2 from v1 in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector4<T, A> operator-=(TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
    // Single dispatch on SIMD::use_simd; the stale call keyed on A alone
    // would have applied the subtraction a second time.
    Detail::compute_vec4_sub<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1, v2);
    return v1;
}
/// Subtracts the scalar s from v1 in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector4<T, A> operator-=(TVector4<T, A>& v1, T s)
{
    // Single dispatch on SIMD::use_simd; the stale call keyed on A alone
    // would have applied the subtraction a second time.
    Detail::compute_vec4_sub<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1, s);
    return v1;
}
/// Multiplies v1 by v2 component-wise, in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector4<T, A> operator*=(TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
    // Single dispatch on SIMD::use_simd; the stale call keyed on A alone
    // would have applied the multiplication a second time.
    Detail::compute_vec4_mul<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1, v2);
    return v1;
}
/// Scales v1 by the scalar s in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector4<T, A> operator*=(TVector4<T, A>& v1, T s)
{
    // Single dispatch on SIMD::use_simd; the stale call keyed on A alone
    // would have applied the multiplication a second time.
    Detail::compute_vec4_mul<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1, s);
    return v1;
}
/// Divides v1 by v2 component-wise, in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector4<T, A> operator/=(TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
    // Single dispatch on SIMD::use_simd; the stale call keyed on A alone
    // would have applied the division a second time.
    Detail::compute_vec4_div<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1, v2);
    return v1;
}
/// Divides v1 by the scalar s in place. The updated vector is returned by value.
template<RealType T, bool A>
TVector4<T, A> operator/=(TVector4<T, A>& v1, T s)
{
    // Single dispatch on SIMD::use_simd; the stale call keyed on A alone
    // would have applied the division a second time.
    Detail::compute_vec4_div<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1, s);
    return v1;
}
@ -105,7 +101,7 @@ namespace Phanes::Core::Math
TVector4<T, A> operator+(TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
TVector4<T, A> r;
Detail::compute_vec4_add<T, A>::map(r, v1, v2);
Detail::compute_vec4_add<T, SIMD::use_simd<T, 4, A>::value>::map(r, v1, v2);
return r;
}
@ -113,7 +109,7 @@ namespace Phanes::Core::Math
TVector4<T, A> operator+(TVector4<T, A>& v1, T s)
{
TVector4<T, A> r;
Detail::compute_vec4_add<T, A>::map(r, v1, s);
Detail::compute_vec4_add<T, SIMD::use_simd<T, 4, A>::value>::map(r, v1, s);
return r;
}
@ -121,7 +117,7 @@ namespace Phanes::Core::Math
TVector4<T, A> operator-(TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
TVector4<T, A> r;
Detail::compute_vec4_sub<T, A>::map(r, v1, v2);
Detail::compute_vec4_sub<T, SIMD::use_simd<T, 4, A>::value>::map(r, v1, v2);
return r;
}
@ -129,7 +125,7 @@ namespace Phanes::Core::Math
TVector4<T, A> operator-(TVector4<T, A>& v1, T s)
{
TVector4<T, A> r;
Detail::compute_vec4_sub<T, A>::map(r, v1, s);
Detail::compute_vec4_sub<T, SIMD::use_simd<T, 4, A>::value>::map(r, v1, s);
return r;
}
@ -137,7 +133,7 @@ namespace Phanes::Core::Math
TVector4<T, A> operator*(TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
TVector4<T, A> r;
Detail::compute_vec4_mul<T, A>::map(r, v1, v2);
Detail::compute_vec4_mul<T, SIMD::use_simd<T, 4, A>::value>::map(r, v1, v2);
return r;
}
@ -145,7 +141,7 @@ namespace Phanes::Core::Math
TVector4<T, A> operator*(TVector4<T, A>& v1, T s)
{
TVector4<T, A> r;
Detail::compute_vec4_mul<T, A>::map(r, v1, s);
Detail::compute_vec4_mul<T, SIMD::use_simd<T, 4, A>::value>::map(r, v1, s);
return r;
}
@ -153,7 +149,7 @@ namespace Phanes::Core::Math
TVector4<T, A> operator/(TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
TVector4<T, A> r;
Detail::compute_vec4_div<T, A>::map(r, v1, v2);
Detail::compute_vec4_div<T, SIMD::use_simd<T, 4, A>::value>::map(r, v1, v2);
return r;
}
@ -161,7 +157,7 @@ namespace Phanes::Core::Math
TVector4<T, A> operator/(TVector4<T, A>& v1, T s)
{
TVector4<T, A> r;
Detail::compute_vec4_div<T, A>::map(r, v1, s);
Detail::compute_vec4_div<T, SIMD::use_simd<T, 4, A>::value>::map(r, v1, s);
return r;
}
@ -170,13 +166,13 @@ namespace Phanes::Core::Math
/// Equality test; the result comes from the compute_vec4_eq helper.
template<RealType T, bool A>
bool operator==(const TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
    // Dispatch on SIMD::use_simd; the stale return keyed on A alone made
    // this statement unreachable.
    return Detail::compute_vec4_eq<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v2);
}
/// Inequality test; the result comes from the compute_vec4_ieq helper.
template<RealType T, bool A>
bool operator!=(const TVector4<T, A>& v1, const TVector4<T, A>& v2)
{
    // Dispatch on SIMD::use_simd; the stale return keyed on A alone made
    // this statement unreachable.
    return Detail::compute_vec4_ieq<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v2);
}
@ -187,22 +183,14 @@ namespace Phanes::Core::Math
/// Prefix increment: adds one to every component of v1 in place and returns v1.
template<RealType T, bool A>
TVector4<T, A>& operator++(TVector4<T, A>& v1)
{
    // The increment is done once, by the helper. The stale per-component
    // ++ statements are removed so the vector is not incremented twice.
    // map() takes (result, source), as the SSE specialization shows; v1 is both.
    // NOTE(review): the non-SIMD compute_vec4_inc is not visible here -
    // confirm it has the same (result, source) signature.
    Detail::compute_vec4_inc<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1);
    return v1;
}
/// Prefix decrement: subtracts one from every component of v1 in place and returns v1.
template<RealType T, bool A>
TVector4<T, A>& operator--(TVector4<T, A>& v1)
{
    // The decrement is done once, by the helper. The stale per-component
    // -- statements are removed so the vector is not decremented twice.
    // map() takes (result, source), as the SSE specialization shows; v1 is both.
    // NOTE(review): the non-SIMD compute_vec4_dec is not visible here -
    // confirm it has the same (result, source) signature.
    Detail::compute_vec4_dec<T, SIMD::use_simd<T, 4, A>::value>::map(v1, v1);
    return v1;
}