IntVector2 SIMD improvement.

This commit is contained in:
scorpioblood 2024-06-07 16:17:33 +02:00
parent c3f17c817d
commit e545c1078f
5 changed files with 143 additions and 84 deletions

View File

@ -153,8 +153,8 @@ namespace Phanes::Core::Math::Detail
}
};
template<IntType T, bool S>
struct compute_ivec2_mod
template<IntType T>
struct compute_ivec2_mod<T, false>
{
static constexpr void map(Phanes::Core::Math::TIntVector2<T, false>& r, const Phanes::Core::Math::TIntVector2<T, false>& v1, const Phanes::Core::Math::TIntVector2<T, false>& v2)
{

View File

@ -3,3 +3,4 @@
// --- Vectors ------------------------
#include "Core/public/Math/Vector2.hpp" // <-- Includes Vector3/4 automatically
#include "Core/public/Math/IntVector2.hpp"

View File

@ -7,8 +7,6 @@
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
#include "Core/public/Math/IntVector2.hpp"
namespace Phanes::Core::Math
{
template<IntType T, bool A>

View File

@ -51,13 +51,6 @@ namespace Phanes::Core::Math {
* Specific instantiation of forward declarations.
*/
// TIntVector2
typedef TIntVector2<int> IntVector2;
typedef TIntVector2<long> IntVector2l;
typedef std::vector<IntVector2> IntVector2List;
typedef std::vector<IntVector2l> IntVector2Listl;
// TIntVector3
typedef TIntVector3<int> IntVector3;
typedef TIntVector3<long> IntVector3l;

View File

@ -197,28 +197,6 @@ namespace Phanes::Core::Math::Detail
}
};
/// SSE specialization of the equality test for TVector4<float, true>.
template<>
struct compute_vec4_eq<float, true>
{
    /**
     * Lane-wise equality of two vectors.
     *
     * @param v1 Left-hand operand; its `comp` register is read.
     * @param v2 Right-hand operand; its `comp` register is read.
     * @return true only when all four lanes of v1.comp equal the matching lanes of v2.comp.
     */
    static FORCEINLINE bool map(const Phanes::Core::Math::TVector4<float, true>& v1, const Phanes::Core::Math::TVector4<float, true>& v2)
    {
        // _mm_cmpeq_ps sets a lane to all-ones when the pair is equal. That bit
        // pattern is a NaN when read back as a float, so it must be examined as
        // bits: _mm_movemask_ps gathers the four lane sign bits into an int.
        // (Storing the mask into a single float with _mm_store_ps1 would also
        // write 16 bytes into a 4-byte object.)
        return _mm_movemask_ps(_mm_cmpeq_ps(v1.comp, v2.comp)) == 0xF;
    }
};
/// SSE specialization of the inequality test for TVector4<float, true>.
template<>
struct compute_vec4_ieq<float, true>
{
    /**
     * Lane-wise inequality of two vectors.
     *
     * @param v1 Left-hand operand; its `comp` register is read.
     * @param v2 Right-hand operand; its `comp` register is read.
     * @return true when at least one lane of v1.comp differs from the matching lane of v2.comp.
     */
    static FORCEINLINE bool map(const Phanes::Core::Math::TVector4<float, true>& v1, const Phanes::Core::Math::TVector4<float, true>& v2)
    {
        // _mm_cmpneq_ps sets a lane to all-ones when the pair differs. The mask
        // is inspected as bits via _mm_movemask_ps, because an all-ones lane is
        // a NaN as a float and can never compare equal to a numeric constant.
        return _mm_movemask_ps(_mm_cmpneq_ps(v1.comp, v2.comp)) != 0;
    }
};
// ============ //
// TVector3 //
@ -257,34 +235,6 @@ namespace Phanes::Core::Math::Detail
};
/// SSE specialization of the equality test for TVector3<float, true>.
template<> struct compute_vec3_eq<float, true> : public compute_vec4_eq<float, true>
{
    /**
     * Equality of two three-component vectors held in four-lane registers.
     *
     * Both operands are taken by non-const reference because their `comp`
     * register is rebuilt from x, y, z with the fourth lane forced to 0.0f.
     *
     * @param v1 Left-hand operand (its `comp` is rewritten).
     * @param v2 Right-hand operand (its `comp` is rewritten).
     * @return true only when the x, y and z lanes all compare equal.
     */
    static FORCEINLINE bool map(Phanes::Core::Math::TVector3<float, true>& v1, Phanes::Core::Math::TVector3<float, true>& v2)
    {
        // Kept from the original: normalise the unused fourth lane of both operands.
        v1.comp = _mm_setr_ps(v1.x, v1.y, v1.z, 0.0f);
        v2.comp = _mm_setr_ps(v2.x, v2.y, v2.z, 0.0f);

        // The comparison mask is all-ones per equal lane (a NaN if read as a
        // float), so it is reduced to an integer bitmask. Only bits 0..2 are
        // tested, which correspond to the x, y, z lanes written above.
        const int equalLanes = _mm_movemask_ps(_mm_cmpeq_ps(v1.comp, v2.comp));
        return (equalLanes & 0x7) == 0x7;
    }
};
/// SSE specialization of the inequality test for TVector3<float, true>.
template<> struct compute_vec3_ieq<float, true> : public compute_vec4_ieq<float, true>
{
    /**
     * Inequality of two three-component vectors held in four-lane registers.
     *
     * Both operands are taken by non-const reference because their `comp`
     * register is rebuilt from x, y, z with the fourth lane forced to 0.0f.
     *
     * @param v1 Left-hand operand (its `comp` is rewritten).
     * @param v2 Right-hand operand (its `comp` is rewritten).
     * @return true when at least one of the x, y, z lanes differs.
     */
    static FORCEINLINE bool map(Phanes::Core::Math::TVector3<float, true>& v1, Phanes::Core::Math::TVector3<float, true>& v2)
    {
        // Kept from the original: normalise the unused fourth lane of both operands.
        v1.comp = _mm_setr_ps(v1.x, v1.y, v1.z, 0.0f);
        v2.comp = _mm_setr_ps(v2.x, v2.y, v2.z, 0.0f);

        // The mask is all-ones per differing lane; reduce it to an integer
        // bitmask and look only at bits 0..2 (the x, y, z lanes written above).
        const int differingLanes = _mm_movemask_ps(_mm_cmpneq_ps(v1.comp, v2.comp));
        return (differingLanes & 0x7) != 0;
    }
};
// TVector3<float, true> add / sub / mul: empty specializations that publicly
// inherit from the matching TVector4<float, true> helpers and add no members
// of their own.
template<> struct compute_vec3_add<float, true> : public compute_vec4_add<float, true> {};
template<> struct compute_vec3_sub<float, true> : public compute_vec4_sub<float, true> {};
template<> struct compute_vec3_mul<float, true> : public compute_vec4_mul<float, true> {};
@ -399,32 +349,149 @@ namespace Phanes::Core::Math::Detail
}
};
/// SSE2 specialization of the equality test for TVector2<double, true>.
template<>
struct compute_vec2_eq<double, true>
{
    /**
     * Lane-wise equality of two vectors.
     *
     * @param v1 Left-hand operand; its `comp` register is read.
     * @param v2 Right-hand operand; its `comp` register is read.
     * @return true only when both lanes of v1.comp equal the matching lanes of v2.comp.
     */
    static FORCEINLINE bool map(const Phanes::Core::Math::TVector2<double, true>& v1, const Phanes::Core::Math::TVector2<double, true>& v2)
    {
        // _mm_cmpeq_pd yields an all-ones lane per equal pair. That pattern is
        // a NaN as a double, so it is read as bits with _mm_movemask_pd (one
        // sign bit per lane) instead of being stored and compared numerically.
        // (_mm_store1_pd into a single double would also write 16 bytes into
        // an 8-byte object.)
        return _mm_movemask_pd(_mm_cmpeq_pd(v1.comp, v2.comp)) == 0x3;
    }
};
/// SSE2 specialization of the inequality test for TVector2<double, true>.
template<>
struct compute_vec2_ieq<double, true>
{
    /**
     * Lane-wise inequality of two vectors.
     *
     * @param v1 Left-hand operand; its `comp` register is read.
     * @param v2 Right-hand operand; its `comp` register is read.
     * @return true when at least one lane of v1.comp differs from the matching lane of v2.comp.
     */
    static FORCEINLINE bool map(const Phanes::Core::Math::TVector2<double, true>& v1, const Phanes::Core::Math::TVector2<double, true>& v2)
    {
        // _mm_cmpneq_pd yields an all-ones lane per differing pair; the mask is
        // reduced to an integer with _mm_movemask_pd because an all-ones lane
        // is a NaN as a double and never equals a numeric constant.
        return _mm_movemask_pd(_mm_cmpneq_pd(v1.comp, v2.comp)) != 0;
    }
};
// =============== //
// TIntVector2 //
// =============== //
/// SSE2 construction helpers for TIntVector2<int64, true>; every overload fills `v1.comp`.
template<>
struct construct_ivec2<Phanes::Core::Types::int64, true>
{
    /**
     * Builds v1.comp from the x and y members of another vector.
     * @param v1 Vector whose register is written.
     * @param v2 Vector whose x / y members are read.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const TIntVector2<Phanes::Core::Types::int64, true>& v2)
    {
        // _mm_set_epi64x takes (high, low): x lands in the low lane, y in the
        // high lane. Used instead of _mm_setr_epi64x, which is not part of the
        // SSE2 intrinsic set shipped by every compiler.
        v1.comp = _mm_set_epi64x(v2.y, v2.x);
    }

    /**
     * Broadcasts one scalar into both lanes.
     * @param v1 Vector whose register is written.
     * @param s  Value copied into both lanes.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
    {
        v1.comp = _mm_set1_epi64x(s);
    }

    /**
     * Builds v1.comp from two scalars.
     * @param v1 Vector whose register is written.
     * @param x  Value for the low lane.
     * @param y  Value for the high lane.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 x, Phanes::Core::Types::int64 y)
    {
        // (high, low) argument order - see the first overload.
        v1.comp = _mm_set_epi64x(y, x);
    }

    /**
     * Loads both lanes from memory.
     * @param v1   Vector whose register is written.
     * @param comp Pointer to two consecutive int64 values (16 bytes are read); no alignment is required.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Types::int64* comp)
    {
        // SSE2 unaligned 128-bit load. _mm_loadu_epi64 is an AVX-512 intrinsic
        // and is not available on a plain SSE2 target.
        v1.comp = _mm_loadu_si128(reinterpret_cast<const __m128i*>(comp));
    }
};
/// SSE2 addition for TIntVector2<int64, true>.
template<>
struct compute_ivec2_add<Phanes::Core::Types::int64, true>
{
    /**
     * Lane-wise sum of two vectors.
     * @param r  Receives v1.comp + v2.comp.
     * @param v1 Left-hand operand.
     * @param v2 Right-hand operand.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
    {
        r.comp = _mm_add_epi64(v1.comp, v2.comp);
    }

    /**
     * Adds one scalar to both lanes.
     * @param r  Receives the result.
     * @param v1 Vector operand.
     * @param s  Scalar added to each lane.
     */
    // The scalar type is spelled out: this is a full specialization, so there
    // is no template parameter `T` in scope.
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
    {
        r.comp = _mm_add_epi64(v1.comp, _mm_set1_epi64x(s));
    }
};
/// SSE2 subtraction for TIntVector2<int64, true>.
template<>
struct compute_ivec2_sub<Phanes::Core::Types::int64, true>
{
    /**
     * Lane-wise difference of two vectors.
     * @param r  Receives v1.comp - v2.comp.
     * @param v1 Minuend.
     * @param v2 Subtrahend.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
    {
        r.comp = _mm_sub_epi64(v1.comp, v2.comp);
    }

    /**
     * Subtracts one scalar from both lanes.
     * @param r  Receives the result.
     * @param v1 Vector operand.
     * @param s  Scalar subtracted from each lane.
     */
    // The scalar type is spelled out: this is a full specialization, so there
    // is no template parameter `T` in scope.
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
    {
        r.comp = _mm_sub_epi64(v1.comp, _mm_set1_epi64x(s));
    }
};
/// SSE2 increment for TIntVector2<int64, true>.
template<>
struct compute_ivec2_inc<Phanes::Core::Types::int64, true>
{
    /**
     * Writes v1 with both lanes increased by one into r.
     * @param r  Receives the result.
     * @param v1 Vector to increment (not modified).
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1)
    {
        const __m128i ones = _mm_set1_epi64x(1);
        r.comp = _mm_add_epi64(v1.comp, ones);
    }
};
/// SSE2 decrement for TIntVector2<int64, true>.
template<>
struct compute_ivec2_dec<Phanes::Core::Types::int64, true>
{
    /**
     * Writes v1 with both lanes decreased by one into r.
     * @param r  Receives the result.
     * @param v1 Vector to decrement (not modified).
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1)
    {
        const __m128i ones = _mm_set1_epi64x(1);
        r.comp = _mm_sub_epi64(v1.comp, ones);
    }
};
/// SSE2 bitwise AND for TIntVector2<int64, true>.
template<>
struct compute_ivec2_and<Phanes::Core::Types::int64, true>
{
    /**
     * Bitwise AND of two vectors.
     * @param r  Receives v1.comp & v2.comp.
     * @param v1 Left-hand operand.
     * @param v2 Right-hand operand.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
    {
        r.comp = _mm_and_si128(v1.comp, v2.comp);
    }

    /**
     * Bitwise AND of both lanes with one scalar.
     * @param r  Receives the result.
     * @param v1 Vector operand.
     * @param s  Scalar mask applied to each lane.
     */
    // The scalar type is spelled out: this is a full specialization, so there
    // is no template parameter `T` in scope.
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
    {
        r.comp = _mm_and_si128(v1.comp, _mm_set1_epi64x(s));
    }
};
/// SSE2 bitwise OR for TIntVector2<int64, true>.
template<>
struct compute_ivec2_or<Phanes::Core::Types::int64, true>
{
    /**
     * Bitwise OR of two vectors.
     * @param r  Receives v1.comp | v2.comp.
     * @param v1 Left-hand operand.
     * @param v2 Right-hand operand.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
    {
        r.comp = _mm_or_si128(v1.comp, v2.comp);
    }

    /**
     * Bitwise OR of both lanes with one scalar.
     * @param r  Receives the result.
     * @param v1 Vector operand.
     * @param s  Scalar OR-ed into each lane.
     */
    // The scalar type is spelled out: this is a full specialization, so there
    // is no template parameter `T` in scope.
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
    {
        r.comp = _mm_or_si128(v1.comp, _mm_set1_epi64x(s));
    }
};
/// SSE2 bitwise XOR for TIntVector2<int64, true>.
template<>
struct compute_ivec2_xor<Phanes::Core::Types::int64, true>
{
    /**
     * Bitwise XOR of two vectors.
     * @param r  Receives v1.comp ^ v2.comp.
     * @param v1 Left-hand operand.
     * @param v2 Right-hand operand.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
    {
        r.comp = _mm_xor_si128(v1.comp, v2.comp);
    }

    /**
     * Bitwise XOR of both lanes with one scalar.
     * @param r  Receives the result.
     * @param v1 Vector operand.
     * @param s  Scalar XOR-ed into each lane.
     */
    // The scalar type is spelled out: this is a full specialization, so there
    // is no template parameter `T` in scope.
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
    {
        r.comp = _mm_xor_si128(v1.comp, _mm_set1_epi64x(s));
    }
};
/// SSE2 left shift for TIntVector2<int64, true>.
template<>
struct compute_ivec2_left_shift<Phanes::Core::Types::int64, true>
{
    /**
     * Shifts each lane of v1 left by the count held in the same lane of v2.
     * @param r  Receives the result (may alias v1 or v2; it is written last).
     * @param v1 Values to shift.
     * @param v2 Per-lane shift counts.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
    {
        // _mm_sll_epi64 applies ONE count - the low 64 bits of its second
        // operand - to both lanes. To honour a different count per lane the
        // shift is done twice and the matching halves are stitched together.
        const __m128i highCount   = _mm_srli_si128(v2.comp, 8);          // move v2's high lane into the low 64 bits
        const __m128i byLowCount  = _mm_sll_epi64(v1.comp, v2.comp);     // both lanes << low count
        const __m128i byHighCount = _mm_sll_epi64(v1.comp, highCount);   // both lanes << high count

        // Result = { low lane of byLowCount, high lane of byHighCount }.
        r.comp = _mm_unpacklo_epi64(byLowCount, _mm_srli_si128(byHighCount, 8));
    }

    /**
     * Shifts both lanes of v1 left by the same scalar count.
     * @param r  Receives the result.
     * @param v1 Values to shift.
     * @param s  Shift count applied to both lanes.
     */
    // The scalar type is spelled out: this is a full specialization, so there
    // is no template parameter `T` in scope.
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
    {
        // Only the low 64 bits of the count operand are used by the intrinsic.
        r.comp = _mm_sll_epi64(v1.comp, _mm_set1_epi64x(s));
    }
};
/// SSE2 right shift for TIntVector2<int64, true>.
template<>
struct compute_ivec2_right_shift<Phanes::Core::Types::int64, true>
{
    /**
     * Shifts each lane of v1 right by the count held in the same lane of v2,
     * replicating the sign bit (arithmetic shift).
     * @param r  Receives the result (may alias v1 or v2; it is written last).
     * @param v1 Values to shift.
     * @param v2 Per-lane shift counts.
     */
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
    {
        // _mm_srl_epi64 applies ONE count - the low 64 bits of its second
        // operand - to both lanes, and it shifts in zeros. Each lane is
        // therefore shifted with its own count, and the sign is restored with
        // the identity  x >> n == ((x ^ sign) >>> n) ^ sign.
        const __m128i sign        = sign_mask(v1.comp);
        const __m128i magnitude   = _mm_xor_si128(v1.comp, sign);
        const __m128i highCount   = _mm_srli_si128(v2.comp, 8);           // move v2's high lane into the low 64 bits
        const __m128i byLowCount  = _mm_srl_epi64(magnitude, v2.comp);    // both lanes >>> low count
        const __m128i byHighCount = _mm_srl_epi64(magnitude, highCount);  // both lanes >>> high count

        // { low lane of byLowCount, high lane of byHighCount }, then undo the flip.
        const __m128i merged = _mm_unpacklo_epi64(byLowCount, _mm_srli_si128(byHighCount, 8));
        r.comp = _mm_xor_si128(merged, sign);
    }

    /**
     * Shifts both lanes of v1 right by the same scalar count, replicating the
     * sign bit (arithmetic shift).
     * @param r  Receives the result.
     * @param v1 Values to shift.
     * @param s  Shift count applied to both lanes.
     */
    // The scalar type is spelled out: this is a full specialization, so there
    // is no template parameter `T` in scope.
    static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
    {
        // NOTE(review): arithmetic shift is chosen so negative lanes behave
        // like `>>` on a signed scalar - confirm against the non-SIMD path.
        const __m128i sign    = sign_mask(v1.comp);
        const __m128i shifted = _mm_srl_epi64(_mm_xor_si128(v1.comp, sign), _mm_set1_epi64x(s));
        r.comp = _mm_xor_si128(shifted, sign);
    }

private:
    /// Returns all-ones in every 64-bit lane of `v` that is negative, zero otherwise.
    static FORCEINLINE __m128i sign_mask(__m128i v)
    {
        // Copy each lane's upper 32 bits over its lower 32 bits, then smear the
        // sign bit across all 32 bits with an arithmetic 32-bit shift.
        return _mm_srai_epi32(_mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)), 31);
    }
};
}