IntVector2 SIMD improvement.
This commit is contained in:
parent
c3f17c817d
commit
e545c1078f
@ -153,8 +153,8 @@ namespace Phanes::Core::Math::Detail
|
||||
}
|
||||
};
|
||||
|
||||
template<IntType T, bool S>
|
||||
struct compute_ivec2_mod
|
||||
template<IntType T>
|
||||
struct compute_ivec2_mod<T, false>
|
||||
{
|
||||
static constexpr void map(Phanes::Core::Math::TIntVector2<T, false>& r, const Phanes::Core::Math::TIntVector2<T, false>& v1, const Phanes::Core::Math::TIntVector2<T, false>& v2)
|
||||
{
|
||||
|
@ -2,4 +2,5 @@
|
||||
|
||||
// --- Vectors ------------------------
|
||||
|
||||
#include "Core/public/Math/Vector2.hpp" // <-- Includes Vector3/4 automatically
|
||||
#include "Core/public/Math/Vector2.hpp" // <-- Includes Vector3/4 automatically
|
||||
#include "Core/public/Math/IntVector2.hpp"
|
@ -7,8 +7,6 @@
|
||||
|
||||
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
|
||||
|
||||
#include "Core/public/Math/IntVector2.hpp"
|
||||
|
||||
namespace Phanes::Core::Math
|
||||
{
|
||||
template<IntType T, bool A>
|
||||
|
@ -51,13 +51,6 @@ namespace Phanes::Core::Math {
|
||||
* Specific instantiation of forward declarations.
|
||||
*/
|
||||
|
||||
// TIntVector2
|
||||
typedef TIntVector2<int> IntVector2;
|
||||
typedef TIntVector2<long> IntVector2l;
|
||||
|
||||
typedef std::vector<IntVector2> IntVector2List;
|
||||
typedef std::vector<IntVector2l> IntVector2Listl;
|
||||
|
||||
// TIntVector3
|
||||
typedef TIntVector3<int> IntVector3;
|
||||
typedef TIntVector3<long> IntVector3l;
|
||||
|
@ -197,28 +197,6 @@ namespace Phanes::Core::Math::Detail
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_vec4_eq<float, true>
|
||||
{
|
||||
static FORCEINLINE bool map(const Phanes::Core::Math::TVector4<float, true>& v1, const Phanes::Core::Math::TVector4<float, true>& v2)
|
||||
{
|
||||
float r;
|
||||
_mm_store_ps1(&r, _mm_cmpeq_ps(v1.comp, v2.comp));
|
||||
return (r == 0xffffffff) ? true : false;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_vec4_ieq<float, true>
|
||||
{
|
||||
static FORCEINLINE bool map(const Phanes::Core::Math::TVector4<float, true>& v1, const Phanes::Core::Math::TVector4<float, true>& v2)
|
||||
{
|
||||
float r;
|
||||
_mm_store_ps1(&r, _mm_cmpneq_ps(v1.comp, v2.comp));
|
||||
return (r == 0xffffffff) ? true : false;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// ============ //
|
||||
// TVector3 //
|
||||
@ -257,34 +235,6 @@ namespace Phanes::Core::Math::Detail
|
||||
};
|
||||
|
||||
|
||||
|
||||
/**
 * SSE specialization: exact equality test for two 3-component float vectors.
 * Derives from the 4-component comparer but supplies its own map().
 */
template<> struct compute_vec3_eq<float, true> : public compute_vec4_eq<float, true>
{
    /**
     * Compares x, y and z of both vectors.
     *
     * @param v1 Left-hand vector; its register is rebuilt from x/y/z with a zero fourth lane.
     * @param v2 Right-hand vector; its register is rebuilt the same way.
     *
     * @return true only if x, y and z all compare equal.
     */
    static FORCEINLINE bool map(Phanes::Core::Math::TVector3<float, true>& v1, Phanes::Core::Math::TVector3<float, true>& v2)
    {
        // Kept from the original: reload both registers so the unused fourth
        // lane holds 0.0f.
        // NOTE(review): this writes to both operands of a comparison -- confirm
        // callers rely on it before removing.
        v1.comp = _mm_setr_ps(v1.x, v1.y, v1.z, 0.0f);
        v2.comp = _mm_setr_ps(v2.x, v2.y, v2.z, 0.0f);

        // The previous _mm_store_ps1 into a single float wrote four floats and
        // then compared a NaN bit pattern with 0xffffffff, which is always
        // false. Use the lane mask and look at the three meaningful lanes only.
        const int equalLanes = _mm_movemask_ps(_mm_cmpeq_ps(v1.comp, v2.comp));

        return (equalLanes & 0x7) == 0x7;
    }
};
|
||||
|
||||
/**
 * SSE specialization: inequality test for two 3-component float vectors.
 * Derives from the 4-component comparer but supplies its own map().
 */
template<> struct compute_vec3_ieq<float, true> : public compute_vec4_ieq<float, true>
{
    /**
     * Compares x, y and z of both vectors.
     *
     * @param v1 Left-hand vector; its register is rebuilt from x/y/z with a zero fourth lane.
     * @param v2 Right-hand vector; its register is rebuilt the same way.
     *
     * @return true if at least one of x, y or z differs.
     */
    static FORCEINLINE bool map(Phanes::Core::Math::TVector3<float, true>& v1, Phanes::Core::Math::TVector3<float, true>& v2)
    {
        // Kept from the original: reload both registers so the unused fourth
        // lane holds 0.0f.
        // NOTE(review): this writes to both operands of a comparison -- confirm
        // callers rely on it before removing.
        v1.comp = _mm_setr_ps(v1.x, v1.y, v1.z, 0.0f);
        v2.comp = _mm_setr_ps(v2.x, v2.y, v2.z, 0.0f);

        // The previous _mm_store_ps1 into a single float wrote four floats and
        // then compared a NaN bit pattern with 0xffffffff, which is always
        // false. Use the lane mask and look at the three meaningful lanes only.
        const int differingLanes = _mm_movemask_ps(_mm_cmpneq_ps(v1.comp, v2.comp));

        return (differingLanes & 0x7) != 0;
    }
};
|
||||
|
||||
|
||||
template<> struct compute_vec3_add<float, true> : public compute_vec4_add<float, true> {};
|
||||
template<> struct compute_vec3_sub<float, true> : public compute_vec4_sub<float, true> {};
|
||||
template<> struct compute_vec3_mul<float, true> : public compute_vec4_mul<float, true> {};
|
||||
@ -399,32 +349,149 @@ namespace Phanes::Core::Math::Detail
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_vec2_eq<double, true>
|
||||
{
|
||||
static FORCEINLINE bool map(const Phanes::Core::Math::TVector2<double, true>& v1, const Phanes::Core::Math::TVector2<double, true>& v2)
|
||||
{
|
||||
double r;
|
||||
_mm_store1_pd(&r, _mm_cmpeq_pd(v1.comp, v2.comp));
|
||||
return (r == 0xffffffff) ? true : false;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_vec2_ieq<double, true>
|
||||
{
|
||||
static FORCEINLINE bool map(const Phanes::Core::Math::TVector2<double, true>& v1, const Phanes::Core::Math::TVector2<double, true>& v2)
|
||||
{
|
||||
double r;
|
||||
_mm_store1_pd(&r, _mm_cmpneq_pd(v1.comp, v2.comp));
|
||||
return (r == 0xffffffff) ? true : false;
|
||||
}
|
||||
};
|
||||
|
||||
// =============== //
|
||||
// TIntVector2 //
|
||||
// =============== //
|
||||
|
||||
template<>
|
||||
struct construct_ivec2<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const TIntVector2<Phanes::Core::Types::int64, true>& v2)
|
||||
{
|
||||
v1.comp = _mm_setr_epi64x(v2.x, v2.y);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 s)
|
||||
{
|
||||
v1.comp = _mm_set1_epi64x(s);
|
||||
}
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, Phanes::Core::Types::int64 x, Phanes::Core::Types::int64 y)
|
||||
{
|
||||
v1.comp = _mm_setr_epi64x(x, y);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Types::int64* comp)
|
||||
{
|
||||
v1.comp = _mm_loadu_epi64(comp);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_add<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
|
||||
{
|
||||
r.comp = _mm_add_epi64(v1.comp, v2.comp);
|
||||
}
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, T s)
|
||||
{
|
||||
r.comp = _mm_add_epi64(v1.comp, _mm_set1_epi64x(s));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_sub<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
|
||||
{
|
||||
r.comp = _mm_sub_epi64(v1.comp, v2.comp);
|
||||
}
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, T s)
|
||||
{
|
||||
r.comp = _mm_sub_epi64(v1.comp, _mm_set1_epi64x(s));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_inc<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1)
|
||||
{
|
||||
r.comp = _mm_add_epi64(v1.comp, _mm_set1_epi64x(1));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_dec<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1)
|
||||
{
|
||||
r.comp = _mm_sub_epi64(v1.comp, _mm_set1_epi64x(1));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_and<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
|
||||
{
|
||||
r.comp = _mm_and_si128(v1.comp, v2.comp);
|
||||
}
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, T s)
|
||||
{
|
||||
r.comp = _mm_and_si128(v1.comp, _mm_set1_epi64x(s));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_or<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
|
||||
{
|
||||
r.comp = _mm_or_si128(v1.comp, v2.comp);
|
||||
}
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, T s)
|
||||
{
|
||||
r.comp = _mm_or_si128(v1.comp, _mm_set1_epi64x(s));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_xor<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
|
||||
{
|
||||
r.comp = _mm_xor_si128(v1.comp, v2.comp);
|
||||
}
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, T s)
|
||||
{
|
||||
r.comp = _mm_xor_si128(v1.comp, _mm_set1_epi64x(s));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_left_shift<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
|
||||
{
|
||||
r.comp = _mm_sll_epi64(v1.comp, v2.comp);
|
||||
}
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, T s)
|
||||
{
|
||||
r.comp = _mm_sll_epi64(v1.comp, _mm_set1_epi64x(s));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct compute_ivec2_right_shift<Phanes::Core::Types::int64, true>
|
||||
{
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v2)
|
||||
{
|
||||
r.comp = _mm_srl_epi64(v1.comp, v2.comp);
|
||||
}
|
||||
|
||||
static FORCEINLINE void map(Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& r, const Phanes::Core::Math::TIntVector2<Phanes::Core::Types::int64, true>& v1, T s)
|
||||
{
|
||||
r.comp = _mm_srl_epi64(v1.comp, _mm_set1_epi64x(s));
|
||||
}
|
||||
};
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user