Bug fixes / SIMD improvement.

This commit is contained in:
THoehne 2024-08-16 17:36:21 +02:00
parent ac70f0e062
commit 0a731ae2c4
9 changed files with 148 additions and 55 deletions

View File

@ -8,17 +8,46 @@ namespace Phanes::Core::Math::Detail
// Primary template for the 3x3 transpose kernel. Intentionally empty: the
// actual work lives in specializations selected by the bool parameter S.
template<RealType T, bool S>
struct compute_mat3_transpose {};
// Primary template for the 3x3 multiplication kernel. Intentionally empty: the
// actual work lives in specializations selected by the bool parameter S.
template<RealType T, bool S>
struct compute_mat3_mul {};
// Transpose kernel for TMatrix3<T, false>.
template<RealType T>
struct compute_mat3_transpose<T, false>
{
    // Stores the transpose of m1 in r.
    //
    // @param(r)  Receives the result.
    // @param(m1) Matrix to transpose.
    //
    // NOTE(review): assumes the nine-scalar TMatrix3 constructor takes its
    // elements row by row - confirm against the constructor declaration.
    static constexpr void map(Phanes::Core::Math::TMatrix3<T, false>& r, const TMatrix3<T, false>& m1)
    {
        // Element (row, col) of the result is m1(col, row).
        r = TMatrix3<T, false>(m1(0, 0), m1(1, 0), m1(2, 0),
                               m1(0, 1), m1(1, 1), m1(2, 1),
                               m1(0, 2), m1(1, 2), m1(2, 2));
    }
};
// Multiplication kernel for TMatrix3<T, false>.
template<RealType T>
struct compute_mat3_mul<T, false>
{
    // Matrix * matrix: r(row, col) = sum over k of m1(row, k) * m2(k, col).
    //
    // Elements of r are written one at a time in row-major order while m1 and
    // m2 are still being read.
    static constexpr void map(Phanes::Core::Math::TMatrix3<T, false>& r, const TMatrix3<T, false>& m1, const TMatrix3<T, false>& m2)
    {
        for (int row = 0; row < 3; ++row)
        {
            for (int col = 0; col < 3; ++col)
            {
                r(row, col) = m1(row, 0) * m2(0, col) + m1(row, 1) * m2(1, col) + m1(row, 2) * m2(2, col);
            }
        }
    }

    // Matrix * vector: each component of r is the dot product of one matrix
    // row with v. Components are written in x, y, z order.
    static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const TMatrix3<T, false>& m1, const TVector3<T, false>& v)
    {
        r.x = m1(0, 0) * v.x + m1(0, 1) * v.y + m1(0, 2) * v.z;
        r.y = m1(1, 0) * v.x + m1(1, 1) * v.y + m1(1, 2) * v.z;
        r.z = m1(2, 0) * v.x + m1(2, 1) * v.y + m1(2, 2) * v.z;
    }
};
}

View File

@ -134,7 +134,7 @@ namespace Phanes::Core::Math {
// Absolute value, specialization for float.
//
// @param(s) Input value.
//
// @return |s| as a float.
template<>
FORCEINLINE float Abs<float>(float s)
{
    // Unqualified fabs may resolve to the double overload; cast explicitly so
    // the narrowing back to float is intentional rather than implicit.
    return static_cast<float>(fabs(s));
}
template<>

View File

@ -88,9 +88,31 @@ namespace Phanes::Core::Math {
// Matrix2
typedef TMatrix2<float> Matrix2;
typedef TMatrix2<float> Matrix2f;
typedef TMatrix2<double> Matrix2d;

// Matrix3
typedef TMatrix3<float, false> Matrix3;
typedef TMatrix3<float, false> Matrix3f;
typedef TMatrix3<double, false> Matrix3d;

// "Reg" aliases: the second template argument is whatever SIMD::use_simd
// reports for the given element type, length and alignment flag.
typedef TMatrix3<float, SIMD::use_simd<float, 3, true>::value> Matrix3Reg;
typedef TMatrix3<float, SIMD::use_simd<float, 3, true>::value> Matrix3Regf;
typedef TMatrix3<double, SIMD::use_simd<double, 3, true>::value> Matrix3Regd;
typedef TMatrix3<double, SIMD::use_simd<double, 3, false>::value> Matrix3Regf64;

// Matrix4
typedef TMatrix4<float, false> Matrix4;
typedef TMatrix4<float, false> Matrix4f;
typedef TMatrix4<double, false> Matrix4d;

// The 4x4 "Reg" aliases must name TMatrix4; they previously aliased TMatrix3.
typedef TMatrix4<float, SIMD::use_simd<float, 4, true>::value> Matrix4Reg;
typedef TMatrix4<float, SIMD::use_simd<float, 4, true>::value> Matrix4Regf;
typedef TMatrix4<double, SIMD::use_simd<double, 4, true>::value> Matrix4Regd;
typedef TMatrix4<double, SIMD::use_simd<double, 4, false>::value> Matrix4Regf64;
} // Phanes::Core::Math::coretypes

View File

@ -104,6 +104,12 @@ namespace Phanes::Core::Math {
return "([" + ToString(m(0, 0)) + ", " + ToString(m(0, 1)) + "], [" + ToString(m(1, 0)) + ", " + ToString(m(1, 1)) + "])";
}
// Formats a 3x3 matrix as "([m00, m01, m02], [m10, m11, m12], [m20, m21, m22])",
// one bracketed group per row.
template<RealType T, bool S>
std::string ToString(const TMatrix3<T, S>& m)
{
    std::string text = "(";

    for (int row = 0; row < 3; ++row)
    {
        // Rows are separated by ", "; no separator before the first one.
        if (row != 0)
        {
            text += ", ";
        }

        text += "[" + ToString(m(row, 0)) + ", " + ToString(m(row, 1)) + ", " + ToString(m(row, 2)) + "]";
    }

    text += ")";
    return text;
}
//std::string toString(const Matrix3& v);

View File

@ -15,6 +15,7 @@ namespace Phanes::Core::Math {
// 3x3 Matrix defined in column-major order.
// Accessed by M[Row][Col].
template<RealType T, bool S>
struct TMatrix3
{
@ -39,9 +40,10 @@ namespace Phanes::Core::Math {
/// </summary>
TVector3<T, S> c2;
};
T data[3][4];
};
T data[3][3];
public:
@ -76,9 +78,9 @@ namespace Phanes::Core::Math {
* Construct Matrix from parameters.
*
* @param(n00) M[0][0]
* @param(n10) M[1][0]
* @param(n01) M[0][1]
* @param(n11) M[1][1]
* @param(n10) M[0][1]
* @param(n20) M[0][2]
* @param(n01) M[1][0]
* ...
*
* @note nXY = n[Row][Col]
@ -109,6 +111,11 @@ namespace Phanes::Core::Math {
public:
FORCEINLINE T operator() (int n, int m) const
{
return this->data[m][n];
}
FORCEINLINE T& operator() (int n, int m)
{
return this->data[m][n];
@ -119,12 +126,7 @@ namespace Phanes::Core::Math {
return (*reinterpret_cast<TVector3<T, S>*>(this->m[m]));
}
FORCEINLINE const T& operator() (int n, int m) const
{
return this->data[m][n];
}
FORCEINLINE const TVector3<T, S>& operator[] (int m) const
FORCEINLINE const TVector3<T, S> operator[] (int m) const
{
return (*reinterpret_cast<TVector3<T, S>*>(this->m[m]));
}
@ -229,14 +231,7 @@ namespace Phanes::Core::Math {
*/
// Declaration only. The matrix product is implemented out of line through
// Detail::compute_mat3_mul; an inline body here multiplied the columns
// pairwise, which is not a matrix product, and duplicated that definition.
template<RealType T, bool S>
TMatrix3<T, S> operator*= (TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2);
/**
* Multiply matrix with scalar
@ -298,9 +293,9 @@ namespace Phanes::Core::Math {
template<RealType T, bool S>
TMatrix3<T, S> operator+ (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
{
    // Sum the matrices column by column. The result must be a TMatrix3<T, S>;
    // building a TMatrix2<T> from three columns here was a copy-paste error.
    return TMatrix3<T, S>(m1.c0 + m2.c0,
                          m1.c1 + m2.c1,
                          m1.c2 + m2.c2);
}
/**
@ -348,21 +343,6 @@ namespace Phanes::Core::Math {
m.c2 * s);
}
/**
* Divide matrix by matrix, column by column.
*
* @param(m1) Matrix (dividend)
* @param(m2) Matrix (divisor)
*
* @return Matrix whose columns are m1.c0 / m2.c0, m1.c1 / m2.c1 and m1.c2 / m2.c2.
*
* @note Presumably componentwise, depending on TVector3's operator/ - confirm.
*/
template<RealType T, bool S>
TMatrix3<T, S> operator/ (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
{
return TMatrix3<T, S>(m1.c0 / m2.c0,
m1.c1 / m2.c1,
m1.c2 / m2.c2);
}
/**
* Multiply scalar with matrix
*
@ -387,12 +367,10 @@ namespace Phanes::Core::Math {
*/
// Declaration only. The matrix product is implemented out of line through
// Detail::compute_mat3_mul; an inline body here multiplied the columns
// pairwise, which is not a matrix product, and duplicated that definition.
template<RealType T, bool S>
TMatrix3<T, S> operator* (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2);
/**
* Multiply matrix with vector
*
* @param(m1) Matrix
* @param(v) Vector
*
* @return Resulting vector
*
* @note Declaration only; the definition is provided out of line.
*/
template<RealType T, bool S>
TVector3<T, S> operator* (const TMatrix3<T, S>& m1, const TVector3<T, S>& v);
/**
* Compare matrix with other matrix.
@ -495,7 +473,7 @@ namespace Phanes::Core::Math {
*/
template<RealType T, bool S>
bool Inverse(TMatrix3<T, S>& r, const TMatrix3<T, S>& m1)
bool Inverse(const TMatrix3<T, S>& m1, Ref<TMatrix3<T, S>> r)
{
TVector3<T, S> r0 = CrossP(m1.c1, m1.c2);
TVector3<T, S> r1 = CrossP(m1.c2, m1.c0);

View File

@ -24,4 +24,28 @@ namespace Phanes::Core::Math
return r;
}
// In-place matrix product: m1 is replaced by m1 * m2 and the updated value is
// returned by value.
template<RealType T, bool S>
TMatrix3<T, S> operator*= (TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
{
    // Compute into a scratch matrix first; the kernel reads m1 while it writes
    // its output, so m1 cannot be the destination.
    TMatrix3<T, S> product;
    Detail::compute_mat3_mul<T, S>::map(product, m1, m2);

    m1 = product;
    return m1;
}
// Matrix product m1 * m2, delegated to the kernel selected by <T, S>.
template<RealType T, bool S>
TMatrix3<T, S> operator* (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
{
    TMatrix3<T, S> product;
    Detail::compute_mat3_mul<T, S>::map(product, m1, m2);

    return product;
}
// Matrix-vector product m1 * v, delegated to the kernel selected by <T, S>.
template<RealType T, bool S>
TVector3<T, S> operator* (const TMatrix3<T, S>& m1, const TVector3<T, S>& v)
{
    TVector3<T, S> transformed;
    Detail::compute_mat3_mul<T, S>::map(transformed, m1, v);

    return transformed;
}
}

View File

@ -26,7 +26,6 @@ namespace Phanes::Core::Math::SIMD
/// </summary>
/// <typeparam name="T">Type of vector</typeparam>
/// <typeparam name="L">Length of vector</typeparam>
/// <typeparam name="SimdActive">Whether SIMD intrinsics exist, that support the vector type and length.</typeparam>
/// <typeparam name="IsAligned">Whether the vector is aligned for simd usage.</typeparam>
template<typename T, size_t L, bool IsAligned>
struct use_simd

View File

@ -770,6 +770,45 @@ namespace Phanes::Core::Math::Detail
}
};
// SSE multiplication kernel for TMatrix3<float, true>.
//
// The matrix is handled as three column registers (c0, c1, c2). A product
// column is a linear combination of the columns of the left matrix:
//
//     result = m.c0 * k.x + m.c1 * k.y + m.c2 * k.z
//
// where k is the matching column of the right matrix (or the vector operand).
//
// NOTE(review): assumes lanes 0, 1, 2 of each register hold x, y, z (rows
// 0, 1, 2 of a column) - confirm against TVector3<float, true>.
template<>
struct compute_mat3_mul<float, true>
{
    // Returns m.c0 * coeffs[0] + m.c1 * coeffs[1] + m.c2 * coeffs[2], with each
    // coefficient broadcast to all lanes. Lane 3 of coeffs is ignored.
    static FORCEINLINE __m128 combine_columns(const TMatrix3<float, true>& m, __m128 coeffs)
    {
        const __m128 kx = _mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(0, 0, 0, 0));
        const __m128 ky = _mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(1, 1, 1, 1));
        const __m128 kz = _mm_shuffle_ps(coeffs, coeffs, _MM_SHUFFLE(2, 2, 2, 2));

        // Same summation order as the scalar kernel: (c0*x + c1*y) + c2*z.
        return _mm_add_ps(_mm_add_ps(_mm_mul_ps(m.c0.data, kx),
                                     _mm_mul_ps(m.c1.data, ky)),
                          _mm_mul_ps(m.c2.data, kz));
    }

    // Matrix * matrix.
    //
    // @param(r)  Receives m1 * m2.
    // @param(m1) Left matrix.
    // @param(m2) Right matrix.
    static FORCEINLINE void map(Phanes::Core::Math::TMatrix3<float, true>& r, const TMatrix3<float, true>& m1, const TMatrix3<float, true>& m2)
    {
        // Compute every column before storing, so r may alias m1 or m2.
        const __m128 col0 = combine_columns(m1, m2.c0.data);
        const __m128 col1 = combine_columns(m1, m2.c1.data);
        const __m128 col2 = combine_columns(m1, m2.c2.data);

        r.c0.data = col0;
        r.c1.data = col1;
        r.c2.data = col2;
    }

    // Matrix * vector.
    //
    // @param(r)  Receives m1 * v.
    // @param(m1) Matrix.
    // @param(v)  Vector.
    static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& r, const TMatrix3<float, true>& m1, const TVector3<float, true>& v)
    {
        r.data = combine_columns(m1, v.data);
    }
};
// =========== //
// Matrix4 //
// =========== //

View File

@ -14,17 +14,13 @@ namespace Phanes::Core::Math::SIMD
// Storage specialization for the `false` case: a plain array of L elements.
//
// `type` is declared exactly once. A wrapper struct named `type` next to an
// array typedef of the same name is a conflicting redeclaration.
template<size_t L, typename T>
struct Storage<L, T, false>
{
    typedef T type[L];
};
// Storage specialization for length 3 in the `false` case: the array holds
// four elements, one more than the logical length.
// NOTE(review): presumably padding so the layout matches four-wide storage -
// confirm.
//
// `type` is declared exactly once. A wrapper struct named `type` next to an
// array typedef of the same name is a conflicting redeclaration.
template<typename T>
struct Storage<3, T, false>
{
    typedef T type[4];
};