From ed44c3695c76c2471b6a1f3b1d4c4bb9de7ac64b Mon Sep 17 00:00:00 2001
From: scorpioblood <77296181+scorpioblood@users.noreply.github.com>
Date: Wed, 12 Jun 2024 23:26:39 +0200
Subject: [PATCH] Update Matrix3.

---
 .../Core/public/Math/Detail/Vector2Decl.inl   |  78 +++--
 .../Core/public/Math/Detail/Vector3Decl.inl   |  14 +
 .../Source/Runtime/Core/public/Math/MathFwd.h |   2 +-
 .../Runtime/Core/public/Math/Matrix2.hpp      |  32 +-
 .../Runtime/Core/public/Math/Matrix3.hpp      | 279 +++++++++---------
 .../public/Math/SIMD/PhanesVectorMathFPU.hpp  |  66 +++++
 .../public/Math/SIMD/PhanesVectorMathSSE.hpp  |  22 ++
 .../Runtime/Core/public/Math/Vector3.hpp      |  20 +-
 .../Runtime/Core/public/Math/Vector3.inl      |  17 ++
 9 files changed, 306 insertions(+), 224 deletions(-)
diff --git a/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl b/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl
index e530c9e..c19a769 100644
--- a/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl
+++ b/Engine/Source/Runtime/Core/public/Math/Detail/Vector2Decl.inl
@@ -21,12 +21,6 @@ namespace Phanes::Core::Math::Detail
     template<RealType T, bool S>
     struct compute_vec2_div {};
 
-    template<RealType T, bool S>
-    struct compute_vec2_eq {};
-
-    template<RealType T, bool S>
-    struct compute_vec2_ieq {};
-
     template<RealType T, bool S>
     struct compute_vec2_inc {};
 
@@ -38,29 +32,29 @@ namespace Phanes::Core::Math::Detail
     template<RealType T>
     struct construct_vec2<T, false>
     {
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& v1, const TVector2<T, A>& v2)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& v1, const TVector2<T, false>& v2)
         {
             v1.x = v2.x;
             v1.y = v2.y;
         }
 
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& v1, T s)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& v1, T s)
         {
             v1.x = s;
             v1.y = s;
         }
 
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& v1, T x, T y)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& v1, T x, T y)
         {
             v1.x = x;
             v1.y = y;
         }
 
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& v1, const T* comp)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& v1, const T* comp)
         {
             v1.x = comp[0];
             v1.y = comp[1];
@@ -71,15 +65,15 @@ namespace Phanes::Core::Math::Detail
     template<RealType T>
     struct compute_vec2_add<T, false>
     {
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1, const Phanes::Core::Math::TVector2<T, A>& v2)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1, const Phanes::Core::Math::TVector2<T, false>& v2)
         {
             r.x = v1.x + v2.x;
             r.y = v1.y + v2.y;
         }
 
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1, T s)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1, T s)
         {
             r.x = v1.x + s;
             r.y = v1.y + s;
@@ -90,15 +84,15 @@ namespace Phanes::Core::Math::Detail
     template<RealType T>
     struct compute_vec2_sub<T, false>
     {
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1, const Phanes::Core::Math::TVector2<T, A>& v2)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1, const Phanes::Core::Math::TVector2<T, false>& v2)
         {
             r.x = v1.x - v2.x;
             r.y = v1.y - v2.y;
         }
 
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1, T s)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1, T s)
         {
             r.x = v1.x - s;
             r.y = v1.y - s;
@@ -109,15 +103,15 @@ namespace Phanes::Core::Math::Detail
     template<RealType T>
     struct compute_vec2_mul<T, false>
     {
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1, const Phanes::Core::Math::TVector2<T, A>& v2)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1, const Phanes::Core::Math::TVector2<T, false>& v2)
         {
             r.x = v1.x * v2.x;
             r.y = v1.y * v2.y;
         }
 
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1, T s)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1, T s)
         {
             r.x = v1.x * s;
             r.y = v1.y * s;
@@ -128,15 +122,15 @@ namespace Phanes::Core::Math::Detail
     template<RealType T>
     struct compute_vec2_div<T, false>
     {
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1, const Phanes::Core::Math::TVector2<T, A>& v2)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1, const Phanes::Core::Math::TVector2<T, false>& v2)
         {
             r.x = v1.x / v2.x;
             r.y = v1.y / v2.y;
         }
 
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1, T s)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1, T s)
         {
             s = (T)1.0 / s;
 
@@ -145,22 +139,22 @@ namespace Phanes::Core::Math::Detail
         }
     };
 
-    template<RealType T>
-    struct compute_vec2_eq<T, false>
+    template<RealType T, bool S>
+    struct compute_vec2_eq
     {
-        template<bool A>
-        static constexpr bool map(const Phanes::Core::Math::TVector2<T, A>& v1, const Phanes::Core::Math::TVector2<T, A>& v2)
+        
+        static constexpr bool map(const Phanes::Core::Math::TVector2<T, S>& v1, const Phanes::Core::Math::TVector2<T, S>& v2)
         {
             return (Phanes::Core::Math::Abs(v1.x - v2.x) < P_FLT_INAC &&
                 Phanes::Core::Math::Abs(v1.y - v2.y) < P_FLT_INAC);
         }
     };
 
-    template<RealType T>
-    struct compute_vec2_ieq<T, false>
+    template<RealType T, bool S>
+    struct compute_vec2_ieq
     {
-        template<bool A>
-        static constexpr bool map(const Phanes::Core::Math::TVector2<T, A>& v1, const Phanes::Core::Math::TVector2<T, A>& v2)
+        
+        static constexpr bool map(const Phanes::Core::Math::TVector2<T, S>& v1, const Phanes::Core::Math::TVector2<T, S>& v2)
         {
             return (Phanes::Core::Math::Abs(v1.x - v2.x) > P_FLT_INAC ||
                 Phanes::Core::Math::Abs(v1.y - v2.y) > P_FLT_INAC);
@@ -170,8 +164,8 @@ namespace Phanes::Core::Math::Detail
     template<RealType T>
     struct compute_vec2_inc<T, false>
     {
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1)
         {
             r.x = v1.x + 1;
             r.y = v1.y + 1;
@@ -181,8 +175,8 @@ namespace Phanes::Core::Math::Detail
     template<RealType T>
     struct compute_vec2_dec<T, false>
     {
-        template<bool A>
-        static constexpr void map(Phanes::Core::Math::TVector2<T, A>& r, const Phanes::Core::Math::TVector2<T, A>& v1)
+        
+        static constexpr void map(Phanes::Core::Math::TVector2<T, false>& r, const Phanes::Core::Math::TVector2<T, false>& v1)
         {
             r.x = v1.x - 1;
             r.y = v1.y - 1;
diff --git a/Engine/Source/Runtime/Core/public/Math/Detail/Vector3Decl.inl b/Engine/Source/Runtime/Core/public/Math/Detail/Vector3Decl.inl
index 6151cb6..5612c25 100644
--- a/Engine/Source/Runtime/Core/public/Math/Detail/Vector3Decl.inl
+++ b/Engine/Source/Runtime/Core/public/Math/Detail/Vector3Decl.inl
@@ -31,6 +31,9 @@ namespace Phanes::Core::Math::Detail
     template<RealType T, bool S>
     struct compute_vec3_dec {};
 
+    template<RealType T, bool S>
+    struct compute_vec3_cross_p {};
+
 
 
     template<RealType T>
@@ -199,5 +202,16 @@ namespace Phanes::Core::Math::Detail
             r.z = v1.z - 1;
         }
     };
+
+    template<RealType T>
+    struct compute_vec3_cross_p<T, false>
+    {
+        static constexpr void map(Phanes::Core::Math::TVector3<T, false>& r, const Phanes::Core::Math::TVector3<T, false>& v1, const Phanes::Core::Math::TVector3<T, false>& v2)
+        {   // Cross product v1 x v2. All three components are evaluated before r is written, so r may alias v1 or v2.
+            r = Phanes::Core::Math::TVector3<T, false>((v1.y * v2.z) - (v1.z * v2.y),
+                                                       (v1.z * v2.x) - (v1.x * v2.z),
+                                                       (v1.x * v2.y) - (v1.y * v2.x));
+        }
+    };
 }
 
diff --git a/Engine/Source/Runtime/Core/public/Math/MathFwd.h b/Engine/Source/Runtime/Core/public/Math/MathFwd.h
index 439942a..36529b5 100644
--- a/Engine/Source/Runtime/Core/public/Math/MathFwd.h
+++ b/Engine/Source/Runtime/Core/public/Math/MathFwd.h
@@ -29,7 +29,6 @@ namespace Phanes::Core::Math {
     template<RealType T>    struct TRay;
     template<RealType T>    struct TLine;
     template<RealType T>    struct TPlane;
-    template<RealType T>    struct TMatrix3;
     template<RealType T>    struct TMatrix4;
     template<RealType T>    struct TQuaternion;
     template<RealType T>    struct TTransform;
@@ -40,6 +39,7 @@ namespace Phanes::Core::Math {
     template<IntType T>		struct TIntPoint3;
     template<IntType T>		struct TIntPoint4;
     template<RealType T>    struct TMatrix2;
+    template<RealType T, bool S>    struct TMatrix3;
     template<RealType T, bool S>    struct TVector2;
     template<RealType T, bool S>    struct TVector3;
     template<RealType T, bool S>	struct TVector4;
diff --git a/Engine/Source/Runtime/Core/public/Math/Matrix2.hpp b/Engine/Source/Runtime/Core/public/Math/Matrix2.hpp
index e6106aa..4f8165d 100644
--- a/Engine/Source/Runtime/Core/public/Math/Matrix2.hpp
+++ b/Engine/Source/Runtime/Core/public/Math/Matrix2.hpp
@@ -94,30 +94,24 @@ namespace Phanes::Core::Math {
         }
 
     public:
-        
-        constexpr GetCol(int n)
-        {
-            switch (n)
-            {
-            case 0:
-                return this->c0;
-            case 1:
-                return this->c1;
-            default:
-                break;
-            }
-        }
+       
 
         FORCEINLINE T operator() (int n, int m) const
         {
-            this->data[m][n];
+            return this->data[m][n];   // the statement was evaluated but never returned
         }
 
-        FORCEINLINE TVector2<T, false>& operator[] (int m) const
+        FORCEINLINE TVector2<T, false> operator[] (int m) const
         {
-            static_assert(m > -1 && m < 2, "(PHANES_CORE::MATH [Matrix2.hpp]): m must be between 0 or 1.");
+            switch (m)
+            {
+            case 0:
+                return this->c0;
+            case 1:
+                return this->c1;
+            }
 
-            return GetCol(m);
+            throw std::invalid_argument("m is outside valid range.");
         }
 
     };
@@ -247,15 +241,13 @@ namespace Phanes::Core::Math {
     template<RealType T>
     bool operator== (const TMatrix2<T>& m1, const TMatrix2<T>& m2)
     {
-            return (abs(m1(0, 0) - m2(0, 0)) < P_FLT_INAC && abs(m1(0, 1) - m2(0, 1)) < P_FLT_INAC &&
-                            abs(m1(1, 0) - m2(1, 0)) < P_FLT_INAC && abs(m1(1, 1) - m2(1, 1)) < P_FLT_INAC);
+            return m1[0] == m2[0] && m1[1] == m2[1];
     }
 
     template<RealType T>
     bool operator!= (const TMatrix2<T>& m1, const TMatrix2<T>& m2)
     {
-            return (abs(m1(0, 0) - m2(0, 0)) > P_FLT_INAC || abs(m1(0, 1) - m2(0, 1)) > P_FLT_INAC ||
-                            abs(m1(1, 0) - m2(1, 0)) > P_FLT_INAC || abs(m1(1, 1) - m2(1, 1)) > P_FLT_INAC);
+            return m1[0] != m2[0] || m1[1] != m2[1];
     }
 
 
diff --git a/Engine/Source/Runtime/Core/public/Math/Matrix3.hpp b/Engine/Source/Runtime/Core/public/Math/Matrix3.hpp
index f1f85ef..a4d9d17 100644
--- a/Engine/Source/Runtime/Core/public/Math/Matrix3.hpp
+++ b/Engine/Source/Runtime/Core/public/Math/Matrix3.hpp
@@ -3,6 +3,7 @@
 #include "Core/public/Math/Boilerplate.h"
 
 #include "Core/public/Math/MathAbstractTypes.h"
+#include "Core/public/Math/MathFwd.h"
 #include "Core/public/Math/Vector3.hpp"
 
 #ifndef MATRIX3_H
@@ -14,12 +15,33 @@ namespace Phanes::Core::Math {
     // 3x3 Matrix defined in column-major order.
     // Accessed by M[Row][Col].
 
-    template<RealType T>
+    template<RealType T, bool S>
     struct TMatrix3
     {
     public:
 
-        T m[3][3];
+        union
+        {
+            struct
+            {
+                /// <summary>
+                /// Column one.
+                /// </summary>
+                TVector3<T, S> c0;
+
+                /// <summary>
+                /// Column two.
+                /// </summary>
+                TVector3<T, S> c1;
+
+                /// <summary>
+                /// Column three.
+                /// </summary>
+                TVector3<T, S> c2;
+            };
+        };
+        // NOTE(review): `data` is declared outside the union above, so it is separate storage and does not alias c0..c2 - confirm whether it was meant to be a union member.
+        T data[3][3];
 
 
     public:
@@ -30,22 +52,24 @@ namespace Phanes::Core::Math {
          * Copy constructor.
          */
 
-        TMatrix3(const TMatrix3<T>& m1)
+        TMatrix3(const TMatrix3<T, S>& m1)
         {
-            memcpy(this->m, m1.m, sizeof(T) * 9);
+            this->c0 = TVector3<T, S>(m1.c0);
+            this->c1 = TVector3<T, S>(m1.c1);
+            this->c2 = TVector3<T, S>(m1.c2);
         }
 
         /**
          * Construct Matrix from 2d array.
          *
-         * @param(fields) 2D Array with column major order.
+         * @param(fields) 2D Array with row major order.
          */
 
-        TMatrix3(T fields[2][2])
+        TMatrix3(T fields[3][3])
         {
-            this->m[0][0] = fields[0][0]; this->m[1][0] = fields[1][0]; this->m[2][0] = fields[2][0];
-            this->m[0][1] = fields[0][1]; this->m[1][1] = fields[1][1]; this->m[2][1] = fields[2][1];
-            this->m[0][2] = fields[0][2]; this->m[1][2] = fields[1][2]; this->m[2][2] = fields[2][2];
+            this->c0 = TVector3<T, S>(fields[0][0], fields[1][0], fields[2][0]);
+            this->c1 = TVector3<T, S>(fields[0][1], fields[1][1], fields[2][1]);
+            this->c2 = TVector3<T, S>(fields[0][2], fields[1][2], fields[2][2]);
         }
 
         /**
@@ -55,6 +79,7 @@ namespace Phanes::Core::Math {
          * @param(n10) M[1][0]
          * @param(n01) M[0][1]
          * @param(n11) M[1][1]
+         * ...
          *
          * @note nXY = n[Row][Col]
          */
@@ -63,9 +88,9 @@ namespace Phanes::Core::Math {
                  T n10, T n11, T n12,
                  T n20, T n21, T n22)
         {
-            this->m[0][0] = n00; this->m[1][0] = n01; this->m[2][0] = n02;
-            this->m[1][0] = n10; this->m[1][1] = n11; this->m[2][1] = n12;
-            this->m[1][2] = n20; this->m[1][2] = n21; this->m[2][2] = n22;
+            this->c0 = TVector3<T, S>(n00,n10,n20);
+            this->c1 = TVector3<T, S>(n01,n11,n21);
+            this->c2 = TVector3<T, S>(n02,n12,n22);
         }
 
         /**
@@ -75,11 +100,11 @@ namespace Phanes::Core::Math {
          * @param(v2) Column one
          */
 
-        TMatrix3(const TVector3<T>& v1, const TVector3<T>& v2, const TVector3<T> v3)
+        TMatrix3(const TVector3<T, S>& v1, const TVector3<T, S>& v2, const TVector3<T, S>& v3)
         {
-            this->m[0][0] = v1.x; this->m[1][0] = v2.x; this->m[2][0] = v3.x;
-            this->m[0][1] = v1.y; this->m[1][1] = v2.y; this->m[2][1] = v3.y;
-            this->m[0][2] = v1.z; this->m[1][2] = v2.z; this->m[2][2] = v3.z;
+            this->c0 = v1;   // v1, v2, v3 become columns one, two and three
+            this->c1 = v2;
+            this->c2 = v3;   // v3 is now taken by const reference like v1 and v2
         }
 
     public:
@@ -99,9 +124,9 @@ namespace Phanes::Core::Math {
             return this->m[m][n];
         }
 
-        FORCEINLINE const TVector3<T>& operator[] (int m) const
+        FORCEINLINE const TVector3<T, S>& operator[] (int m) const
         {
-            return (*reinterpret_cast<TVector3<T>*>(this->m[m]));
+            return (m == 0) ? this->c0 : ((m == 1) ? this->c1 : this->c2);   // member `m` was removed; select the column directly (no range check, as before)
         }
 
     };
@@ -118,12 +143,12 @@ namespace Phanes::Core::Math {
      * @param(s) Scalar
      */
 
-    template<RealType T>
-    TMatrix3<T> operator+= (TMatrix3<T>& m1, T s)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator+= (TMatrix3<T, S>& m1, T s)
     {
-        m1(0, 0) += s; m1(0, 1) += s; m1(0, 2) += s;
-        m1(1, 0) += s; m1(1, 1) += s; m1(1, 2) += s;
-        m1(2, 0) += s; m1(2, 1) += s; m1(2, 2) += s;
+        m1.c0 += s;
+        m1.c1 += s;
+        m1.c2 += s;
 
         return m1;
     }
@@ -135,12 +160,12 @@ namespace Phanes::Core::Math {
      * @param(m2) Matrix
      */
 
-    template<RealType T>
-    TMatrix3<T> operator+= (TMatrix3<T>& m1, const TMatrix3<T>& m2)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator+= (TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
     {
-        m1(0, 0) += m2(0, 0); m1(0, 1) += m2(0, 1); m1(0, 2) += m2(0, 2);
-        m1(1, 0) += m2(1, 0); m1(1, 1) += m2(1, 1); m1(1, 2) += m2(1, 2);
-        m1(2, 0) += m2(2, 0); m1(2, 1) += m2(2, 1); m1(2, 2) += m2(2, 2);
+        m1.c0 += m2.c0;
+        m1.c1 += m2.c1;
+        m1.c2 += m2.c2;
 
         return m1;
     }
@@ -152,12 +177,12 @@ namespace Phanes::Core::Math {
      * @param(s) Scalar
      */
 
-    template<RealType T>
-    TMatrix3<T> operator-= (TMatrix3<T>& m1, T s)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator-= (TMatrix3<T, S>& m1, T s)
     {
-        m1(0, 0) -= s; m1(0, 1) -= s; m1(0, 2) -= s;
-        m1(1, 0) -= s; m1(1, 1) -= s; m1(1, 2) -= s;
-        m1(2, 0) -= s; m1(2, 1) -= s; m1(2, 2) -= s;
+        m1.c0 -= s;
+        m1.c1 -= s;
+        m1.c2 -= s;
 
         return m1;
     }
@@ -169,12 +194,12 @@ namespace Phanes::Core::Math {
      * @param(m2) Matrix
      */
 
-    template<RealType T>
-    TMatrix3<T> operator-= (TMatrix3<T>& m1, const TMatrix3<T>& m2)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator-= (TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
     {
-        m1(0, 0) -= m2(0, 0); m1(0, 1) -= m2(0, 1); m1(0, 2) -= m2(0, 2);
-        m1(1, 0) -= m2(1, 0); m1(1, 1) -= m2(1, 1); m1(1, 2) -= m2(1, 2);
-        m1(2, 0) -= m2(2, 0); m1(2, 1) -= m2(2, 1); m1(2, 2) -= m2(2, 2);
+        m1.c0 -= m2.c0;
+        m1.c1 -= m2.c1;
+        m1.c2 -= m2.c2;
 
         return m1;
     }
@@ -186,38 +211,29 @@ namespace Phanes::Core::Math {
      * @param(s) Scalar
      */
 
-    template<RealType T>
-    TMatrix3<T> operator*= (TMatrix3<T>& m1, T s)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator*= (TMatrix3<T, S>& m1, T s)
     {
-        m1(0, 0) *= s; m1(0, 1) *= s; m1(0, 2) *= s;
-        m1(1, 0) *= s; m1(1, 1) *= s; m1(1, 2) *= s;
-        m1(2, 0) *= s; m1(2, 1) *= s; m1(2, 2) *= s;
+        m1.c0 *= s;   // scale each column by s (was the undeclared identifier `ss`)
+        m1.c1 *= s;
+        m1.c2 *= s;
 
         return m1;
     }
 
     /**
-     * Matrix on matrix multiplication
+     * Matrix on matrix multiplication (componentwise)
      *
      * @param(m1) Matrix
      * @param(m2) Matrix
      */
 
-    template<RealType T>
-    TMatrix3<T> operator*= (TMatrix3<T>& m1, const TMatrix3<T>& m2)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator*= (TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
     {
-        TMatrix3<T> c = m1;
-        m1(0, 0) = c(0, 0) * m2(0, 0) + c(0, 1) * m2(1, 0) + c(0, 2) * m2(2, 0);
-        m1(0, 1) = c(0, 0) * m2(0, 1) + c(0, 1) * m2(1, 1) + c(0, 2) * m2(2, 1);
-        m1(0, 2) = c(0, 0) * m2(0, 2) + c(0, 1) * m2(1, 2) + c(0, 2) * m2(2, 2);
-
-        m1(1, 0) = c(1, 0) * m2(0, 0) + c(1, 1) * m2(1, 0) + c(1, 2) * m2(2, 0);
-        m1(1, 1) = c(1, 0) * m2(0, 1) + c(1, 1) * m2(1, 1) + c(1, 2) * m2(2, 1);
-        m1(1, 2) = c(1, 0) * m2(0, 2) + c(1, 1) * m2(1, 2) + c(1, 2) * m2(2, 2);
-
-        m1(2, 0) = c(2, 0) * m2(0, 0) + c(2, 1) * m2(1, 0) + c(2, 2) * m2(2, 0);
-        m1(2, 1) = c(2, 0) * m2(0, 1) + c(2, 1) * m2(1, 1) + c(2, 2) * m2(2, 1);
-        m1(2, 2) = c(2, 0) * m2(0, 2) + c(2, 1) * m2(1, 2) + c(2, 2) * m2(2, 2);
+        m1.c0 *= m2.c0;
+        m1.c1 *= m2.c1;
+        m1.c2 *= m2.c2;
 
         return m1;
     }
@@ -229,12 +245,12 @@ namespace Phanes::Core::Math {
      * @param(s) Scalar
      */
 
-    template<RealType T>
-    TMatrix3<T> operator+ (const TMatrix3<T>& m, T s)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator+ (const TMatrix3<T, S>& m, T s)
     {
-        return TMatrix3<T>(m(0, 0) + s, m(0, 1) + s, m(0, 2) + s,
-                           m(1, 0) + s, m(1, 1) + s, m(1, 2) + s,
-                           m(2, 0) + s, m(2, 1) + s, m(2, 2) + s);
+        return TMatrix3<T, S>(m.c0 + s,
+                            m.c1 + s,
+                            m.c2 + s);
     }
 
     /**
@@ -244,12 +260,12 @@ namespace Phanes::Core::Math {
      * @param(m2) Matrix
      */
 
-    template<RealType T>
-    TMatrix3<T> operator+ (const TMatrix3<T>& m1, const TMatrix3<T>& m2)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator+ (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
     {
-        return TMatrix2<T>(m1(0, 0) + m2(0, 0), m1(0, 1) + m2(0, 1), m1(0, 2) + m2(0, 2),
-                           m1(1, 0) + m2(1, 0), m1(1, 1) + m2(1, 1), m1(1, 2) + m2(1, 2),
-                           m1(2, 0) + m2(2, 0), m1(2, 1) + m2(2, 1), m1(2, 2) + m2(2, 2));
+        return TMatrix3<T, S>(m1.c0 + m2.c0,   // column-wise sum; result type is TMatrix3, not TMatrix2
+                              m1.c1 + m2.c1,
+                              m1.c2 + m2.c2);
     }
 
     /**
@@ -259,12 +275,12 @@ namespace Phanes::Core::Math {
      * @param(s) Scalar
      */
 
-    template<RealType T>
-    TMatrix3<T> operator- (const TMatrix3<T>& m, T s)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator- (const TMatrix3<T, S>& m, T s)
     {
-        return TMatrix3<T>(m(0, 0) - s, m(0, 1) - s, m(0, 2) - s,
-                           m(1, 0) - s, m(1, 1) - s, m(1, 2) - s,
-                           m(2, 0) - s, m(2, 1) - s, m(2, 2) - s);
+        return TMatrix3<T, S>(m.c0 - s,   // the parameter is named `m`; `m1` is not declared here
+                              m.c1 - s,
+                              m.c2 - s);
     }
 
     /**
@@ -274,12 +290,12 @@ namespace Phanes::Core::Math {
      * @param(s) Scalar
      */
 
-    template<RealType T>
-    TMatrix3<T> operator- (const TMatrix3<T>& m1, const TMatrix3<T>& m2)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator- (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
     {
-        return TMatrix3<T>(m1(0, 0) - m2(0, 0), m1(0, 1) - m2(0, 1), m1(0, 2) - m2(0, 2),
-                           m1(1, 0) - m2(1, 0), m1(1, 1) - m2(1, 1), m1(1, 2) - m2(1, 2),
-                           m1(2, 0) - m2(2, 0), m1(2, 1) - m2(2, 1), m1(2, 2) - m2(2, 2));
+        return TMatrix3<T, S>(m1.c0 - m2.c0,
+                              m1.c1 - m2.c1,
+                              m1.c2 - m2.c2);
     }
 
     /**
@@ -289,50 +305,27 @@ namespace Phanes::Core::Math {
      * @param(s) Scalar
      */
 
-    template<RealType T>
-    TMatrix3<T> operator* (const TMatrix3<T>& m, float s)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator* (const TMatrix3<T, S>& m, float s)
     {
-        return TMatrix3<T>(m(0, 0) * s, m(0, 1) * s, m(0, 2) * s,
-                           m(1, 0) * s, m(1, 1) * s, m(1, 2) * s,
-                           m(2, 0) * s, m(2, 1) * s, m(2, 2) * s);
+        return TMatrix3<T, S>(m.c0 * s,
+                              m.c1 * s,
+                              m.c2 * s);
     }
 
     /**
-     * Multiplay matrix by matrix
+     * Multiply matrix by matrix (componentwise)
      *
      * @param(m1) Matrix
      * @param(m2) Matrix
      */
 
-    template<RealType T>
-    TMatrix3<T> operator* (const TMatrix3<T>& m1, const TMatrix3<T>& m2)
+    template<RealType T, bool S>
+    TMatrix3<T, S> operator* (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
     {
-        return TMatrix3<T>(m1(0, 0) * m2(0, 0) + m1(0, 1) * m2(1, 0) + m1(0, 2) * m2(2, 0),
-                           m1(0, 0) * m2(0, 1) + m1(0, 1) * m2(1, 1) + m1(0, 2) * m2(2, 1),
-                           m1(0, 0) * m2(0, 2) + m1(0, 1) * m2(1, 2) + m1(0, 2) * m2(2, 2),
-
-                           m1(1, 0) * m2(0, 0) + m1(1, 1) * m2(1, 0) + m1(1, 2) * m2(2, 0),
-                           m1(1, 0) * m2(0, 1) + m1(1, 1) * m2(1, 1) + m1(1, 2) * m2(2, 1),
-                           m1(1, 0) * m2(0, 2) + m1(1, 1) * m2(1, 2) + m1(1, 2) * m2(2, 2),
-
-                           m1(2, 0) * m2(0, 0) + m1(2, 1) * m2(1, 0) + m1(2, 2) * m2(2, 0),
-                           m1(2, 0) * m2(0, 1) + m1(2, 1) * m2(1, 1) + m1(2, 2) * m2(2, 1),
-                           m1(2, 0) * m2(0, 2) + m1(2, 1) * m2(1, 2) + m1(2, 2) * m2(2, 2));
-    }
-
-    /**
-     * Add matrix to matrix componentwise
-     *
-     * @param(m1) Matrix
-     * @param(m2) Matrix
-     */
-
-    template<RealType T>
-    TVector3<T> operator* (const TMatrix3<T>& m1, const TVector3<T>& v)
-    {
-        return TVector3<T>(m1(0, 0) * v.x + m1(0, 1) * v.y + m1(0, 2) * v.z,
-                           m1(1, 0) * v.x + m1(1, 1) * v.y + m1(1, 2) * v.z,
-                           m1(2, 0) * v.x + m1(2, 1) * v.y + m1(2, 2) * v.z);
+        return TMatrix3<T, S>(m1.c0 * m2.c0,   // componentwise product, matching operator*=; the columns were being added
+                              m1.c1 * m2.c1,
+                              m1.c2 * m2.c2);
     }
 
     /**
@@ -342,8 +335,8 @@ namespace Phanes::Core::Math {
      * @param(m2) Matrix
      */
 
-    template<RealType T>
-    bool operator== (const TMatrix3<T>& m1, const TMatrix3<T>& m2)
+    template<RealType T, bool S>
+    bool operator== (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
     {
         return (m1[0] == m2[0] && m1[1] == m2[1] && m1[2] == m2[2]);
     }
@@ -355,8 +348,8 @@ namespace Phanes::Core::Math {
      * @param(m2) Matrix
      */
 
-    template<RealType T>
-    bool operator!= (const TMatrix3<T>& m1, const TMatrix3<T>& m2)
+    template<RealType T, bool S>
+    bool operator!= (const TMatrix3<T, S>& m1, const TMatrix3<T, S>& m2)
     {
         return (m1[0] != m2[0] || m1[1] != m2[1] || m1[2] != m2[2]);
     }
@@ -372,8 +365,8 @@ namespace Phanes::Core::Math {
      * @param(m1) Matrix
      */
     
-    template<RealType T>
-    T Determinant(const TMatrix3<T>& m1)
+    template<RealType T, bool S>
+    T Determinant(const TMatrix3<T, S>& m1)
     {
         return   m1(0, 0) * (m1(1, 1) * m1(2, 2) - m1(1, 2) * m1(2, 1))
                - m1(0, 1) * (m1(1, 0) * m1(2, 2) - m1(1, 2) * m1(2, 0))
@@ -390,22 +383,22 @@ namespace Phanes::Core::Math {
      * @note Stores result in m1.
      */
 
-    template<RealType T>
-    TMatrix3<T> InverseV(TMatrix3<T>& m1)
+    template<RealType T, bool S>
+    TMatrix3<T, S> InverseV(TMatrix3<T, S>& m1)
     {
-        const TVector3<T>& v0 = m1[0];
-        const TVector3<T>& v1 = m1[1];
-        const TVector3<T>& v2 = m1[2];
+        const TVector3<T, S>& v0 = m1[0];
+        const TVector3<T, S>& v1 = m1[1];
+        const TVector3<T, S>& v2 = m1[2];
 
-        TVector3<T> r0 = CrossP(v1, v2);
-        TVector3<T> r1 = CrossP(v2, v0);
-        TVector3<T> r2 = CrossP(v0, v1);
+        TVector3<T, S> r0 = CrossP(v1, v2);
+        TVector3<T, S> r1 = CrossP(v2, v0);
+        TVector3<T, S> r2 = CrossP(v0, v1);
 
-        T _1_det = (T)1.0 / determinant(m1);
+        T _1_det = (T)1.0 / Determinant(m1);
 
-        m1 = TMatrix3<T>(r0.x, r0.y, r0.z,
-                        r1.x, r1.y, r1.z,
-                        r2.x, r2.y, r2.z);
+        m1 = TMatrix3<T, S>(r0.x, r0.y, r0.z,
+                            r1.x, r1.y, r1.z,
+                            r2.x, r2.y, r2.z);
 
         m1 *= _1_det;
 
@@ -420,8 +413,8 @@ namespace Phanes::Core::Math {
      * @note Result is stored in m1;
      */
 
-    template<RealType T>
-    TMatrix3<T> TransposeV(TMatrix3<T>& m1)
+    template<RealType T, bool S>
+    TMatrix3<T, S> TransposeV(TMatrix3<T, S>& m1)
     {
         Swap(m1(0, 1), m1(1, 0));
         Swap(m1(0, 2), m1(2, 0));
@@ -441,8 +434,8 @@ namespace Phanes::Core::Math {
      * @param(m1) Matrix
      */
 
-    template<RealType T>
-    TMatrix3<T> Inverse(TMatrix3<T>& m1)
+    template<RealType T, bool S>
+    TMatrix3<T, S> Inverse(TMatrix3<T, S>& m1)
     {
         const TVector3<T>& v0 = m1[0];
         const TVector3<T>& v1 = m1[1];
@@ -454,7 +447,7 @@ namespace Phanes::Core::Math {
 
         T _1_det = (T)1.0 / Determinant(m1);
 
-        TMatrix3<T> inverse(r0.x, r0.y, r0.z,
+        TMatrix3<T, S> inverse(r0.x, r0.y, r0.z,
             r1.x, r1.y, r1.z,
             r2.x, r2.y, r2.z);
 
@@ -471,10 +464,10 @@ namespace Phanes::Core::Math {
      * @note Result is stored in m1;
      */
 
-    template<RealType T>
-    TMatrix3<T> Transpose(const TMatrix3<T>& m1)
+    template<RealType T, bool S>
+    TMatrix3<T, S> Transpose(const TMatrix3<T, S>& m1)
     {
-        return TMatrix3<T>(m1(0, 0), m1(1, 0), m1(2, 0),
+        return TMatrix3<T, S>(m1(0, 0), m1(1, 0), m1(2, 0),
                            m1(0, 1), m1(1, 1), m1(2, 1),
                            m1(0, 2), m1(1, 2), m1(2, 2));
     }
@@ -483,12 +476,12 @@ namespace Phanes::Core::Math {
      * Checks if matrix is an identity matrix.
      */
 
-    template<RealType T>
-    bool IsIndentityMatrix(const TMatrix3<T>& m1)
+    template<RealType T, bool S>
+    bool IsIdentityMatrix(const TMatrix3<T, S>& m1)
     {
-        return (abs(m1(0, 0) - (T)1.0) < P_FLT_INAC && abs(m1(0, 1) - (T)1.0) < P_FLT_INAC && abs(m1(0, 2) - (T)1.0) < P_FLT_INAC &&
-                abs(m1(1, 0) - (T)1.0) < P_FLT_INAC && abs(m1(1, 1) - (T)1.0) < P_FLT_INAC && abs(m1(1, 2) - (T)1.0) < P_FLT_INAC &&
-                abs(m1(2, 0) - (T)1.0) < P_FLT_INAC && abs(m1(2, 1) - (T)1.0) < P_FLT_INAC && abs(m1(2, 2) - (T)1.0) < P_FLT_INAC);
+        return (Phanes::Core::Math::Abs(m1(0, 0) - (T)1.0) < P_FLT_INAC && Phanes::Core::Math::Abs(m1(0, 1)) < P_FLT_INAC && Phanes::Core::Math::Abs(m1(0, 2)) < P_FLT_INAC &&   // project Abs: unqualified abs may pick the int overload
+                Phanes::Core::Math::Abs(m1(1, 0)) < P_FLT_INAC && Phanes::Core::Math::Abs(m1(1, 1) - (T)1.0) < P_FLT_INAC && Phanes::Core::Math::Abs(m1(1, 2)) < P_FLT_INAC &&
+                Phanes::Core::Math::Abs(m1(2, 0)) < P_FLT_INAC && Phanes::Core::Math::Abs(m1(2, 1)) < P_FLT_INAC && Phanes::Core::Math::Abs(m1(2, 2) - (T)1.0) < P_FLT_INAC);
     }
 
 } // Phanes::Core::Math
diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp
index e69de29..3c6bd20 100644
--- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp
+++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
+#include "Core/public/Math/MathCommon.hpp"
+
+
+namespace Phanes::Core::Math::SIMD
+{
+    /// <summary>Adds all scalars of the vector.</summary>
+    /// <param name="v">Vector</param>
+    /// <returns>Register with the sum in data[0]; the remaining lanes are value-initialized.</returns>
+    /// <remarks>The lanes are added left to right: ((v0 + v1) + v2) + v3.</remarks>
+    Phanes::Core::Types::Vec4f32Reg vec4_hadd(const Phanes::Core::Types::Vec4f32Reg v)
+    {
+        Phanes::Core::Types::Vec4f32Reg r{}; // value-initialize so the lanes that are not written hold defined values
+        r.data[0] = v.data[0] + v.data[1] + v.data[2] + v.data[3];
+        return r; // required: flowing off the end of a non-void function is undefined behaviour
+    }
+    
+    /// <summary>Sums the four scalars of the vector and hands the result back as a plain float.</summary>
+    /// <param name="v">Vector</param>
+    /// <returns>Sum of components.</returns>
+    /// <remarks>The lanes are added left to right: ((v0 + v1) + v2) + v3.</remarks>
+    float vec4_hadd_cvtf32(const Phanes::Core::Types::Vec4f32Reg v)
+    {
+        const auto& lanes = v.data; // alias only; the order of the additions is unchanged
+        return lanes[0] + lanes[1] + lanes[2] + lanes[3];
+    }
+    
+    /// <summary>Gets the absolute value of each scalar in the vector.</summary>
+    /// <param name="v">Vector</param>
+    /// <returns>Vector whose four components are Abs() of the matching components of v.</returns>
+    Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg v)
+    {
+        Phanes::Core::Types::Vec4f32Reg r;
+
+        for (int i = 0; i < 4; ++i) // every lane is processed, not only data[0]
+            r.data[i] = Abs(v.data[i]);
+        return r; // required: flowing off the end of a non-void function is undefined behaviour
+    }
+    
+    /// <summary>
+    /// Gets the dot product of the two vectors, taken over all four components.
+    /// </summary>
+    /// <param name="v1">First vector</param>
+    /// <param name="v2">Second vector</param>
+    /// <returns>Register with the dot product in data[0]; the remaining lanes are value-initialized.</returns>
+    Phanes::Core::Types::Vec4f32Reg vec4_dot(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
+    {
+        Phanes::Core::Types::Vec4f32Reg r{}; // value-initialize so the lanes that are not written hold defined values
+        r.data[0] = v1.data[0] * v2.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] + v1.data[3] * v2.data[3]; // lane 0 pairs v1 with v2 like the other lanes
+
+        return r;
+    }
+    
+    /// <summary>
+    /// Gets the dot product of the two vectors, taken over all four components, as a plain float.
+    /// </summary>
+    /// <param name="v1">First vector</param>
+    /// <param name="v2">Second vector</param>
+    /// <returns>Sum of the four component-wise products v1[i] * v2[i].</returns>
+    float vec4_dot_cvtf32(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
+    {
+        return v1.data[0] * v2.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] + v1.data[3] * v2.data[3]; // lane 0 pairs v1 with v2 like the other lanes
+    }
+}
\ No newline at end of file
diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
index c3ddc84..eeb20d6 100644
--- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
+++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp
@@ -22,6 +22,19 @@
 
 namespace Phanes::Core::Math::SIMD
 {
+    /// <summary>Computes v1.yzx * v2.zxy - v1.zxy * v2.yzx per lane, i.e. the 3D cross product in lanes 0-2.</summary>
+    /// <param name="v1">Left operand</param>
+    /// <param name="v2">Right operand</param>
+    Phanes::Core::Types::Vec4f32Reg vec4_cross_p(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
+    {
+        const __m128 v1_yzx = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
+        const __m128 v2_zxy = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
+        const __m128 v1_zxy = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
+        const __m128 v2_yzx = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
+        return _mm_sub_ps(_mm_mul_ps(v1_yzx, v2_zxy), _mm_mul_ps(v1_zxy, v2_yzx));
+    }
+
+
     /// <summary>
     /// Adds all scalars of the vector.
     /// </summary>
@@ -247,6 +260,15 @@ namespace Phanes::Core::Math::Detail
     template<> struct compute_vec3_inc<float, true> : public compute_vec4_inc<float, true> {};
     template<> struct compute_vec3_dec<float, true> : public compute_vec4_dec<float, true> {};
 
+    template<> struct compute_vec3_cross_p<float, true> // cross product for TVector3<float, true>, delegated to SIMD::vec4_cross_p
+    {
+        static FORCEINLINE void map(Phanes::Core::Math::TVector3<float, true>& r, const Phanes::Core::Math::TVector3<float, true>& v1, const Phanes::Core::Math::TVector3<float, true>& v2)
+        {
+            const auto crossed = Phanes::Core::Math::SIMD::vec4_cross_p(v1.data, v2.data); // both registers are passed by value
+            r.data = crossed;
+        }
+    };
+
     // ============ //
     //   TVector2   //
     // ============ //
diff --git a/Engine/Source/Runtime/Core/public/Math/Vector3.hpp b/Engine/Source/Runtime/Core/public/Math/Vector3.hpp
index e3cf5c2..b7ed19b 100644
--- a/Engine/Source/Runtime/Core/public/Math/Vector3.hpp
+++ b/Engine/Source/Runtime/Core/public/Math/Vector3.hpp
@@ -504,18 +504,7 @@ namespace Phanes::Core::Math {
      */
 
     template<RealType T>
-    TVector3<T, false> CrossPV(TVector3<T, false>& v1, const TVector3<T, false>& v2)
-    {
-        float x = v1.x;
-        float y = v1.y;
-        float z = v1.z;
-
-        v1.x = (y * v2.z) - (z * v2.y);
-        v1.y = (z * v2.x) - (x * v2.z);
-        v1.z = (x * v2.y) - (y * v2.x);
-
-        return v1;
-    }
+    TVector3<T, false> CrossPV(TVector3<T, false>& v1, const TVector3<T, false>& v2);
 
     /**
      * Gets the componentwise max of both vectors.
@@ -991,12 +980,7 @@ namespace Phanes::Core::Math {
      */
 
     template<RealType T>
-    TVector3<T, false> CrossP(const TVector3<T, false>& v1, const TVector3<T, false>& v2)
-    {
-        return TVector3<T, false>((v1.y * v2.z) - (v1.z * v2.y),
-                           (v1.z * v2.x) - (v1.x * v2.z),
-                           (v1.x * v2.y) - (v1.y * v2.x));
-    }
+    TVector3<T, false> CrossP(const TVector3<T, false>& v1, const TVector3<T, false>& v2);
 
     /**
      * Linearly interpolates between two vectors.
diff --git a/Engine/Source/Runtime/Core/public/Math/Vector3.inl b/Engine/Source/Runtime/Core/public/Math/Vector3.inl
index 6702ac1..f40271a 100644
--- a/Engine/Source/Runtime/Core/public/Math/Vector3.inl
+++ b/Engine/Source/Runtime/Core/public/Math/Vector3.inl
@@ -208,4 +208,21 @@ namespace Phanes::Core::Math
     {
         return --v1;
     }
+
+
+    // Other
+
+    template<RealType T, bool S> // S selects which Detail::compute_vec3_cross_p specialization does the work
+    TVector3<T, S> CrossP(const TVector3<T, S>& v1, const TVector3<T, S>& v2)
+    {
+        TVector3<T, S> product; // receives the result from map() below
+        Detail::compute_vec3_cross_p<T, S>::map(product, v1, v2);
+        return product;
+    }
+    template<RealType T, bool S> // in-place variant: v1 is overwritten with v1 x v2 and a copy of it is returned
+    TVector3<T, S> CrossPV(TVector3<T, S>& v1, const TVector3<T, S>& v2)
+    {
+        v1 = CrossP<T, S>(v1, v2); // computed into a temporary first, so it does not matter whether v1 or v2 alias the output
+        return v1;                 // required: flowing off the end of a non-void function is undefined behaviour
+    }
 }
\ No newline at end of file