diff --git a/Engine/Source/Runtime/Core/public/Math/MathFwd.h b/Engine/Source/Runtime/Core/public/Math/MathFwd.h index bf9a53f..d22d332 100644 --- a/Engine/Source/Runtime/Core/public/Math/MathFwd.h +++ b/Engine/Source/Runtime/Core/public/Math/MathFwd.h @@ -24,28 +24,28 @@ namespace Phanes::Core::Math { * Template forward declarations. */ - template struct TColor; - template struct TLinearColor; - template struct TVector2; - template struct TVector3; - template struct TVector4; - template struct TRay; - template struct TLine; - template struct TPlane; - template struct TMatrix2; - template struct TMatrix3; - template struct TMatrix4; - template struct TQuaternion; - template struct TTransform; - template struct TPoint2; - template struct TPoint3; - template struct TPoint4; - template struct TIntVector2; - template struct TIntVector3; - template struct TIntVector4; - template struct TIntPoint2; - template struct TIntPoint3; - template struct TIntPoint4; + template struct TColor; + template struct TLinearColor; + template struct TVector2; + template struct TVector3; + template struct TRay; + template struct TLine; + template struct TPlane; + template struct TMatrix2; + template struct TMatrix3; + template struct TMatrix4; + template struct TQuaternion; + template struct TTransform; + template struct TPoint2; + template struct TPoint3; + template struct TPoint4; + template struct TIntVector2; + template struct TIntVector3; + template struct TIntVector4; + template struct TIntPoint2; + template struct TIntPoint3; + template struct TIntPoint4; + template struct TVector4; // NOTE(review): moved from its old slot after TVector3 to the end of the list; the template parameter lists are not visible in this patch, so confirm this declaration still matches the TVector4 definition /** * Specific instantiation of forward declarations. diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMD.h b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMD.h index 7151fcf..92c7812 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMD.h +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesSIMD.h @@ -3,80 +3,91 @@ // ARM is not supported. 
#include "Core/public/Math/SIMD/Platform.h" +#include "Core/public/Math/MathTypes.h" - -#include // SSE4.2 - -#ifdef __AVX__ -# include +#if P_INTRINSICS == P_INTRINSICS_AVX2 +# include "PhanesVectorMathAVX2.hpp" +#elif P_INTRINSICS == P_INTRINSICS_AVX +# include "PhanesVectorMathAVX.hpp" +#elif P_INTRINSICS == P_INTRINSICS_SSE +# include "PhanesVectorMathSSE.hpp" +#elif P_INTRINSICS == P_INTRINSICS_NEON +# include "PhanesVectorMathNeon.hpp" +#elif P_INTRINSICS == P_INTRINSICS_FPU +# include "PhanesVectorMathFPU.hpp" #endif -namespace Phanes::Core::Math::SIMD +// Register aliases: native SIMD register types when the matching intrinsics level is enabled, otherwise aligned plain-array stand-ins with the same size and alignment. +namespace Phanes::Core::Types { - // XMM Register wrapper for 4x1 floats +#if P_INTRINSICS >= 1 - struct VectorRegister4f - { - public: - __m128 data; - }; + typedef __m128 Vec4f32Reg; + typedef __m128d Vec2f64Reg; - typedef VectorRegister4f VectorRegister4f32; + typedef __m128i Vec4i32Reg; + typedef __m128i Vec2i64Reg; + + typedef __m128i Vec4u32Reg; + typedef __m128i Vec2u64Reg; + +#elif P_INTRINSICS != P_INTRINSICS_NEON + + typedef struct alignas(16) Vec4f32Reg { float data[4]; } Vec4f32Reg; + typedef struct alignas(16) Vec2f64Reg { double data[2]; } Vec2f64Reg; + typedef struct alignas(16) Vec4i32Reg { int data[4]; } Vec4i32Reg; + typedef struct alignas(16) Vec2i64Reg { Phanes::Core::Types::int64 data[2]; } Vec2i64Reg; + typedef struct alignas(16) Vec4u32Reg { unsigned int data[4]; } Vec4u32Reg; + typedef struct alignas(16) Vec2u64Reg { Phanes::Core::Types::uint64 data[2]; } Vec2u64Reg; // two 64-bit lanes: 16 bytes, same footprint as Vec2i64Reg and the __m128i it stands in for + +#endif +#if P_INTRINSICS >= 2 - // XMM Register wrapper for 2x1 doubles - struct VectorRegister2d - { - public: - __m128d data; - }; + typedef __m256 Vec4x2f32Reg; + typedef __m256 Vec8f32Reg; + typedef __m256d Vec2x2f64Reg; + typedef __m256d Vec4f64Reg; - typedef VectorRegister2d VectorRegister2f64; +#elif P_INTRINSICS != P_INTRINSICS_NEON + + typedef struct alignas(32) Vec4x2f32Reg { float data[8]; } Vec4x2f32Reg; + typedef struct alignas(32) Vec8f32Reg { float data[8]; } Vec8f32Reg; + typedef 
struct alignas(32) Vec2x2f64Reg { double data[4]; } Vec2x2f64Reg; + typedef struct alignas(32) Vec4f64Reg { double data[4]; } Vec4f64Reg; + +#endif +#if P_INTRINSICS == 3 - // XMM Register wrapper for 4x1 integers - struct VectorRegister4i - { - public: - __m128i data; - }; + typedef __m256i Vec4x2i32Reg; + typedef __m256i Vec8i32Reg; + typedef __m256i Vec2x2i64Reg; + typedef __m256i Vec4i64Reg; - typedef VectorRegister4i VectorRegister4i32; + typedef __m256i Vec4x2u32Reg; + typedef __m256i Vec8u32Reg; + typedef __m256i Vec2x2u64Reg; + typedef __m256i Vec4u64Reg; +#elif P_INTRINSICS != P_INTRINSICS_NEON + typedef struct alignas(32) Vec4x2i32Reg { int data[8]; } Vec4x2i32Reg; + typedef struct alignas(32) Vec8i32Reg { int data[8]; } Vec8i32Reg; + typedef struct alignas(32) Vec2x2i64Reg { Phanes::Core::Types::int64 data[4]; } Vec2x2i64Reg; + typedef struct alignas(32) Vec4i64Reg { Phanes::Core::Types::int64 data[4]; } Vec4i64Reg; -# ifdef __AVX__ + typedef struct alignas(32) Vec4x2u32Reg { unsigned int data[8]; } Vec4x2u32Reg; + typedef struct alignas(32) Vec8u32Reg { unsigned int data[8]; } Vec8u32Reg; + typedef struct alignas(32) Vec2x2u64Reg { Phanes::Core::Types::uint64 data[4]; } Vec2x2u64Reg; + typedef struct alignas(32) Vec4u64Reg { Phanes::Core::Types::uint64 data[4]; } Vec4u64Reg; - // AVX specific types +#endif - // XMM Register wrapper for 4x1 doubles - struct VectorRegister4d - { - public: - __m256d data; - }; - - typedef VectorRegister4d VectorRegister4f64; - -# endif - - -# ifdef __AVX2__ - - // AVX2 specific types - - // XMM Register wrapper for 4x1 doubles - struct VectorRegister4i64 - { - public: - __m256i data; - }; - - -# endif + // NEON ... 
} \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathAVX.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathAVX.hpp new file mode 100644 index 0000000..b9ccf66 --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathAVX.hpp @@ -0,0 +1,6 @@ +#pragma once + +#include "PhanesVectorMathSSE.hpp" // Include previous + +#include + diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathAVX2.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathAVX2.hpp new file mode 100644 index 0000000..409deda --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathAVX2.hpp @@ -0,0 +1,3 @@ +#pragma once + +#include "PhanesVectorMathAVX.hpp" // Include previous \ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp new file mode 100644 index 0000000..6f70f09 --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathFPU.hpp @@ -0,0 +1 @@ +#pragma once diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathNeon.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathNeon.hpp new file mode 100644 index 0000000..5d661d5 --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathNeon.hpp @@ -0,0 +1,2 @@ +#pragma once +#error ARM architecture is not yet supported by PhanesEngine. 
\ No newline at end of file diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp new file mode 100644 index 0000000..4249084 --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/PhanesVectorMathSSE.hpp @@ -0,0 +1,5 @@ +#pragma once + +#include + + diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h b/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h index 50976c4..0b9e65a 100644 --- a/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/Platform.h @@ -2,26 +2,46 @@ #pragma once +// Architecture MACRO +// Implicitly assumes x86 architecture +#ifndef P_ARM_ARCH +# define P_x86_ARCH +#else +# ifdef P_x86_ARCH +# undef P_x86_ARCH +# endif +# error ARM architecture not supported. +#endif + // Set platform MACRO depending on defined build + +#define P_PLATFORM_WIN 0 +#define P_PLATFORM_LIN 1 +#define P_PLATFORM_MAC 2 +// #define P_PLATFORM_FBSD 3 -> Is planned for eventual PS5 support + // User defines build platform #ifdef P_WIN_BUILD - #define P_PLATFORM 0 +# define P_PLATFORM P_PLATFORM_WIN #elif P_LINUX_BUILD - #define P_PLATFORM 1 -#elif P_APPLE_BUILD - #define P_PLATFORM 2 -#elif P_PS5_BUILD - #define P_PLATFORM 3 +# define P_PLATFORM P_PLATFORM_LIN +# error Linux / Unix system is not yet supported. +#elif P_MAC_BUILD +# define P_PLATFORM P_PLATFORM_MAC +# error Mac target system is not yet supported. +#elif P_PS5_BUILD || P_FBSD_BUILD +# define P_PLATFORM P_PLATFORM_FBSD // NOTE(review): P_PLATFORM_FBSD only exists as a commented-out #define above; harmless while the #error on the next line fires, but it must be defined before this branch is enabled +# error FreeBSD is not yet supported. #else - #error Your target system is either not supported, or you have yet to define it. +# error Your target system is either not supported, or you have yet to define it. #endif // Set compiler depending on defined compiler // Compiler macro definition -// ID's defines like [0-9][0-x] -// First bracket is compiler, second is the version of the compiler. 
+// ID's defined like [0-9][0-x] +// First bracket defines compiler, second defines the version of the compiler. // Visual C++ #define P_COMPILER_VC22 001 @@ -122,7 +142,7 @@ // Clang #elif (defined(__clang__)) - +# error PhanesEngine only supports MSVC -> Visual Studio # if defined(__apple_build_version__) # # if (__clang_major__ < 6) @@ -188,6 +208,7 @@ // G++ #elif defined(__GNUC__) || defined(__MINGW32__) +# error PhanesEngine only supports MSVC -> Visual Studio # if __GNUC__ >= 14 # define P_COMPILER P_COMPILER_GCC14 # elif __GNUC__ >= 13 @@ -225,3 +246,69 @@ #endif + + + +// Vector instruction sets + + +// Define also supported instruction sets for Visual Studio, as it only defines the latest (e.g. only __AVX__ not __SSE4__ ...). + +#ifdef P_FORCE_INTRINSICS + +# undef __AVX2__ +# undef __AVX__ +# undef __SSE__ + +# ifndef P_INTRINSICS +# error P_INTRINSICS must be defined by the user, when P_FORCE_INTRINSICS is used. +# endif + +#else + +# ifdef __AVX2__ +# define P_AVX2__ +# elif defined(__AVX__) +# define P_AVX__ +# elif defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) // MSVC never defines __SSE__: x64 targets always provide SSE/SSE2, and 32-bit targets report the /arch level through _M_IX86_FP +# define P_SSE__ +# endif + + +#endif // !P_FORCE_INTRINSICS + +#ifdef P_AVX2__ +# define P_AVX__ +#endif + +#ifdef P_AVX__ +# define P_SSE__ +#endif + + +#define P_INTRINSICS_FPU 0 +#define P_INTRINSICS_SSE 1 +#define P_INTRINSICS_AVX 2 +#define P_INTRINSICS_AVX2 3 +#define P_INTRINSICS_NEON 4 + + +#if defined(P_FORCE_FPU) // Force, that no intrinsics may be used. +# define P_INTRINSICS P_INTRINSICS_FPU +# undef P_AVX2__ +# undef P_AVX__ +# undef P_SSE__ +# undef P_SSE__ +#else +# if defined(P_AVX__) && !defined(P_AVX2__) +# define P_INTRINSICS P_INTRINSICS_AVX +# elif defined(P_AVX2__) +# define P_INTRINSICS P_INTRINSICS_AVX2 +# elif (defined(__SSE__) || defined(P_SSE__)) && !defined(P_AVX__) +# define P_INTRINSICS P_INTRINSICS_SSE +# elif defined(P_ARM_ARCH) +# define P_INTRINSICS P_INTRINSICS_NEON +# elif !defined(P_FORCE_INTRINSICS) +# error No SIMD instruction set detected. 
Use P_FORCE_FPU to disable SIMD extensions. +# endif +#endif diff --git a/Engine/Source/Runtime/Core/public/Math/SIMD/Storage.h b/Engine/Source/Runtime/Core/public/Math/SIMD/Storage.h new file mode 100644 index 0000000..fa2b854 --- /dev/null +++ b/Engine/Source/Runtime/Core/public/Math/SIMD/Storage.h @@ -0,0 +1,179 @@ +// Selects at compile time whether a SIMD register type or a plain array is used as the storage type. +#pragma once + +#include "Core/public/Math/SIMD/PhanesSIMD.h" + +#include "Core/public/Math/MathTypes.h" + +namespace Phanes::Core::SIMD +{ + template + struct Storage; + + // General unaligned memory storage + template + struct Storage + { + typedef struct type { + T data[L]; + } type; + + }; + + template + struct Storage<3, T, false> + { + typedef struct type { + T data[4]; + } type; + }; + + + // SSE4.2 + + template<> + struct Storage<4, float, true> + { + typedef Phanes::Core::Types::Vec4f32Reg type; + }; + + template<> + struct Storage<3, float, true> + { + typedef Phanes::Core::Types::Vec4f32Reg type; + }; + + template<> + struct Storage<4, int, true> + { + typedef Phanes::Core::Types::Vec4i32Reg type; + }; + + template<> + struct Storage<3, int, true> + { + typedef Phanes::Core::Types::Vec4i32Reg type; + }; + + template<> + struct Storage<4, unsigned int, true> + { + typedef Phanes::Core::Types::Vec4u32Reg type; + }; + + template<> + struct Storage<3, unsigned int, true> + { + typedef Phanes::Core::Types::Vec4u32Reg type; + }; + + template<> + struct Storage<2, double, true> + { + typedef Phanes::Core::Types::Vec2f64Reg type; + }; + + template<> + struct Storage<2, Phanes::Core::Types::int64, true> + { + typedef Phanes::Core::Types::Vec2i64Reg type; + }; + + template<> + struct Storage<2, Phanes::Core::Types::uint64, true> + { + typedef Phanes::Core::Types::Vec2u64Reg type; + }; + + + // AVX + template<> + struct Storage<4, double, true> + { + typedef Phanes::Core::Types::Vec4f64Reg type; + }; + + template<> + struct Storage<3, double, true> + { + typedef 
Phanes::Core::Types::Vec4f64Reg type; + }; + + template<> + struct Storage<8, float, true> + { + typedef Phanes::Core::Types::Vec4x2f32Reg type; + }; + + + // AVX2 + template<> + struct Storage<4, Phanes::Core::Types::int64, true> + { + typedef Phanes::Core::Types::Vec4i64Reg type; + }; + + template<> + struct Storage<3, Phanes::Core::Types::int64, true> + { + typedef Phanes::Core::Types::Vec4i64Reg type; + }; + + template<> + struct Storage<4, Phanes::Core::Types::uint64, true> + { + typedef Phanes::Core::Types::Vec4u64Reg type; + }; + + template<> + struct Storage<3, Phanes::Core::Types::uint64, true> + { + typedef Phanes::Core::Types::Vec4u64Reg type; + }; + + template<> + struct Storage<8, int, true> + { + typedef Phanes::Core::Types::Vec4x2i32Reg type; + }; + + template<> + struct Storage<8, unsigned int, true> + { + typedef Phanes::Core::Types::Vec4x2u32Reg type; + }; +} + + +struct Vec4 +{ +public: + union + { + struct + { + + int x, y, z, w; + + }; + + typename Phanes::Core::SIMD::Storage<4, Phanes::Core::Types::int32, true>::type comp; + }; +}; + +struct Vec4x2 +{ +public: + union + { + struct + { + + Vec4 v1; + Vec4 v2; + + }; + + typename Phanes::Core::SIMD::Storage<8, Phanes::Core::Types::int32, true>::type comp; + }; +}; \ No newline at end of file