Create SIMD boilerplate code for detection and fallbacks.

This commit is contained in:
scorpioblood 2024-05-24 23:43:26 +02:00
parent bb98da5e79
commit 676ee84774
9 changed files with 380 additions and 86 deletions

View File

@ -28,7 +28,6 @@ namespace Phanes::Core::Math {
template<RealType T> struct TLinearColor; template<RealType T> struct TLinearColor;
template<RealType T> struct TVector2; template<RealType T> struct TVector2;
template<RealType T> struct TVector3; template<RealType T> struct TVector3;
template<RealType T> struct TVector4;
template<RealType T> struct TRay; template<RealType T> struct TRay;
template<RealType T> struct TLine; template<RealType T> struct TLine;
template<RealType T> struct TPlane; template<RealType T> struct TPlane;
@ -46,6 +45,7 @@ namespace Phanes::Core::Math {
template<IntType T> struct TIntPoint2; template<IntType T> struct TIntPoint2;
template<IntType T> struct TIntPoint3; template<IntType T> struct TIntPoint3;
template<IntType T> struct TIntPoint4; template<IntType T> struct TIntPoint4;
template<RealType T, bool IsAligned> struct TVector4;
/** /**
* Specific instantiation of forward declarations. * Specific instantiation of forward declarations.

View File

@ -3,80 +3,91 @@
// ARM is not supported. // ARM is not supported.
#include "Core/public/Math/SIMD/Platform.h" #include "Core/public/Math/SIMD/Platform.h"
#include "Core/public/Math/MathTypes.h"
#if P_INTRINSICS == P_INTRINSICS_AVX2
#include <nmmintrin.h> // SSE4.2 # include "PhanesVectorMathAVX2.hpp"
#elif P_INTRINSICS == P_INTRINSICS_AVX
#ifdef __AVX__ # include "PhanesVectorMathAVX.hpp"
# include <immintrin.h> #elif P_INTRINSICS == P_INTRINSICS_SSE
# include "PhanesVectorMathSSE.hpp"
#elif P_INTRINSICS == P_INTRINSICS_NEON
# include "PhanesVectorMathNeon.hpp"
#elif P_INTRINSICS == P_INTRINSICS_FPU
# include "PhanesVectorMathFPU.hpp"
#endif #endif
namespace Phanes::Core::Math::SIMD // Register aliases
namespace Phanes::Core::Types
{ {
// XMM Register wrapper for 4x1 floats #if P_INTRINSICS >= 1
struct VectorRegister4f typedef __m128 Vec4f32Reg;
{ typedef __m128d Vec2f64Reg;
public:
__m128 data;
};
typedef VectorRegister4f VectorRegister4f32; typedef __m128i Vec4i32Reg;
typedef __m128i Vec2i64Reg;
typedef __m128i Vec4u32Reg;
typedef __m128i Vec2u64Reg;
#elif P_INTRINSICS != P_INTRINSICS_NEON
// XMM Register wrapper for 2x1 doubles typedef struct alignas(16) Vec4f32Reg { float data[4]; } Vec4f32Reg;
struct VectorRegister2d typedef struct alignas(16) Vec2f64Reg { double data[2]; } Vec2f64Reg;
{ typedef struct alignas(16) Vec4i32Reg { int data[4]; } Vec4i32Reg;
public: typedef struct alignas(16) Vec2i64Reg { Phanes::Core::Types::int64 data[2]; } Vec2i64Reg;
__m128d data; typedef struct alignas(16) Vec4u32Reg { unsigned int data[4]; } Vec4u32Reg;
}; typedef struct alignas(16) Vec2u64Reg { Phanes::Core::Types::uint64 data[2]; } Vec2u64Reg; // Two 64-bit lanes = 16 bytes, same size as the __m128i this fallback replaces.
typedef VectorRegister2d VectorRegister2f64;
// XMM Register wrapper for 4x1 integers
struct VectorRegister4i
{
public:
__m128i data;
};
typedef VectorRegister4i VectorRegister4i32;
# ifdef __AVX__
// AVX specific types
// XMM Register wrapper for 4x1 doubles
struct VectorRegister4d
{
public:
__m256d data;
};
typedef VectorRegister4d VectorRegister4f64;
#endif #endif
# ifdef __AVX2__ #if P_INTRINSICS >= 2
// AVX2 specific types typedef __m256 Vec4x2f32Reg;
typedef __m256 Vec8f32Reg;
typedef __m256d Vec2x2f64Reg;
typedef __m256d Vec4f64Reg;
// XMM Register wrapper for 4x1 doubles #elif P_INTRINSICS != P_INTRINSICS_NEON
struct VectorRegister4i64
{
public:
__m256i data;
};
typedef struct alignas(32) Vec4x2f32Reg { float data[8]; } Vec4x2f32Reg;
typedef struct alignas(32) Vec8f32Reg { float data[8]; } Vec8f32Reg;
typedef struct alignas(32) Vec2x2f64Reg { double data[4]; } Vec2x2f64Reg;
typedef struct alignas(32) Vec4f64Reg { double data[4]; } Vec4f64Reg;
#endif #endif
#if P_INTRINSICS == 3
typedef __m256i Vec4x2i32Reg;
typedef __m256i Vec8i32Reg;
typedef __m256i Vec2x2i64Reg;
typedef __m256i Vec4i64Reg;
typedef __m256i Vec4x2u32Reg;
typedef __m256i Vec8u32Reg;
typedef __m256i Vec2x2u64Reg;
typedef __m256i Vec4u64Reg;
#elif P_INTRINSICS != P_INTRINSICS_NEON
typedef struct alignas(32) Vec4x2i32Reg { int data[8]; } Vec4x2i32Reg;
typedef struct alignas(32) Vec8i32Reg { int data[8]; } Vec8i32Reg;
typedef struct alignas(32) Vec2x2i64Reg { Phanes::Core::Types::int64 data[4]; } Vec2x2i64Reg;
typedef struct alignas(32) Vec4i64Reg { Phanes::Core::Types::int64 data[4]; } Vec4i64Reg;
typedef struct alignas(32) Vec4x2u32Reg { unsigned int data[8]; } Vec4x2u32Reg;
typedef struct alignas(32) Vec8u32Reg { unsigned int data[8]; } Vec8u32Reg;
typedef struct alignas(32) Vec2x2u64Reg { Phanes::Core::Types::uint64 data[4]; } Vec2x2u64Reg;
typedef struct alignas(32) Vec4u64Reg { Phanes::Core::Types::uint64 data[4]; } Vec4u64Reg;
#endif
// NEON ...
} }

View File

@ -0,0 +1,6 @@
#pragma once
#include "PhanesVectorMathSSE.hpp" // Include previous
#include <immintrin.h>

View File

@ -0,0 +1,3 @@
#pragma once
#include "PhanesVectorMathAVX.hpp" // Include previous

View File

@ -0,0 +1 @@
#pragma once

View File

@ -0,0 +1,2 @@
#pragma once
#error ARM architecture is not yet supported by PhanesEngine.

View File

@ -0,0 +1,5 @@
#pragma once
#include <nmmintrin.h>

View File

@ -2,16 +2,36 @@
#pragma once #pragma once
// Architecture MACRO
// Implicitly assumes x86 architecture
#ifndef P_ARM_ARCH
# define P_x86_ARCH
#else
# ifdef P_x86_ARCH
# undef P_x86_ARCH
# endif
# error ARM architecture not supported.
#endif
// Set platform MACRO depending on defined build // Set platform MACRO depending on defined build
#define P_PLATFORM_WIN 0
#define P_PLATFORM_LIN 1
#define P_PLATFORM_MAC 2
// #define P_PLATFORM_FBSD 3 -> Is planned for eventual PS5 support
// User defines build platform // User defines build platform
#ifdef P_WIN_BUILD #ifdef P_WIN_BUILD
#define P_PLATFORM 0 # define P_PLATFORM P_PLATFORM_WIN
#elif P_LINUX_BUILD #elif P_LINUX_BUILD
#define P_PLATFORM 1 # define P_PLATFORM P_PLATFORM_LIN
#elif P_APPLE_BUILD # error Linux / Unix system is not yet supported.
#define P_PLATFORM 2 #elif P_MAC_BUILD
#elif P_PS5_BUILD # define P_PLATFORM P_PLATFORM_MAC
#define P_PLATFORM 3 # error Mac target system is not yet supported.
#elif P_PS5_BUILD || P_FBSD_BUILD
# define P_PLATFORM P_PLATFORM_FBSD
# error FreeBSD is not yet supported.
#else #else
# error Your target system is either not supported, or you have yet to define it. # error Your target system is either not supported, or you have yet to define it.
#endif #endif
@ -20,8 +40,8 @@
// Compiler macro definition // Compiler macro definition
// ID's defines like [0-9][0-x] // ID's defined like [0-9][0-x]
// First bracket is compiler, second is the version of the compiler. // First bracket defines compiler, second defines the version of the compiler.
// Visual C++ // Visual C++
#define P_COMPILER_VC22 001 #define P_COMPILER_VC22 001
@ -122,7 +142,7 @@
// Clang // Clang
#elif (defined(__clang__)) #elif (defined(__clang__))
# error PhanesEngine only supports MSVC -> Visual Studio
# if defined(__apple_build_version__) # if defined(__apple_build_version__)
# #
# if (__clang_major__ < 6) # if (__clang_major__ < 6)
@ -188,6 +208,7 @@
// G++ // G++
#elif defined(__GNUC__) || defined(__MINGW32__) #elif defined(__GNUC__) || defined(__MINGW32__)
# error PhanesEngine only supports MSVC -> Visual Studio
# if __GNUC__ >= 14 # if __GNUC__ >= 14
# define P_COMPILER P_COMPILER_GCC14 # define P_COMPILER P_COMPILER_GCC14
# elif __GNUC__ >= 13 # elif __GNUC__ >= 13
@ -225,3 +246,69 @@
#endif #endif
// Vector instruction sets
//
// Selects the SIMD level used by the engine and publishes it as P_INTRINSICS
// (one of the P_INTRINSICS_* constants defined further down).
//
// The internal feature macros P_SSE__ / P_AVX__ / P_AVX2__ are cumulative: a
// higher set always implies the lower ones. This is needed because Visual
// Studio only predefines the newest enabled set (e.g. only __AVX__).
//
// User switches:
//   P_FORCE_INTRINSICS - skip auto-detection; the user must then define
//                        P_INTRINSICS to the numeric value of the wanted level.
//   P_FORCE_FPU        - disable SIMD completely; wins over everything else.
#ifdef P_FORCE_INTRINSICS
// NOTE(review): undefining compiler-predefined macros is non-portable (GCC and
// Clang warn about it). Kept so code that tests these macros directly keeps
// seeing the forced configuration.
#   undef __AVX2__
#   undef __AVX__
#   undef __SSE__
#   ifndef P_INTRINSICS
#       error P_INTRINSICS must be defined by the user, when P_FORCE_INTRINSICS is used.
#   endif
#else
#   if defined(__AVX2__)
#       define P_AVX2__
#   elif defined(__AVX__)
#       define P_AVX__
// MSVC never predefines __SSE__. It reports SSE through _M_X64 (x64 targets,
// also set for ARM64EC, hence the exclusion) or _M_IX86_FP >= 1 (32-bit /arch:SSE+).
#   elif defined(__SSE__) || (defined(_M_X64) && !defined(_M_ARM64EC)) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
#       define P_SSE__
#   endif
#endif // !P_FORCE_INTRINSICS

// Make the feature macros cumulative: AVX2 implies AVX, AVX implies SSE.
#ifdef P_AVX2__
#   define P_AVX__
#endif
#ifdef P_AVX__
#   define P_SSE__
#endif

// Numeric identifiers of the supported instruction set levels.
#define P_INTRINSICS_FPU 0
#define P_INTRINSICS_SSE 1
#define P_INTRINSICS_AVX 2
#define P_INTRINSICS_AVX2 3
#define P_INTRINSICS_NEON 4

#if defined(P_FORCE_FPU) // Force, that no intrinsics may be used.
// Drop a user supplied value first (P_FORCE_INTRINSICS) to avoid a macro redefinition.
#   undef P_INTRINSICS
#   define P_INTRINSICS P_INTRINSICS_FPU
#   undef P_AVX2__
#   undef P_AVX__
#   undef P_SSE__
#else
#   if defined(P_AVX__) && !defined(P_AVX2__)
#       define P_INTRINSICS P_INTRINSICS_AVX
#   elif defined(P_AVX2__)
#       define P_INTRINSICS P_INTRINSICS_AVX2
#   elif (defined(__SSE__) || defined(P_SSE__)) && !defined(P_AVX__)
#       define P_INTRINSICS P_INTRINSICS_SSE
#   elif defined(P_ARM_ARCH)
#       define P_INTRINSICS P_INTRINSICS_NEON
// With P_FORCE_INTRINSICS the user supplied P_INTRINSICS is kept as is.
#   elif !defined(P_FORCE_INTRINSICS)
#       error No SIMD instruction set detected. Use P_FORCE_FPU to disable SIMD extensions.
#   endif
#endif

View File

@ -0,0 +1,179 @@
// Decides at compile time whether an XMM register or a plain array is used as storage.
#pragma once
#include "Core/public/Math/SIMD/PhanesSIMD.h"
#include "Core/public/Math/MathTypes.h"
namespace Phanes::Core::SIMD
{
    /**
     * Compile-time map from (component count, component type, alignment flag)
     * to the type that stores the components, exposed as the nested `type`.
     *
     * Only declared here: combinations without a matching specialization below
     * are incomplete types.
     *
     * @tparam L         Number of components.
     * @tparam T         Component type.
     * @tparam IsAligned true selects a register alias from Phanes::Core::Types,
     *                   false selects a plain array wrapper.
     */
    template<size_t L, typename T, bool IsAligned>
    struct Storage;


    // ---------- Unaligned storage ----------

    // Generic case: one array slot per component.
    template<size_t L, typename T>
    struct Storage<L, T, false>
    {
        struct type
        {
            T data[L];
        };
    };

    // Three components are stored in four slots.
    template<typename T>
    struct Storage<3, T, false>
    {
        struct type
        {
            T data[4];
        };
    };


    // ---------- SSE4.2 ----------
    // Three component vectors share the register type of their four component counterpart.

    template<>
    struct Storage<4, float, true>
    {
        using type = Phanes::Core::Types::Vec4f32Reg;
    };

    template<>
    struct Storage<3, float, true>
    {
        using type = Phanes::Core::Types::Vec4f32Reg;
    };

    template<>
    struct Storage<4, int, true>
    {
        using type = Phanes::Core::Types::Vec4i32Reg;
    };

    template<>
    struct Storage<3, int, true>
    {
        using type = Phanes::Core::Types::Vec4i32Reg;
    };

    template<>
    struct Storage<4, unsigned int, true>
    {
        using type = Phanes::Core::Types::Vec4u32Reg;
    };

    template<>
    struct Storage<3, unsigned int, true>
    {
        using type = Phanes::Core::Types::Vec4u32Reg;
    };

    template<>
    struct Storage<2, double, true>
    {
        using type = Phanes::Core::Types::Vec2f64Reg;
    };

    template<>
    struct Storage<2, Phanes::Core::Types::int64, true>
    {
        using type = Phanes::Core::Types::Vec2i64Reg;
    };

    template<>
    struct Storage<2, Phanes::Core::Types::uint64, true>
    {
        using type = Phanes::Core::Types::Vec2u64Reg;
    };


    // ---------- AVX ----------

    template<>
    struct Storage<4, double, true>
    {
        using type = Phanes::Core::Types::Vec4f64Reg;
    };

    template<>
    struct Storage<3, double, true>
    {
        using type = Phanes::Core::Types::Vec4f64Reg;
    };

    // Eight floats, i.e. two four component vectors side by side.
    template<>
    struct Storage<8, float, true>
    {
        using type = Phanes::Core::Types::Vec4x2f32Reg;
    };


    // ---------- AVX2 ----------

    template<>
    struct Storage<4, Phanes::Core::Types::int64, true>
    {
        using type = Phanes::Core::Types::Vec4i64Reg;
    };

    template<>
    struct Storage<3, Phanes::Core::Types::int64, true>
    {
        using type = Phanes::Core::Types::Vec4i64Reg;
    };

    template<>
    struct Storage<4, Phanes::Core::Types::uint64, true>
    {
        using type = Phanes::Core::Types::Vec4u64Reg;
    };

    template<>
    struct Storage<3, Phanes::Core::Types::uint64, true>
    {
        using type = Phanes::Core::Types::Vec4u64Reg;
    };

    template<>
    struct Storage<8, int, true>
    {
        using type = Phanes::Core::Types::Vec4x2i32Reg;
    };

    template<>
    struct Storage<8, unsigned int, true>
    {
        using type = Phanes::Core::Types::Vec4x2u32Reg;
    };
}
struct Vec4
{
public:
union
{
struct
{
int x, y, z, w;
};
typename Phanes::Core::SIMD::Storage<4, Phanes::Core::Types::int32, true>::type comp;
};
};
struct Vec4x2
{
public:
union
{
struct
{
Vec4 v1;
Vec4 v2;
};
typename Phanes::Core::SIMD::Storage<8, Phanes::Core::Types::int32, true>::type comp;
};
};