Create SIMD boilerplate code for detection and fallbacks.

This commit is contained in:
scorpioblood 2024-05-24 23:43:26 +02:00
parent bb98da5e79
commit 676ee84774
9 changed files with 380 additions and 86 deletions

View File

@ -24,28 +24,28 @@ namespace Phanes::Core::Math {
* Template forward declarations.
*/
template<RealType T> struct TColor;
template<RealType T> struct TLinearColor;
template<RealType T> struct TVector2;
template<RealType T> struct TVector3;
template<RealType T> struct TVector4;
template<RealType T> struct TRay;
template<RealType T> struct TLine;
template<RealType T> struct TPlane;
template<RealType T> struct TMatrix2;
template<RealType T> struct TMatrix3;
template<RealType T> struct TMatrix4;
template<RealType T> struct TQuaternion;
template<RealType T> struct TTransform;
template<RealType T> struct TPoint2;
template<RealType T> struct TPoint3;
template<RealType T> struct TPoint4;
template<IntType T> struct TIntVector2;
template<IntType T> struct TIntVector3;
template<IntType T> struct TIntVector4;
template<IntType T> struct TIntPoint2;
template<IntType T> struct TIntPoint3;
template<IntType T> struct TIntPoint4;
template<RealType T> struct TColor;
template<RealType T> struct TLinearColor;
template<RealType T> struct TVector2;
template<RealType T> struct TVector3;
template<RealType T> struct TRay;
template<RealType T> struct TLine;
template<RealType T> struct TPlane;
template<RealType T> struct TMatrix2;
template<RealType T> struct TMatrix3;
template<RealType T> struct TMatrix4;
template<RealType T> struct TQuaternion;
template<RealType T> struct TTransform;
template<RealType T> struct TPoint2;
template<RealType T> struct TPoint3;
template<RealType T> struct TPoint4;
template<IntType T> struct TIntVector2;
template<IntType T> struct TIntVector3;
template<IntType T> struct TIntVector4;
template<IntType T> struct TIntPoint2;
template<IntType T> struct TIntPoint3;
template<IntType T> struct TIntPoint4;
template<RealType T, bool IsAligned> struct TVector4;
/**
* Specific instantiation of forward declarations.

View File

@ -3,80 +3,91 @@
// ARM is not supported.
#include "Core/public/Math/SIMD/Platform.h"
#include "Core/public/Math/MathTypes.h"
#include <nmmintrin.h> // SSE4.2
#ifdef __AVX__
# include <immintrin.h>
#if P_INTRINSICS == P_INTRINSICS_AVX2
# include "PhanesVectorMathAVX2.hpp"
#elif P_INTRINSICS == P_INTRINSICS_AVX
# include "PhanesVectorMathAVX.hpp"
#elif P_INTRINSICS == P_INTRINSICS_SSE
# include "PhanesVectorMathSSE.hpp"
#elif P_INTRINSICS == P_INTRINSICS_NEON
# include "PhanesVectorMathNeon.hpp"
#elif P_INTRINSICS == P_INTRINSICS_FPU
# include "PhanesVectorMathFPU.hpp"
#endif
namespace Phanes::Core::Math::SIMD
// Register aliases
namespace Phanes::Core::Types
{
// XMM Register wrapper for 4x1 floats
#if P_INTRINSICS >= 1
struct VectorRegister4f
{
public:
__m128 data;
};
typedef __m128 Vec4f32Reg;
typedef __m128d Vec2f64Reg;
typedef VectorRegister4f VectorRegister4f32;
typedef __m128i Vec4i32Reg;
typedef __m128i Vec2i64Reg;
typedef __m128i Vec4u32Reg;
typedef __m128i Vec2u64Reg;
#elif P_INTRINSICS != P_INTRINSICS_NEON
// Array-backed counterparts of the 128-bit register aliases (__m128 / __m128d / __m128i)
// declared in the intrinsics branch above. Every struct is 16-byte aligned and named after
// its lane layout: Vec<lanes><element kind><element bits>Reg.
struct alignas(16) Vec4f32Reg { float data[4]; };
struct alignas(16) Vec2f64Reg { double data[2]; };
struct alignas(16) Vec4i32Reg { int data[4]; };
struct alignas(16) Vec2i64Reg { Phanes::Core::Types::int64 data[2]; };
struct alignas(16) Vec4u32Reg { unsigned int data[4]; };
// Two lanes, matching Vec2i64Reg: four uint64 lanes would double the size of this struct
// compared to its signed twin and to the __m128i alias it stands in for.
struct alignas(16) Vec2u64Reg { Phanes::Core::Types::uint64 data[2]; };
#endif
#if P_INTRINSICS >= 2
// XMM Register wrapper for 2x1 doubles
struct VectorRegister2d
{
public:
__m128d data;
};
typedef __m256 Vec4x2f32Reg;
typedef __m256 Vec8f32Reg;
typedef __m256d Vec2x2f64Reg;
typedef __m256d Vec4f64Reg;
typedef VectorRegister2d VectorRegister2f64;
#elif P_INTRINSICS != P_INTRINSICS_NEON
// Array-backed stand-ins for the __m256 / __m256d aliases of the intrinsics branch above.
// All four are 32-byte aligned; the 4x2 / 2x2 names and the 8 / 4 names share one layout.
struct alignas(32) Vec4x2f32Reg { float data[8]; };
struct alignas(32) Vec8f32Reg { float data[8]; };
struct alignas(32) Vec2x2f64Reg { double data[4]; };
struct alignas(32) Vec4f64Reg { double data[4]; };
#endif
#if P_INTRINSICS == 3
// XMM Register wrapper for 4x1 integers
struct VectorRegister4i
{
public:
__m128i data;
};
typedef __m256i Vec4x2i32Reg;
typedef __m256i Vec8i32Reg;
typedef __m256i Vec2x2i64Reg;
typedef __m256i Vec4i64Reg;
typedef VectorRegister4i VectorRegister4i32;
typedef __m256i Vec4x2u32Reg;
typedef __m256i Vec8u32Reg;
typedef __m256i Vec2x2u64Reg;
typedef __m256i Vec4u64Reg;
#elif P_INTRINSICS != P_INTRINSICS_NEON
// Array-backed stand-ins for the signed __m256i aliases of the intrinsics branch above.
// All four are 32-byte aligned.
struct alignas(32) Vec4x2i32Reg { int data[8]; };
struct alignas(32) Vec8i32Reg { int data[8]; };
struct alignas(32) Vec2x2i64Reg { Phanes::Core::Types::int64 data[4]; };
struct alignas(32) Vec4i64Reg { Phanes::Core::Types::int64 data[4]; };
# ifdef __AVX__
// Array-backed stand-ins for the unsigned __m256i aliases of the intrinsics branch above.
// All four are 32-byte aligned.
struct alignas(32) Vec4x2u32Reg { unsigned int data[8]; };
struct alignas(32) Vec8u32Reg { unsigned int data[8]; };
struct alignas(32) Vec2x2u64Reg { Phanes::Core::Types::uint64 data[4]; };
struct alignas(32) Vec4u64Reg { Phanes::Core::Types::uint64 data[4]; };
// AVX specific types
#endif
// XMM Register wrapper for 4x1 doubles
struct VectorRegister4d
{
public:
__m256d data;
};
typedef VectorRegister4d VectorRegister4f64;
# endif
# ifdef __AVX2__
// AVX2 specific types
// XMM Register wrapper for 4x1 doubles
struct VectorRegister4i64
{
public:
__m256i data;
};
# endif
// NEON ...
}

View File

@ -0,0 +1,6 @@
#pragma once
#include "PhanesVectorMathSSE.hpp" // Include previous
#include <immintrin.h>

View File

@ -0,0 +1,3 @@
#pragma once
#include "PhanesVectorMathAVX.hpp" // Include previous

View File

@ -0,0 +1 @@
#pragma once

View File

@ -0,0 +1,2 @@
#pragma once
#error ARM architecture is not yet supported by PhanesEngine.

View File

@ -0,0 +1,5 @@
#pragma once
#include <nmmintrin.h>

View File

@ -2,26 +2,46 @@
#pragma once
// Architecture MACRO
// Implicitly assumes x86 architecture
#ifndef P_ARM_ARCH
# define P_x86_ARCH
#else
# ifdef P_x86_ARCH
# undef P_x86_ARCH
# endif
# error ARM architecture not supported.
#endif
// Set platform MACRO depending on defined build
#define P_PLATFORM_WIN 0
#define P_PLATFORM_LIN 1
#define P_PLATFORM_MAC 2
// #define P_PLATFORM_FBSD 3 -> Is planned for eventual PS5 support
// User defines build platform
#ifdef P_WIN_BUILD
#define P_PLATFORM 0
# define P_PLATFORM P_PLATFORM_WIN
#elif P_LINUX_BUILD
#define P_PLATFORM 1
#elif P_APPLE_BUILD
#define P_PLATFORM 2
#elif P_PS5_BUILD
#define P_PLATFORM 3
# define P_PLATFORM P_PLATFORM_LIN
# error Linux / Unix system is not yet supported.
#elif P_MAC_BUILD
# define P_PLATFORM P_PLATFORM_MAC
# error Mac target system is not yet supported.
#elif P_PS5_BUILD || P_FBSD_BUILD
# define P_PLATFORM P_PLATFORM_FBSD
# error FreeBSD is not yet supported.
#else
#error Your target system is either not supported, or you have yet to define it.
# error Your target system is either not supported, or you have yet to define it.
#endif
// Set compiler depending on defined compiler
// Compiler macro definition
// ID's defines like [0-9][0-x]
// First bracket is compiler, second is the version of the compiler.
// ID's defined like [0-9][0-x]
// First bracket defines compiler, second defines the version of the compiler.
// Visual C++
#define P_COMPILER_VC22 001
@ -122,7 +142,7 @@
// Clang
#elif (defined(__clang__))
# error PhanesEngine only supports MSVC -> Visual Studio
# if defined(__apple_build_version__)
#
# if (__clang_major__ < 6)
@ -188,6 +208,7 @@
// G++
#elif defined(__GNUC__) || defined(__MINGW32__)
# error PhanesEngine only supports MSVC -> Visual Studio
# if __GNUC__ >= 14
# define P_COMPILER P_COMPILER_GCC14
# elif __GNUC__ >= 13
@ -225,3 +246,69 @@
#endif
// Vector instruction sets
//
// Selects the SIMD tier the engine is compiled for and publishes it as P_INTRINSICS
// (one of the P_INTRINSICS_* ids below), together with the cumulative feature flags
// P_SSE__ / P_AVX__ / P_AVX2__.
//
// User switches:
//   P_FORCE_INTRINSICS - skip detection; the user must define P_INTRINSICS.
//   P_FORCE_FPU        - disable all SIMD extensions; takes precedence over everything else.
//
// Visual Studio only predefines the newest enabled extension (e.g. only __AVX__, not
// __SSE4__ ...) and never predefines __SSE__. SSE is therefore also inferred from the MSVC
// target macros (_M_X64 / _M_AMD64, or _M_IX86_FP >= 1 on 32-bit x86); without that a
// default MSVC build would run into the "No SIMD instruction set detected" error below.

#ifdef P_FORCE_INTRINSICS
// Manual mode: discard the compiler's feature macros and insist on a user-provided tier.
#   undef __AVX2__
#   undef __AVX__
#   undef __SSE__
#   ifndef P_INTRINSICS
#       error P_INTRINSICS must be defined by the user, when P_FORCE_INTRINSICS is used.
#   endif
#else
// Automatic mode: record the highest tier the compiler reports.
#   if defined(__AVX2__)
#       define P_AVX2__
#   elif defined(__AVX__)
#       define P_AVX__
#   elif defined(__SSE__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
#       define P_SSE__
#   endif
#endif // !P_FORCE_INTRINSICS

// Each tier implies all tiers below it.
#ifdef P_AVX2__
#   define P_AVX__
#endif
#ifdef P_AVX__
#   define P_SSE__
#endif

// Tier ids; P_INTRINSICS is compared against these.
#define P_INTRINSICS_FPU 0
#define P_INTRINSICS_SSE 1
#define P_INTRINSICS_AVX 2
#define P_INTRINSICS_AVX2 3
#define P_INTRINSICS_NEON 4

#if defined(P_FORCE_FPU) // Force, that no intrinsics may be used.
// Drop any earlier (user-forced) value first so the FPU id is not a conflicting redefinition.
#   undef P_INTRINSICS
#   define P_INTRINSICS P_INTRINSICS_FPU
#   undef P_AVX2__
#   undef P_AVX__
#   undef P_SSE__
#elif !defined(P_FORCE_INTRINSICS)
// Highest tier first; in manual mode this chain is skipped so the user's value is kept.
#   if defined(P_AVX2__)
#       define P_INTRINSICS P_INTRINSICS_AVX2
#   elif defined(P_AVX__)
#       define P_INTRINSICS P_INTRINSICS_AVX
#   elif defined(P_SSE__)
#       define P_INTRINSICS P_INTRINSICS_SSE
#   elif defined(P_ARM_ARCH)
#       define P_INTRINSICS P_INTRINSICS_NEON
#   else
#       error No SIMD instruction set detected. Use P_FORCE_FPU to disable SIMD extensions.
#   endif
#endif

View File

@ -0,0 +1,179 @@
// Decides at compile time whether an XMM register or a plain array is used as storage.
#pragma once
#include "Core/public/Math/SIMD/PhanesSIMD.h"
#include "Core/public/Math/MathTypes.h"
namespace Phanes::Core::SIMD
{
    /// Compile-time lookup from (lane count L, element type T, alignment flag) to the type
    /// that holds the lanes, exposed as the nested `type`.
    /// The primary template is declared only; combinations without a specialization below
    /// have no `type`.
    template<size_t L, typename T, bool IsAligned>
    struct Storage;


    // ---- Unaligned storage: plain arrays ----

    /// Any lane count / element type: an array of exactly L elements.
    template<size_t L, typename T>
    struct Storage<L, T, false>
    {
        struct type
        {
            T data[L];
        };
    };

    /// Three lanes are stored in a four-element array.
    template<typename T>
    struct Storage<3, T, false>
    {
        struct type
        {
            T data[4];
        };
    };


    // ---- Aligned storage, SSE4.2 register aliases ----
    // Three-lane variants reuse the four-lane register type.

    template<>
    struct Storage<4, float, true>
    {
        using type = Phanes::Core::Types::Vec4f32Reg;
    };

    template<>
    struct Storage<3, float, true>
    {
        using type = Phanes::Core::Types::Vec4f32Reg;
    };

    template<>
    struct Storage<4, int, true>
    {
        using type = Phanes::Core::Types::Vec4i32Reg;
    };

    template<>
    struct Storage<3, int, true>
    {
        using type = Phanes::Core::Types::Vec4i32Reg;
    };

    template<>
    struct Storage<4, unsigned int, true>
    {
        using type = Phanes::Core::Types::Vec4u32Reg;
    };

    template<>
    struct Storage<3, unsigned int, true>
    {
        using type = Phanes::Core::Types::Vec4u32Reg;
    };

    template<>
    struct Storage<2, double, true>
    {
        using type = Phanes::Core::Types::Vec2f64Reg;
    };

    template<>
    struct Storage<2, Phanes::Core::Types::int64, true>
    {
        using type = Phanes::Core::Types::Vec2i64Reg;
    };

    template<>
    struct Storage<2, Phanes::Core::Types::uint64, true>
    {
        using type = Phanes::Core::Types::Vec2u64Reg;
    };


    // ---- Aligned storage, AVX register aliases ----

    template<>
    struct Storage<4, double, true>
    {
        using type = Phanes::Core::Types::Vec4f64Reg;
    };

    template<>
    struct Storage<3, double, true>
    {
        using type = Phanes::Core::Types::Vec4f64Reg;
    };

    template<>
    struct Storage<8, float, true>
    {
        using type = Phanes::Core::Types::Vec4x2f32Reg;
    };


    // ---- Aligned storage, AVX2 register aliases ----

    template<>
    struct Storage<4, Phanes::Core::Types::int64, true>
    {
        using type = Phanes::Core::Types::Vec4i64Reg;
    };

    template<>
    struct Storage<3, Phanes::Core::Types::int64, true>
    {
        using type = Phanes::Core::Types::Vec4i64Reg;
    };

    template<>
    struct Storage<4, Phanes::Core::Types::uint64, true>
    {
        using type = Phanes::Core::Types::Vec4u64Reg;
    };

    template<>
    struct Storage<3, Phanes::Core::Types::uint64, true>
    {
        using type = Phanes::Core::Types::Vec4u64Reg;
    };

    template<>
    struct Storage<8, int, true>
    {
        using type = Phanes::Core::Types::Vec4x2i32Reg;
    };

    template<>
    struct Storage<8, unsigned int, true>
    {
        using type = Phanes::Core::Types::Vec4x2u32Reg;
    };
}
// Four int components (x, y, z, w) overlaid on the aligned four-lane int32 storage type.
// Both views share the same bytes through the anonymous union.
// NOTE: the unnamed struct inside the union is a compiler extension, not ISO C++.
struct Vec4
{
    union
    {
        // Named, per-component access.
        struct
        {
            int x;
            int y;
            int z;
            int w;
        };

        // Whole-vector access through the Storage-selected type.
        typename Phanes::Core::SIMD::Storage<4, Phanes::Core::Types::int32, true>::type comp;
    };
};
// Two Vec4 values (v1, v2) overlaid on the aligned eight-lane int32 storage type.
// Both views share the same bytes through the anonymous union.
// NOTE: the unnamed struct inside the union is a compiler extension, not ISO C++.
struct Vec4x2
{
    union
    {
        // Access as a pair of four-component vectors.
        struct
        {
            Vec4 v1;
            Vec4 v2;
        };

        // Access to all eight lanes through the Storage-selected type.
        typename Phanes::Core::SIMD::Storage<8, Phanes::Core::Types::int32, true>::type comp;
    };
};