Migrating to Linux
This commit is contained in:
30
Engine/Source/Runtime/Core/Math/SIMD/Alignment.h
Normal file
30
Engine/Source/Runtime/Core/Math/SIMD/Alignment.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include "Core/public/Math/Boilerplate.h"
|
||||
|
||||
|
||||
namespace Phanes::Core::Math::SIMD
|
||||
{
|
||||
|
||||
/// <summary>
/// Fixed-size array wrapper whose storage is aligned to 4 * sizeof(T) bytes.
/// The constructor copies L elements out of the (possibly unaligned) source buffer.
/// </summary>
/// <typeparam name="T">Element type.</typeparam>
/// <typeparam name="L">Number of elements copied and stored.</typeparam>
// NOTE(review): the alignment is derived from 4 elements regardless of L — confirm that is intended for L != 4.
template<typename T, size_t L>
struct alignas(sizeof(T) * 4) AlignedVec
{
public:
    T data[L];

    /// <param name="n_aligned_data">Source buffer; must hold at least L readable elements.</param>
    AlignedVec(const T* n_aligned_data)
    {
        for (size_t i = 0; i < L; ++i)
        {
            data[i] = n_aligned_data[i];
        }
    }

    /// <returns>Read-only pointer to the first aligned element.</returns>
    // const-qualified so the accessor also works on const AlignedVec objects.
    const T* Get() const
    {
        return data;
    }
};
|
||||
|
||||
}
|
228
Engine/Source/Runtime/Core/Math/SIMD/PhanesSIMDTypes.h
Normal file
228
Engine/Source/Runtime/Core/Math/SIMD/PhanesSIMDTypes.h
Normal file
@@ -0,0 +1,228 @@
|
||||
#pragma once
|
||||
|
||||
// Includes the intrinsics header that matches the instruction set selected through P_INTRINSICS (see Platform.h). The SSE path includes <nmmintrin.h>, i.e. everything up to SSE4.2.
|
||||
//
|
||||
// ARM is not supported.
|
||||
|
||||
#include "Core/public/Math/SIMD/Platform.h"
|
||||
#include "Core/public/Math/MathTypes.h"
|
||||
|
||||
// Select the compiler intrinsics header for the active instruction set.
#if (P_INTRINSICS == P_INTRINSICS_AVX2) || (P_INTRINSICS == P_INTRINSICS_AVX)
#   include <immintrin.h>   // AVX and AVX2 use the same header
#elif P_INTRINSICS == P_INTRINSICS_SSE
#   include <nmmintrin.h>
#elif P_INTRINSICS == P_INTRINSICS_NEON
#   include "neon.h" // <- Not supported
#endif
|
||||
|
||||
// use_simd for metaprogramming
|
||||
namespace Phanes::Core::Math::SIMD
|
||||
{
|
||||
|
||||
/// <summary>
/// Compile-time switch that tells whether SIMD operations should be used for a vector,
/// based on its scalar type, its length, its alignment and the instruction sets enabled at compile time.
/// The primary template is the fallback and always answers false.
/// </summary>
/// <typeparam name="T">Type of vector</typeparam>
/// <typeparam name="L">Length of vector</typeparam>
/// <typeparam name="IsAligned">Whether the vector is aligned for simd usage.</typeparam>
template<typename T, size_t L, bool IsAligned>
struct use_simd
{
    // constexpr (implicitly inline in C++17) so that ODR-use of `value` needs no out-of-class definition.
    static constexpr bool value = false;
};
|
||||
|
||||
|
||||
// SSE / NEON
|
||||
|
||||
template<>
|
||||
struct use_simd<float, 4, true>
|
||||
{
|
||||
static const bool value = true && (P_SSE__ || P_NEON__);
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<float, 3, true>
|
||||
{
|
||||
static const bool value = true && (P_SSE__ || P_NEON__);
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<int, 4, true>
|
||||
{
|
||||
static const bool value = true && (P_SSE__ || P_NEON__);
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<int, 3, true>
|
||||
{
|
||||
static const bool value = true && (P_SSE__ || P_NEON__);
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<unsigned int, 4, true>
|
||||
{
|
||||
static const bool value = true && (P_SSE__ || P_NEON__);
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<unsigned int, 3, true>
|
||||
{
|
||||
static const bool value = true && (P_SSE__ || P_NEON__);
|
||||
};
|
||||
|
||||
// SSE
|
||||
|
||||
template<>
|
||||
struct use_simd<double, 2, true>
|
||||
{
|
||||
static const bool value = true && P_SSE__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<Phanes::Core::Types::int64, 2, true>
|
||||
{
|
||||
static const bool value = true && P_SSE__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<Phanes::Core::Types::uint64, 2, true>
|
||||
{
|
||||
static const bool value = true && P_SSE__;
|
||||
};
|
||||
|
||||
|
||||
|
||||
// AVX
|
||||
|
||||
template<>
|
||||
struct use_simd<double, 4, true>
|
||||
{
|
||||
static const bool value = true && P_AVX__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<double, 3, true>
|
||||
{
|
||||
static const bool value = true && P_AVX__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<float, 8, true>
|
||||
{
|
||||
static const bool value = true && P_AVX__;
|
||||
};
|
||||
|
||||
|
||||
// AVX2
|
||||
|
||||
template<>
|
||||
struct use_simd<Phanes::Core::Types::int64, 4, true>
|
||||
{
|
||||
static const bool value = true && P_AVX2__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<Phanes::Core::Types::int64, 3, true>
|
||||
{
|
||||
static const bool value = true && P_AVX2__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<Phanes::Core::Types::uint64, 4, true>
|
||||
{
|
||||
static const bool value = true && P_AVX2__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<Phanes::Core::Types::uint64, 3, true>
|
||||
{
|
||||
static const bool value = true && P_AVX2__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<int, 8, true>
|
||||
{
|
||||
static const bool value = true && P_AVX2__;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct use_simd<unsigned int, 8, true>
|
||||
{
|
||||
static const bool value = true && P_AVX2__;
|
||||
};
|
||||
}
|
||||
|
||||
// Register aliases
|
||||
namespace Phanes::Core::Types
|
||||
{
|
||||
|
||||
// 128-bit register aliases.
// NEON has the highest P_INTRINSICS id, so it is excluded explicitly: a plain ">= SSE" test
// would also hand the x86 __m128 types to a NEON build.
#if (P_INTRINSICS >= P_INTRINSICS_SSE) && (P_INTRINSICS != P_INTRINSICS_NEON)

typedef __m128  Vec4f32Reg;
typedef __m128d Vec2f64Reg;

typedef __m128i Vec4i32Reg;
typedef __m128i Vec2i64Reg;

typedef __m128i Vec4u32Reg;
typedef __m128i Vec2u64Reg;

#elif P_INTRINSICS != P_INTRINSICS_NEON

// Plain-array stand-ins with the same 16-byte size and alignment as the registers above.
typedef struct alignas(16) Vec4f32Reg { float data[4]; } Vec4f32Reg;
typedef struct alignas(16) Vec2f64Reg { double data[2]; } Vec2f64Reg;
typedef struct alignas(16) Vec4i32Reg { int data[4]; } Vec4i32Reg;
typedef struct alignas(16) Vec2i64Reg { Phanes::Core::Types::int64 data[2]; } Vec2i64Reg;
typedef struct alignas(16) Vec4u32Reg { unsigned int data[4]; } Vec4u32Reg;
// Two 64-bit lanes (was declared with four, which made the stand-in 32 bytes wide).
typedef struct alignas(16) Vec2u64Reg { Phanes::Core::Types::uint64 data[2]; } Vec2u64Reg;

#endif
|
||||
|
||||
|
||||
// 256-bit floating point register aliases.
// NEON has the highest P_INTRINSICS id, so it is excluded explicitly: a plain ">= AVX" test
// would also hand the x86 __m256 types to a NEON build.
#if (P_INTRINSICS >= P_INTRINSICS_AVX) && (P_INTRINSICS != P_INTRINSICS_NEON)

typedef __m256  Vec4x2f32Reg;
typedef __m256  Vec8f32Reg;
typedef __m256d Vec2x2f64Reg;
typedef __m256d Vec4f64Reg;

#elif P_INTRINSICS != P_INTRINSICS_NEON

// Plain-array stand-ins with the same 32-byte size and alignment as the registers above.
typedef struct alignas(32) Vec4x2f32Reg { float data[8]; } Vec4x2f32Reg;
typedef struct alignas(32) Vec8f32Reg { float data[8]; } Vec8f32Reg;
typedef struct alignas(32) Vec2x2f64Reg { double data[4]; } Vec2x2f64Reg;
typedef struct alignas(32) Vec4f64Reg { double data[4]; } Vec4f64Reg;

#endif
|
||||
|
||||
|
||||
// 256-bit integer register aliases; the hardware types are used only for the AVX2 level.
#if P_INTRINSICS == P_INTRINSICS_AVX2

using Vec4x2i32Reg = __m256i;
using Vec8i32Reg   = __m256i;
using Vec2x2i64Reg = __m256i;
using Vec4i64Reg   = __m256i;

using Vec4x2u32Reg = __m256i;
using Vec8u32Reg   = __m256i;
using Vec2x2u64Reg = __m256i;
using Vec4u64Reg   = __m256i;

#elif P_INTRINSICS != P_INTRINSICS_NEON

// Plain-array stand-ins, 32 bytes wide and 32-byte aligned.
struct alignas(32) Vec4x2i32Reg { int data[8]; };
struct alignas(32) Vec8i32Reg { int data[8]; };
struct alignas(32) Vec2x2i64Reg { Phanes::Core::Types::int64 data[4]; };
struct alignas(32) Vec4i64Reg { Phanes::Core::Types::int64 data[4]; };

struct alignas(32) Vec4x2u32Reg { unsigned int data[8]; };
struct alignas(32) Vec8u32Reg { unsigned int data[8]; };
struct alignas(32) Vec2x2u64Reg { Phanes::Core::Types::uint64 data[4]; };
struct alignas(32) Vec4u64Reg { Phanes::Core::Types::uint64 data[4]; };

#endif
|
||||
|
||||
// NEON ...
|
||||
}
|
@@ -0,0 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "PhanesVectorMathSSE.hpp" // Include previous
|
||||
|
||||
#include <immintrin.h>
|
||||
|
@@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
#include "PhanesVectorMathAVX.hpp" // Include previous
|
74
Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathFPU.hpp
Normal file
74
Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathFPU.hpp
Normal file
@@ -0,0 +1,74 @@
|
||||
#pragma once
|
||||
|
||||
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
|
||||
#include "Core/public/Math/MathCommon.hpp"
|
||||
|
||||
|
||||
namespace Phanes::Core::Math::SIMD
|
||||
{
|
||||
/// <summary>
|
||||
/// Adds all scalars of the vector.
|
||||
/// </summary>
|
||||
/// <param name="v">Vector</param>
|
||||
/// <returns>Sum stored in v[0:31].</returns>
|
||||
Phanes::Core::Types::Vec4f32Reg vec4_hadd(const Phanes::Core::Types::Vec4f32Reg v)
|
||||
{
|
||||
Phanes::Core::Types::Vec4f32Reg r;
|
||||
r.data[0] = v.data[0] + v.data[1] + v.data[2] + v.data[3];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Adds all scalars of the vector.
|
||||
/// </summary>
|
||||
/// <param name="v">Vector</param>
|
||||
/// <returns>Sum of components.</returns>
|
||||
float vec4_hadd_cvtf32(const Phanes::Core::Types::Vec4f32Reg v)
|
||||
{
|
||||
return v.data[0] + v.data[1] + v.data[2] + v.data[3];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the absolute value of each scalar in the vector.
|
||||
/// </summary>
|
||||
/// <param name="v">Vector</param>
|
||||
/// <returns>Vector with all components positive.</returns>
|
||||
Phanes::Core::Types::Vec4f32Reg vec4_abs(const Phanes::Core::Types::Vec4f32Reg v)
|
||||
{
|
||||
Phanes::Core::Types::Vec4f32Reg r;
|
||||
|
||||
r.data[0] = Abs(v.data[0]);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the dot product of the
|
||||
/// </summary>
|
||||
/// <param name="v1"></param>
|
||||
/// <param name="v2"></param>
|
||||
/// <returns></returns>
|
||||
Phanes::Core::Types::Vec4f32Reg vec4_dot(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
|
||||
{
|
||||
Phanes::Core::Types::Vec4f32Reg r;
|
||||
r.data[0] = v1.data[0] * v1.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] + v1.data[3] * v2.data[3];
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the dot product of the
|
||||
/// </summary>
|
||||
/// <param name="v1"></param>
|
||||
/// <param name="v2"></param>
|
||||
/// <returns></returns>
|
||||
float vec4_dot_cvtf32(const Phanes::Core::Types::Vec4f32Reg v1, const Phanes::Core::Types::Vec4f32Reg v2)
|
||||
{
|
||||
return v1.data[0] * v1.data[0] + v1.data[1] * v2.data[1] + v1.data[2] * v2.data[2] + v1.data[3] * v2.data[3];
|
||||
}
|
||||
|
||||
/// <summary>
/// Compares two 2-component double vectors lane by lane, treating values closer than P_FLT_INAC as equal.
/// </summary>
/// <param name="v1">First vector</param>
/// <param name="v2">Second vector</param>
/// <returns>Vector whose lanes are 0xFFFFFFFF (converted to double) where the inputs match and 0 otherwise.</returns>
// NOTE(review): the value stored is the number 4294967295.0, not an all-ones bit pattern — confirm callers
// only test the lanes for non-zero.
// `inline` because this is a function definition inside a header.
inline Phanes::Core::Types::Vec2f64Reg vec2_eq(const Phanes::Core::Types::Vec2f64Reg v1, const Phanes::Core::Types::Vec2f64Reg v2)
{
    // The local now has the declared return type (it was a Vec4f64Reg) and is actually returned.
    Phanes::Core::Types::Vec2f64Reg r{};

    r.data[0] = (Phanes::Core::Math::Abs(v1.data[0] - v2.data[0]) < P_FLT_INAC) ? 0xFFFFFFFF : 0;
    r.data[1] = (Phanes::Core::Math::Abs(v1.data[1] - v2.data[1]) < P_FLT_INAC) ? 0xFFFFFFFF : 0;

    return r;
}
|
||||
}
|
@@ -0,0 +1,2 @@
|
||||
#pragma once
|
||||
#error ARM architecture is not yet supported by PhanesEngine.
|
1508
Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathSSE.hpp
Normal file
1508
Engine/Source/Runtime/Core/Math/SIMD/PhanesVectorMathSSE.hpp
Normal file
File diff suppressed because it is too large
Load Diff
330
Engine/Source/Runtime/Core/Math/SIMD/Platform.h
Normal file
330
Engine/Source/Runtime/Core/Math/SIMD/Platform.h
Normal file
@@ -0,0 +1,330 @@
|
||||
// Platform / Compiler detection.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Architecture macro.
// x86 is assumed unless the build defines P_ARM_ARCH, which is rejected for now.
#ifdef P_ARM_ARCH
#   undef P_x86_ARCH
#   error ARM architecture not supported.
#else
#   define P_x86_ARCH
#endif
|
||||
|
||||
// Set the platform macro depending on the build the user defined.

#define P_PLATFORM_WIN 0
#define P_PLATFORM_LIN 1
#define P_PLATFORM_MAC 2
#define P_PLATFORM_FBSD 3   // Planned for eventual PS5 support; defined so the branch below names a real id.

// The user selects the build platform by defining exactly one P_*_BUILD macro.
// Every branch tests with defined(), so a value-less definition (`#define P_LINUX_BUILD`)
// is handled the same way as P_WIN_BUILD instead of producing an empty #elif expression.
#if defined(P_WIN_BUILD)
#   define P_PLATFORM P_PLATFORM_WIN
#elif defined(P_LINUX_BUILD)
#   define P_PLATFORM P_PLATFORM_LIN
#   error Linux / Unix system is not yet supported.
#elif defined(P_MAC_BUILD)
#   define P_PLATFORM P_PLATFORM_MAC
#   error Mac target system is not yet supported.
#elif defined(P_PS5_BUILD) || defined(P_FBSD_BUILD)
#   define P_PLATFORM P_PLATFORM_FBSD
#   error FreeBSD is not yet supported.
#else
#   error Your target system is either not supported, or you have yet to define it.
#endif
|
||||
|
||||
// Set compiler depending on defined compiler
|
||||
|
||||
// Compiler macro definition
//
// Each id is a decimal number: the hundreds digit selects the compiler family
// (0 = Visual C++, 1 = Clang, 2 = G++, 3 = Intel C++), the remainder the version slot.
// The ids carry no leading zeros: a leading zero makes the literal octal, which turned
// 008 / 009 into invalid constants and 010 into the value 8.

// Visual C++
#define P_COMPILER_VC22 1
#define P_COMPILER_VC19 2
#define P_COMPILER_VC17 3
#define P_COMPILER_VC15 4
#define P_COMPILER_VC13 5
#define P_COMPILER_VC12 6
#define P_COMPILER_VC10 7
#define P_COMPILER_VC08 8
#define P_COMPILER_VC05 9
#define P_COMPILER_VC03 10
#define P_COMPILER_VC02 11
#define P_COMPILER_VCSP 12

// Clang
#define P_COMPILER_CLANG34 101
#define P_COMPILER_CLANG35 102
#define P_COMPILER_CLANG36 103
#define P_COMPILER_CLANG37 104
#define P_COMPILER_CLANG38 105
#define P_COMPILER_CLANG39 106
#define P_COMPILER_CLANG4 107
#define P_COMPILER_CLANG5 108
#define P_COMPILER_CLANG6 109
#define P_COMPILER_CLANG7 110
#define P_COMPILER_CLANG8 111
#define P_COMPILER_CLANG9 112
#define P_COMPILER_CLANG10 113
#define P_COMPILER_CLANG11 114
#define P_COMPILER_CLANG12 115
#define P_COMPILER_CLANG13 116
#define P_COMPILER_CLANG14 117
#define P_COMPILER_CLANG15 118
#define P_COMPILER_CLANG16 119
#define P_COMPILER_CLANG17 120
#define P_COMPILER_CLANG18 121
#define P_COMPILER_CLANG19 122

// G++
#define P_COMPILER_GCC46 201
#define P_COMPILER_GCC47 202
#define P_COMPILER_GCC48 203
#define P_COMPILER_GCC49 204
#define P_COMPILER_GCC5 205
#define P_COMPILER_GCC6 206
#define P_COMPILER_GCC61 207
#define P_COMPILER_GCC7 208
#define P_COMPILER_GCC8 209
#define P_COMPILER_GCC9 210
#define P_COMPILER_GCC10 211
#define P_COMPILER_GCC11 212
#define P_COMPILER_GCC12 213
#define P_COMPILER_GCC13 214
#define P_COMPILER_GCC14 215

// Intel C++
#define P_COMPILER_INTEL14 301
#define P_COMPILER_INTEL15 302
#define P_COMPILER_INTEL16 303
#define P_COMPILER_INTEL17 304
#define P_COMPILER_INTEL18 305
#define P_COMPILER_INTEL19 306
#define P_COMPILER_INTEL21 307
|
||||
|
||||
// Compiler detection: maps the compiler's own version macros onto a P_COMPILER_* id.
// Only MSVC is accepted at the moment; the Clang and G++ branches raise an error first and
// keep their version tables for later use.
// NOTE(review): nvcc normally also defines _MSC_VER or __GNUC__, so the __CUDACC__ branch at
// the end is probably never reached — confirm whether it should be tested first.

// Visual Studio
#ifdef _MSC_VER
#   if _MSC_VER >= 1930
#       define P_COMPILER P_COMPILER_VC22
#   elif _MSC_VER >= 1920
#       define P_COMPILER P_COMPILER_VC19
#   elif _MSC_VER >= 1910
#       define P_COMPILER P_COMPILER_VC17
#   elif _MSC_VER >= 1900
#       define P_COMPILER P_COMPILER_VC15
#   elif _MSC_VER >= 1800
#       define P_COMPILER P_COMPILER_VC13
#   elif _MSC_VER >= 1700
#       define P_COMPILER P_COMPILER_VC12
#   elif _MSC_VER >= 1600
#       define P_COMPILER P_COMPILER_VC10
#   elif _MSC_VER >= 1500
#       define P_COMPILER P_COMPILER_VC08
#   elif _MSC_VER >= 1400
#       define P_COMPILER P_COMPILER_VC05
#   elif _MSC_VER >= 1310
#       define P_COMPILER P_COMPILER_VC03
#   elif _MSC_VER >= 1300
#       define P_COMPILER P_COMPILER_VC02
#   elif _MSC_VER >= 1200
#       define P_COMPILER P_COMPILER_VCSP
#   endif

// Clang
#elif (defined(__clang__))
#   error PhanesEngine only supports MSVC -> Visual Studio
#   if defined(__apple_build_version__)
        // Apple Clang uses its own version numbering.
#       if (__clang_major__ < 6)
#           error "PhanesEngine requires Clang 3.4 / Apple Clang 6.0 or higher"
#       elif __clang_major__ == 6 && __clang_minor__ == 0
#           define P_COMPILER P_COMPILER_CLANG35
#       elif __clang_major__ == 6 && __clang_minor__ >= 1
#           define P_COMPILER P_COMPILER_CLANG36
#       elif __clang_major__ >= 7
#           define P_COMPILER P_COMPILER_CLANG37
#       endif
#   else
#       if ((__clang_major__ == 3) && (__clang_minor__ < 4)) || (__clang_major__ < 3)
#           error "PhanesEngine requires Clang 3.4 or higher"
#       elif __clang_major__ == 3 && __clang_minor__ == 4
#           define P_COMPILER P_COMPILER_CLANG34
#       elif __clang_major__ == 3 && __clang_minor__ == 5
#           define P_COMPILER P_COMPILER_CLANG35
#       elif __clang_major__ == 3 && __clang_minor__ == 6
#           define P_COMPILER P_COMPILER_CLANG36
#       elif __clang_major__ == 3 && __clang_minor__ == 7
#           define P_COMPILER P_COMPILER_CLANG37
#       elif __clang_major__ == 3 && __clang_minor__ == 8
#           define P_COMPILER P_COMPILER_CLANG38
#       elif __clang_major__ == 3 && __clang_minor__ >= 9
#           define P_COMPILER P_COMPILER_CLANG39
#       elif __clang_major__ == 4
#           define P_COMPILER P_COMPILER_CLANG4
#       elif __clang_major__ == 5
#           define P_COMPILER P_COMPILER_CLANG5
#       elif __clang_major__ == 6
#           define P_COMPILER P_COMPILER_CLANG6
#       elif __clang_major__ == 7
#           define P_COMPILER P_COMPILER_CLANG7
#       elif __clang_major__ == 8
#           define P_COMPILER P_COMPILER_CLANG8
#       elif __clang_major__ == 9
#           define P_COMPILER P_COMPILER_CLANG9
#       elif __clang_major__ == 10
#           define P_COMPILER P_COMPILER_CLANG10
#       elif __clang_major__ == 11
#           define P_COMPILER P_COMPILER_CLANG11
#       elif __clang_major__ == 12
#           define P_COMPILER P_COMPILER_CLANG12
#       elif __clang_major__ == 13
#           define P_COMPILER P_COMPILER_CLANG13
#       elif __clang_major__ == 14
#           define P_COMPILER P_COMPILER_CLANG14
#       elif __clang_major__ == 15
#           define P_COMPILER P_COMPILER_CLANG15
#       elif __clang_major__ == 16
#           define P_COMPILER P_COMPILER_CLANG16
#       elif __clang_major__ == 17
#           define P_COMPILER P_COMPILER_CLANG17
#       elif __clang_major__ == 18
#           define P_COMPILER P_COMPILER_CLANG18
#       elif __clang_major__ >= 19
#           define P_COMPILER P_COMPILER_CLANG19
#       endif
#   endif

// G++
#elif defined(__GNUC__) || defined(__MINGW32__)
#   error PhanesEngine only supports MSVC -> Visual Studio
#   if __GNUC__ >= 14
#       define P_COMPILER P_COMPILER_GCC14
#   elif __GNUC__ >= 13
#       define P_COMPILER P_COMPILER_GCC13
#   elif __GNUC__ >= 12
#       define P_COMPILER P_COMPILER_GCC12
#   elif __GNUC__ >= 11
#       define P_COMPILER P_COMPILER_GCC11
#   elif __GNUC__ >= 10
#       define P_COMPILER P_COMPILER_GCC10
#   elif __GNUC__ >= 9
#       define P_COMPILER P_COMPILER_GCC9
#   elif __GNUC__ >= 8
#       define P_COMPILER P_COMPILER_GCC8
#   elif __GNUC__ >= 7
#       define P_COMPILER P_COMPILER_GCC7
#   elif __GNUC__ >= 6
#       define P_COMPILER P_COMPILER_GCC6
#   elif __GNUC__ >= 5
#       define P_COMPILER P_COMPILER_GCC5
#   elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
#       define P_COMPILER P_COMPILER_GCC49
#   elif __GNUC__ == 4 && __GNUC_MINOR__ >= 8
#       define P_COMPILER P_COMPILER_GCC48
#   elif __GNUC__ == 4 && __GNUC_MINOR__ >= 7
#       define P_COMPILER P_COMPILER_GCC47
#   elif __GNUC__ == 4 && __GNUC_MINOR__ >= 6
#       define P_COMPILER P_COMPILER_GCC46
#   elif ((__GNUC__ == 4) && (__GNUC_MINOR__ < 6)) || (__GNUC__ < 4)
#       error PhanesEngine does not support your compiler.
#   endif

#elif defined(__CUDACC__)
#   error CUDA C++ is not supported by PhanesEngine

#endif
|
||||
|
||||
|
||||
|
||||
|
||||
// Vector instruction sets
//
// Produces:
//   P_AVX2__ / P_AVX__ / P_SSE__ / P_NEON__  - 1 when the set may be used, 0 otherwise.
//   P_INTRINSICS                             - one of the P_INTRINSICS_* ids below.
//
// User switches:
//   P_FORCE_FPU        - disable all SIMD extensions.
//   P_FORCE_INTRINSICS - skip detection; the user must define P_INTRINSICS.
//
// A higher set implies the lower ones (AVX2 => AVX => SSE), because a compiler may only
// predefine the macro of the newest enabled set.

#define P_INTRINSICS_FPU  0
#define P_INTRINSICS_SSE  1
#define P_INTRINSICS_AVX  2
#define P_INTRINSICS_AVX2 3
#define P_INTRINSICS_NEON 4

#ifdef P_FORCE_INTRINSICS

#   undef __AVX2__
#   undef __AVX__
#   undef __SSE__

#   ifndef P_INTRINSICS
#       error P_INTRINSICS must be defined by the user, when P_FORCE_INTRINSICS is used.
#   endif

    // Derive the feature flags from the forced level; otherwise they would all default to 0
    // further down and every use_simd<> query would answer false.
#   if !defined(P_FORCE_FPU)
#       if (P_INTRINSICS == P_INTRINSICS_AVX2) && !defined(P_AVX2__)
#           define P_AVX2__ 1
#       elif (P_INTRINSICS == P_INTRINSICS_AVX) && !defined(P_AVX__)
#           define P_AVX__ 1
#       elif (P_INTRINSICS == P_INTRINSICS_SSE) && !defined(P_SSE__)
#           define P_SSE__ 1
#       endif
#   endif

#elif !defined(P_FORCE_FPU)

#   if defined(__AVX2__)
#       define P_AVX2__ 1
#   elif defined(__AVX__)
#       define P_AVX__ 1
    // MSVC never defines __SSE__: x64 targets always have SSE2 and 32-bit targets report
    // the level through _M_IX86_FP (1 = SSE, 2 = SSE2 or newer).
#   elif defined(__SSE__) \
      || ((defined(_M_X64) || defined(_M_AMD64)) && !defined(_M_ARM64EC)) \
      || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))
#       define P_SSE__ 1
#   endif

#endif // P_FORCE_INTRINSICS / !P_FORCE_FPU

// A higher set implies the lower ones.
#ifdef P_AVX2__
#   define P_AVX__ 1
#endif

#ifdef P_AVX__
#   define P_SSE__ 1
#endif

// Deactivate unset SIMD
#ifndef P_AVX2__
#   define P_AVX2__ 0
#endif

#ifndef P_AVX__
#   define P_AVX__ 0
#endif

#ifndef P_SSE__
#   define P_SSE__ 0
#endif

#ifndef P_NEON__
#   define P_NEON__ 0
#endif

#if defined(P_FORCE_FPU) // Force, that no intrinsics may be used.

    // Every flag is defined at this point, so #undef first to avoid redefining a macro.
#   undef P_INTRINSICS
#   define P_INTRINSICS P_INTRINSICS_FPU
#   undef P_AVX2__
#   define P_AVX2__ 0
#   undef P_AVX__
#   define P_AVX__ 0
#   undef P_SSE__
#   define P_SSE__ 0
#   undef P_NEON__
#   define P_NEON__ 0

#elif !defined(P_FORCE_INTRINSICS) // With P_FORCE_INTRINSICS the user-supplied P_INTRINSICS stays untouched.

#   if P_AVX2__ == 1
#       define P_INTRINSICS P_INTRINSICS_AVX2
#   elif P_AVX__ == 1
#       define P_INTRINSICS P_INTRINSICS_AVX
#   elif P_SSE__ == 1
#       define P_INTRINSICS P_INTRINSICS_SSE
#   elif defined(P_ARM_ARCH)
#       define P_INTRINSICS P_INTRINSICS_NEON
#       undef P_NEON__
#       define P_NEON__ 1
#   else
#       error No SIMD instruction set detected. Use P_FORCE_FPU to disable SIMD extensions.
#   endif

#endif
|
15
Engine/Source/Runtime/Core/Math/SIMD/SIMDIntrinsics.h
Normal file
15
Engine/Source/Runtime/Core/Math/SIMD/SIMDIntrinsics.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "Core/public/Math/SIMD/Platform.h"
|
||||
|
||||
// Include the vector math implementation that belongs to the active instruction set.
// NOTE(review): there is no branch for P_INTRINSICS_FPU, so PhanesVectorMathFPU.hpp is
// not pulled in from here — confirm that it is included elsewhere.
#if (P_INTRINSICS == P_INTRINSICS_AVX2)
#   include "PhanesVectorMathAVX2.hpp"
#elif (P_INTRINSICS == P_INTRINSICS_AVX)
#   include "PhanesVectorMathAVX.hpp"
#elif (P_INTRINSICS == P_INTRINSICS_SSE)
#   include "PhanesVectorMathSSE.hpp"
#elif (P_INTRINSICS == P_INTRINSICS_NEON)
#   include "PhanesVectorMathNeon.hpp"
#endif
|
||||
|
140
Engine/Source/Runtime/Core/Math/SIMD/Storage.h
Normal file
140
Engine/Source/Runtime/Core/Math/SIMD/Storage.h
Normal file
@@ -0,0 +1,140 @@
|
||||
// Decides at compile time whether a SIMD register type or a plain array is used as vector storage.
|
||||
#pragma once
|
||||
|
||||
#include "Core/public/Math/SIMD/PhanesSIMDTypes.h"
|
||||
|
||||
#include "Core/public/Math/MathTypes.h"
|
||||
|
||||
namespace Phanes::Core::Math::SIMD
|
||||
{
|
||||
/// <summary>
/// Maps a vector description onto the type that stores its components.
/// Only the specializations are defined; `Storage::type` is the storage type.
/// </summary>
/// <typeparam name="L">Length of vector</typeparam>
/// <typeparam name="T">Type of vector</typeparam>
/// <typeparam name="UseSimd">Whether a SIMD register type should be used.</typeparam>
template<size_t L, typename T, bool UseSimd>
struct Storage;

// General unaligned memory storage: one array element per component.
// (The array was hard-coded to four elements, which is too small for L > 4.)
template<size_t L, typename T>
struct Storage<L, T, false>
{
    typedef T type[L];
};

// Three-component vectors are stored in four elements.
template<typename T>
struct Storage<3, T, false>
{
    typedef T type[4];
};
|
||||
|
||||
|
||||
// SSE4.2
|
||||
|
||||
template<>
|
||||
struct Storage<4, float, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4f32Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<3, float, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4f32Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<4, int, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4i32Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<3, int, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4i32Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<4, unsigned int, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4u32Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<3, unsigned int, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4u32Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<2, double, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec2f64Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<2, Phanes::Core::Types::int64, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec2i64Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<2, Phanes::Core::Types::uint64, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec2u64Reg type;
|
||||
};
|
||||
|
||||
|
||||
// AVX
|
||||
template<>
|
||||
struct Storage<4, double, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4f64Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<3, double, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4f64Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<8, float, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4x2f32Reg type;
|
||||
};
|
||||
|
||||
|
||||
// AVX2
|
||||
template<>
|
||||
struct Storage<4, Phanes::Core::Types::int64, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4i64Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<3, Phanes::Core::Types::int64, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4i64Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<4, Phanes::Core::Types::uint64, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4u64Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<3, Phanes::Core::Types::uint64, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4u64Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<8, int, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4x2i32Reg type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Storage<8, unsigned int, true>
|
||||
{
|
||||
typedef Phanes::Core::Types::Vec4x2u32Reg type;
|
||||
};
|
||||
}
|
Reference in New Issue
Block a user