forked from sascha/godot
parent
4b8a1d21f3
commit
1cfed0d583
@ -0,0 +1,986 @@
|
||||
#pragma once
|
||||
|
||||
#include "SSE2NEON.h"
|
||||
|
||||
|
||||
#define AVX2NEON_ABI static inline __attribute__((always_inline))
|
||||
|
||||
|
||||
struct __m256d;
|
||||
|
||||
struct __m256 {
|
||||
__m128 lo,hi;
|
||||
__m256() {}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
struct __m256i {
|
||||
__m128i lo,hi;
|
||||
explicit __m256i(const __m256 a) : lo(__m128i(a.lo)),hi(__m128i(a.hi)) {}
|
||||
operator __m256() const {__m256 res; res.lo = __m128(lo);res.hi = __m128(hi); return res;}
|
||||
__m256i() {}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
struct __m256d {
|
||||
float64x2_t lo,hi;
|
||||
__m256d() {}
|
||||
__m256d(const __m256& a) : lo(float64x2_t(a.lo)),hi(float64x2_t(a.hi)) {}
|
||||
__m256d(const __m256i& a) : lo(float64x2_t(a.lo)),hi(float64x2_t(a.hi)) {}
|
||||
};
|
||||
|
||||
#define UNARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a) {type res;res.lo=basic_func(a.lo);res.hi=basic_func(a.hi);return res;}
|
||||
|
||||
|
||||
#define BINARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a,const type& b) {type res;res.lo=basic_func(a.lo,b.lo);res.hi=basic_func(a.hi,b.hi);return res;}
|
||||
#define BINARY_AVX_OP_CAST(type,func,basic_func,bdst,bsrc) AVX2NEON_ABI type func(const type& a,const type& b) {type res;res.lo=bdst(basic_func(bsrc(a.lo),bsrc(b.lo)));res.hi=bdst(basic_func(bsrc(a.hi),bsrc(b.hi)));return res;}
|
||||
|
||||
#define TERNARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a,const type& b,const type& c) {type res;res.lo=basic_func(a.lo,b.lo,c.lo);res.hi=basic_func(a.hi,b.hi,c.hi);return res;}
|
||||
|
||||
|
||||
#define CAST_SIMD_TYPE(to,name,from,basic_dst) AVX2NEON_ABI to name(const from& a) { to res; res.lo = basic_dst(a.lo); res.hi=basic_dst(a.hi); return res;}
|
||||
|
||||
|
||||
|
||||
#define _mm_stream_load_si128 _mm_load_si128
|
||||
#define _mm256_stream_load_si256 _mm256_load_si256
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_blend_ps (__m128 a, __m128 b, const int imm8)
|
||||
{
|
||||
__m128 res;
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
if (imm8 & (1<<i))
|
||||
{
|
||||
res[i] = b[i];
|
||||
}
|
||||
else{
|
||||
res[i] = a[i];
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8)
|
||||
{
|
||||
__m128i res;
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
if (imm8 & (1<<i))
|
||||
{
|
||||
res[i] = b[i];
|
||||
}
|
||||
else{
|
||||
res[i] = a[i];
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_cmpngt_ps (__m128 a, __m128 b)
|
||||
{
|
||||
return __m128(vmvnq_s32(__m128i(_mm_cmpgt_ps(a,b))));
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128i _mm_loadl_epi64 (__m128i const* mem_addr)
|
||||
{
|
||||
int64x2_t y;
|
||||
y[0] = *(int64_t *)mem_addr;
|
||||
y[1] = 0;
|
||||
return __m128i(y);
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
int _mm_movemask_popcnt(__m128 a)
|
||||
{
|
||||
return __builtin_popcount(_mm_movemask_ps(a));
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_maskload_ps (float const * mem_addr, __m128i mask)
|
||||
{
|
||||
__m128 res;
|
||||
for (int i=0;i<4;i++) {
|
||||
if (mask[i] & 0x80000000) res[i] = mem_addr[i]; else res[i] = 0;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a)
|
||||
{
|
||||
for (int i=0;i<4;i++) {
|
||||
if (mask[i] & 0x80000000) mem_addr[i] = a[i];
|
||||
}
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
void _mm_maskstore_epi32 (int * mem_addr, __m128i mask, __m128i a)
|
||||
{
|
||||
for (int i=0;i<4;i++) {
|
||||
if (mask[i] & 0x80000000) mem_addr[i] = a[i];
|
||||
}
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_fnmsub_ps (__m128 a, __m128 b, __m128 c)
|
||||
{
|
||||
return vnegq_f32(vfmaq_f32(c,a,b));
|
||||
}
|
||||
|
||||
#define _mm_fnmsub_ss _mm_fnmsub_ps
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_fnmadd_ps (__m128 a, __m128 b, __m128 c)
|
||||
{
|
||||
return vfmsq_f32(c,a,b);
|
||||
}
|
||||
|
||||
#define _mm_fnmadd_ss _mm_fnmadd_ps
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_broadcast_ss (float const * mem_addr)
|
||||
{
|
||||
return vdupq_n_f32(*mem_addr);
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c)
|
||||
{
|
||||
return vfmaq_f32(vnegq_f32(c),a,b);
|
||||
}
|
||||
|
||||
#define _mm_fmsub_ss _mm_fmsub_ps
|
||||
#define _mm_fmadd_ps _mm_madd_ps
|
||||
#define _mm_fmadd_ss _mm_madd_ps
|
||||
|
||||
|
||||
|
||||
template<int code>
|
||||
AVX2NEON_ABI float32x4_t dpps_neon(const float32x4_t& a,const float32x4_t& b)
|
||||
{
|
||||
float v;
|
||||
v = 0;
|
||||
v += (code & 0x10) ? a[0]*b[0] : 0;
|
||||
v += (code & 0x20) ? a[1]*b[1] : 0;
|
||||
v += (code & 0x40) ? a[2]*b[2] : 0;
|
||||
v += (code & 0x80) ? a[3]*b[3] : 0;
|
||||
float32x4_t res;
|
||||
res[0] = (code & 0x1) ? v : 0;
|
||||
res[1] = (code & 0x2) ? v : 0;
|
||||
res[2] = (code & 0x4) ? v : 0;
|
||||
res[3] = (code & 0x8) ? v : 0;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline float32x4_t dpps_neon<0x7f>(const float32x4_t& a,const float32x4_t& b)
|
||||
{
|
||||
float v;
|
||||
float32x4_t m = _mm_mul_ps(a,b);
|
||||
m[3] = 0;
|
||||
v = vaddvq_f32(m);
|
||||
return _mm_set1_ps(v);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline float32x4_t dpps_neon<0xff>(const float32x4_t& a,const float32x4_t& b)
|
||||
{
|
||||
float v;
|
||||
float32x4_t m = _mm_mul_ps(a,b);
|
||||
v = vaddvq_f32(m);
|
||||
return _mm_set1_ps(v);
|
||||
}
|
||||
|
||||
#define _mm_dp_ps(a,b,c) dpps_neon<c>((a),(b))
|
||||
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_cmpnge_ps (__m128 a, __m128 b)
|
||||
{
|
||||
return __m128(vmvnq_s32(__m128i(_mm_cmpge_ps(a,b))));
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm_permutevar_ps (__m128 a, __m128i b)
|
||||
{
|
||||
__m128 x;
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
x[i] = a[b[i&3]];
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_setzero_si256()
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = res.hi = vdupq_n_s32(0);
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_setzero_ps()
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = res.hi = vdupq_n_f32(0.0f);
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_undefined_si256()
|
||||
{
|
||||
return _mm256_setzero_si256();
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_undefined_ps()
|
||||
{
|
||||
return _mm256_setzero_ps();
|
||||
}
|
||||
|
||||
CAST_SIMD_TYPE(__m256d,_mm256_castps_pd,__m256,float64x2_t)
|
||||
CAST_SIMD_TYPE(__m256i,_mm256_castps_si256,__m256,__m128i)
|
||||
CAST_SIMD_TYPE(__m256, _mm256_castsi256_ps, __m256i,__m128)
|
||||
CAST_SIMD_TYPE(__m256, _mm256_castpd_ps ,__m256d,__m128)
|
||||
CAST_SIMD_TYPE(__m256d, _mm256_castsi256_pd, __m256i,float64x2_t)
|
||||
CAST_SIMD_TYPE(__m256i, _mm256_castpd_si256, __m256d,__m128i)
|
||||
|
||||
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm256_castps256_ps128 (__m256 a)
|
||||
{
|
||||
return a.lo;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_castsi128_si256 (__m128i a)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = a ;
|
||||
res.hi = vdupq_n_s32(0);
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128i _mm256_castsi256_si128 (__m256i a)
|
||||
{
|
||||
return a.lo;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_castps128_ps256 (__m128 a)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = a;
|
||||
res.hi = vdupq_n_f32(0);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_broadcast_ss (float const * mem_addr)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = res.hi = vdupq_n_f32(*mem_addr);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_set_epi32 (int e7, int e6, int e5, int e4, int e3, int e2, int e1, int e0)
|
||||
{
|
||||
__m128i lo = {e0,e1,e2,e3}, hi = {e4,e5,e6,e7};
|
||||
__m256i res;
|
||||
res.lo = lo; res.hi = hi;
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_set1_epi32 (int a)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = res.hi = vdupq_n_s32(a);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
int _mm256_movemask_ps(const __m256& v)
|
||||
{
|
||||
return (_mm_movemask_ps(v.hi) << 4) | _mm_movemask_ps(v.lo);
|
||||
}
|
||||
|
||||
template<int imm8>
|
||||
AVX2NEON_ABI
|
||||
__m256 __mm256_permute_ps (const __m256& a)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = _mm_shuffle_ps(a.lo,a.lo,imm8);
|
||||
res.hi = _mm_shuffle_ps(a.hi,a.hi,imm8);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
#define _mm256_permute_ps(a,c) __mm256_permute_ps<c>(a)
|
||||
|
||||
|
||||
template<int imm8>
|
||||
AVX2NEON_ABI
|
||||
__m256 __mm256_shuffle_ps (const __m256 a,const __m256& b)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = _mm_shuffle_ps(a.lo,b.lo,imm8);
|
||||
res.hi = _mm_shuffle_ps(a.hi,b.hi,imm8);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
#define _mm256_shuffle_ps(a,b,c) __mm256_shuffle_ps<c>(a,b)
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_set1_epi64x (long long a)
|
||||
{
|
||||
__m256i res;
|
||||
int64x2_t t = vdupq_n_s64(a);
|
||||
res.lo = res.hi = __m128i(t);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_permute2f128_ps (__m256 a, __m256 b, int imm8)
|
||||
{
|
||||
__m256 res;
|
||||
__m128 tmp;
|
||||
switch (imm8 & 0x7)
|
||||
{
|
||||
case 0: tmp = a.lo; break;
|
||||
case 1: tmp = a.hi; break;
|
||||
case 2: tmp = b.lo; break;
|
||||
case 3: tmp = b.hi; break;
|
||||
}
|
||||
if (imm8 & 0x8)
|
||||
tmp = _mm_setzero_ps();
|
||||
|
||||
|
||||
|
||||
res.lo = tmp;
|
||||
imm8 >>= 4;
|
||||
|
||||
switch (imm8 & 0x7)
|
||||
{
|
||||
case 0: tmp = a.lo; break;
|
||||
case 1: tmp = a.hi; break;
|
||||
case 2: tmp = b.lo; break;
|
||||
case 3: tmp = b.hi; break;
|
||||
}
|
||||
if (imm8 & 0x8)
|
||||
tmp = _mm_setzero_ps();
|
||||
|
||||
res.hi = tmp;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_moveldup_ps (__m256 a)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo[0] = res.lo[1] = a.lo[0];
|
||||
res.lo[2] = res.lo[3] = a.lo[2];
|
||||
res.hi[0] = res.hi[1] = a.hi[0];
|
||||
res.hi[2] = res.hi[3] = a.hi[2];
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_movehdup_ps (__m256 a)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo[0] = res.lo[1] = a.lo[1];
|
||||
res.lo[2] = res.lo[3] = a.lo[3];
|
||||
res.hi[0] = res.hi[1] = a.hi[1];
|
||||
res.hi[2] = res.hi[3] = a.hi[3];
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_insertf128_ps (__m256 a, __m128 b, int imm8)
|
||||
{
|
||||
__m256 res = a;
|
||||
if (imm8 & 1) res.hi = b;
|
||||
else res.lo = b;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128 _mm256_extractf128_ps (__m256 a, const int imm8)
|
||||
{
|
||||
if (imm8 & 1) return a.hi;
|
||||
return a.lo;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256d _mm256_movedup_pd (__m256d a)
|
||||
{
|
||||
__m256d res;
|
||||
res.hi = a.hi;
|
||||
res.lo[0] = res.lo[1] = a.lo[0];
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_abs_epi32(__m256i a)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = vabsq_s32(a.lo);
|
||||
res.hi = vabsq_s32(a.hi);
|
||||
return res;
|
||||
}
|
||||
|
||||
UNARY_AVX_OP(__m256,_mm256_sqrt_ps,_mm_sqrt_ps)
|
||||
UNARY_AVX_OP(__m256,_mm256_rsqrt_ps,_mm_rsqrt_ps)
|
||||
UNARY_AVX_OP(__m256,_mm256_rcp_ps,_mm_rcp_ps)
|
||||
UNARY_AVX_OP(__m256,_mm256_floor_ps,vrndmq_f32)
|
||||
UNARY_AVX_OP(__m256,_mm256_ceil_ps,vrndpq_f32)
|
||||
|
||||
|
||||
BINARY_AVX_OP(__m256i,_mm256_add_epi32,_mm_add_epi32)
|
||||
BINARY_AVX_OP(__m256i,_mm256_sub_epi32,_mm_sub_epi32)
|
||||
BINARY_AVX_OP(__m256i,_mm256_mullo_epi32,_mm_mullo_epi32)
|
||||
|
||||
BINARY_AVX_OP(__m256i,_mm256_min_epi32,_mm_min_epi32)
|
||||
BINARY_AVX_OP(__m256i,_mm256_max_epi32,_mm_max_epi32)
|
||||
BINARY_AVX_OP_CAST(__m256i,_mm256_min_epu32,vminq_u32,__m128i,uint32x4_t)
|
||||
BINARY_AVX_OP_CAST(__m256i,_mm256_max_epu32,vmaxq_u32,__m128i,uint32x4_t)
|
||||
|
||||
BINARY_AVX_OP(__m256,_mm256_min_ps,_mm_min_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_max_ps,_mm_max_ps)
|
||||
|
||||
BINARY_AVX_OP(__m256,_mm256_add_ps,_mm_add_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_mul_ps,_mm_mul_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_sub_ps,_mm_sub_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_div_ps,_mm_div_ps)
|
||||
|
||||
BINARY_AVX_OP(__m256,_mm256_and_ps,_mm_and_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_andnot_ps,_mm_andnot_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_or_ps,_mm_or_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_xor_ps,_mm_xor_ps)
|
||||
|
||||
BINARY_AVX_OP_CAST(__m256d,_mm256_and_pd,vandq_s64,float64x2_t,int64x2_t)
|
||||
BINARY_AVX_OP_CAST(__m256d,_mm256_or_pd,vorrq_s64,float64x2_t,int64x2_t)
|
||||
BINARY_AVX_OP_CAST(__m256d,_mm256_xor_pd,veorq_s64,float64x2_t,int64x2_t)
|
||||
|
||||
|
||||
|
||||
BINARY_AVX_OP(__m256i,_mm256_and_si256,_mm_and_si128)
|
||||
BINARY_AVX_OP(__m256i,_mm256_or_si256,_mm_or_si128)
|
||||
BINARY_AVX_OP(__m256i,_mm256_xor_si256,_mm_xor_si128)
|
||||
|
||||
|
||||
BINARY_AVX_OP(__m256,_mm256_unpackhi_ps,_mm_unpackhi_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_unpacklo_ps,_mm_unpacklo_ps)
|
||||
TERNARY_AVX_OP(__m256,_mm256_blendv_ps,_mm_blendv_ps)
|
||||
|
||||
|
||||
TERNARY_AVX_OP(__m256,_mm256_fmadd_ps,_mm_fmadd_ps)
|
||||
TERNARY_AVX_OP(__m256,_mm256_fnmadd_ps,_mm_fnmadd_ps)
|
||||
TERNARY_AVX_OP(__m256,_mm256_fmsub_ps,_mm_fmsub_ps)
|
||||
TERNARY_AVX_OP(__m256,_mm256_fnmsub_ps,_mm_fnmsub_ps)
|
||||
|
||||
|
||||
BINARY_AVX_OP(__m256i,_mm256_unpackhi_epi32,_mm_unpackhi_epi32)
|
||||
BINARY_AVX_OP(__m256i,_mm256_unpacklo_epi32,_mm_unpacklo_epi32)
|
||||
|
||||
|
||||
BINARY_AVX_OP(__m256i,_mm256_cmpeq_epi32,_mm_cmpeq_epi32)
|
||||
BINARY_AVX_OP(__m256i,_mm256_cmpgt_epi32,_mm_cmpgt_epi32)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmpeq_ps,_mm_cmpeq_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmpneq_ps,_mm_cmpneq_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmpnlt_ps,_mm_cmpnlt_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmpngt_ps,_mm_cmpngt_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmpge_ps,_mm_cmpge_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmpnge_ps,_mm_cmpnge_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmplt_ps,_mm_cmplt_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmple_ps,_mm_cmple_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmpgt_ps,_mm_cmpgt_ps)
|
||||
BINARY_AVX_OP(__m256,_mm256_cmpnle_ps,_mm_cmpnle_ps)
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_cvtps_epi32 (__m256 a)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = _mm_cvtps_epi32(a.lo);
|
||||
res.hi = _mm_cvtps_epi32(a.hi);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_cvttps_epi32 (__m256 a)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = _mm_cvttps_epi32(a.lo);
|
||||
res.hi = _mm_cvttps_epi32(a.hi);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_loadu_ps (float const * mem_addr)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = *(__m128 *)(mem_addr + 0);
|
||||
res.hi = *(__m128 *)(mem_addr + 4);
|
||||
return res;
|
||||
}
|
||||
#define _mm256_load_ps _mm256_loadu_ps
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
int _mm256_testz_ps (const __m256& a, const __m256& b)
|
||||
{
|
||||
__m256 t = a;
|
||||
if (&a != &b)
|
||||
t = _mm256_and_ps(a,b);
|
||||
|
||||
__m128i l = vshrq_n_s32(__m128i(t.lo),31);
|
||||
__m128i h = vshrq_n_s32(__m128i(t.hi),31);
|
||||
return vaddvq_s32(vaddq_s32(l,h)) == 0;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0)
|
||||
{
|
||||
__m256i res;
|
||||
int64x2_t t0 = {e0,e1};
|
||||
int64x2_t t1 = {e2,e3};
|
||||
res.lo = __m128i(t0);
|
||||
res.hi = __m128i(t1);
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256d _mm256_setzero_pd ()
|
||||
{
|
||||
__m256d res;
|
||||
res.lo = res.hi = vdupq_n_f64(0);
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
int _mm256_movemask_pd (__m256d a)
|
||||
{
|
||||
int res = 0;
|
||||
uint64x2_t x;
|
||||
x = uint64x2_t(a.lo);
|
||||
res |= (x[0] >> 63) ? 1 : 0;
|
||||
res |= (x[0] >> 63) ? 2 : 0;
|
||||
x = uint64x2_t(a.hi);
|
||||
res |= (x[0] >> 63) ? 4 : 0;
|
||||
res |= (x[0] >> 63) ? 8 : 0;
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = __m128i(vceqq_s64(int64x2_t(a.lo),int64x2_t(b.lo)));
|
||||
res.hi = __m128i(vceqq_s64(int64x2_t(a.hi),int64x2_t(b.hi)));
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_cmpeq_pd (__m256d a, __m256d b)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = __m128i(vceqq_f64(a.lo,b.lo));
|
||||
res.hi = __m128i(vceqq_f64(a.hi,b.hi));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
int _mm256_testz_pd (const __m256d& a, const __m256d& b)
|
||||
{
|
||||
__m256d t = a;
|
||||
|
||||
if (&a != &b)
|
||||
t = _mm256_and_pd(a,b);
|
||||
|
||||
return _mm256_movemask_pd(t) == 0;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256d _mm256_blendv_pd (__m256d a, __m256d b, __m256d mask)
|
||||
{
|
||||
__m256d res;
|
||||
uint64x2_t t = uint64x2_t(mask.lo);
|
||||
res.lo[0] = (t[0] >> 63) ? b.lo[0] : a.lo[0];
|
||||
res.lo[1] = (t[1] >> 63) ? b.lo[1] : a.lo[1];
|
||||
t = uint64x2_t(mask.hi);
|
||||
res.hi[0] = (t[0] >> 63) ? b.hi[0] : a.hi[0];
|
||||
res.hi[1] = (t[1] >> 63) ? b.hi[1] : a.hi[1];
|
||||
return res;
|
||||
}
|
||||
|
||||
template<int imm8>
|
||||
__m256 __mm256_dp_ps (__m256 a, __m256 b)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = _mm_dp_ps(a.lo,b.lo,imm8);
|
||||
res.hi = _mm_dp_ps(a.hi,b.hi,imm8);
|
||||
return res;
|
||||
}
|
||||
|
||||
#define _mm256_dp_ps(a,b,c) __mm256_dp_ps<c>(a,b)
|
||||
|
||||
AVX2NEON_ABI
|
||||
double _mm256_permute4x64_pd_select(__m256d a, const int imm8)
|
||||
{
|
||||
switch (imm8 & 3) {
|
||||
case 0:
|
||||
return a.lo[0];
|
||||
case 1:
|
||||
return a.lo[1];
|
||||
case 2:
|
||||
return a.hi[0];
|
||||
case 3:
|
||||
return a.hi[1];
|
||||
}
|
||||
__builtin_unreachable();
|
||||
return 0;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256d _mm256_permute4x64_pd (__m256d a, const int imm8)
|
||||
{
|
||||
__m256d res;
|
||||
res.lo[0] = _mm256_permute4x64_pd_select(a,imm8 >> 0);
|
||||
res.lo[1] = _mm256_permute4x64_pd_select(a,imm8 >> 2);
|
||||
res.hi[0] = _mm256_permute4x64_pd_select(a,imm8 >> 4);
|
||||
res.hi[1] = _mm256_permute4x64_pd_select(a,imm8 >> 6);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8)
|
||||
{
|
||||
return __m256i(_mm256_insertf128_ps((__m256)a,(__m128)b,imm8));
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_loadu_si256 (__m256i const * mem_addr)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = *(__m128i *)((int32_t *)mem_addr + 0);
|
||||
res.hi = *(__m128i *)((int32_t *)mem_addr + 4);
|
||||
return res;
|
||||
}
|
||||
|
||||
#define _mm256_load_si256 _mm256_loadu_si256
|
||||
|
||||
AVX2NEON_ABI
|
||||
void _mm256_storeu_ps (float * mem_addr, __m256 a)
|
||||
{
|
||||
*(__m128 *)(mem_addr + 0) = a.lo;
|
||||
*(__m128 *)(mem_addr + 4) = a.hi;
|
||||
|
||||
}
|
||||
|
||||
#define _mm256_store_ps _mm256_storeu_ps
|
||||
#define _mm256_stream_ps _mm256_storeu_ps
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a)
|
||||
{
|
||||
*(__m128i *)((int *)mem_addr + 0) = a.lo;
|
||||
*(__m128i *)((int *)mem_addr + 4) = a.hi;
|
||||
|
||||
}
|
||||
|
||||
#define _mm256_store_si256 _mm256_storeu_si256
|
||||
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = _mm_maskload_ps(mem_addr,mask.lo);
|
||||
res.hi = _mm_maskload_ps(mem_addr + 4,mask.hi);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_cvtepu8_epi32 (__m128i a)
|
||||
{
|
||||
__m256i res;
|
||||
uint8x16_t x = uint8x16_t(a);
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
res.lo[i] = x[i];
|
||||
res.hi[i] = x[i+4];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_cvtepi8_epi32 (__m128i a)
|
||||
{
|
||||
__m256i res;
|
||||
int8x16_t x = int8x16_t(a);
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
res.lo[i] = x[i];
|
||||
res.hi[i] = x[i+4];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_cvtepu16_epi32 (__m128i a)
|
||||
{
|
||||
__m256i res;
|
||||
uint16x8_t x = uint16x8_t(a);
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
res.lo[i] = x[i];
|
||||
res.hi[i] = x[i+4];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_cvtepi16_epi32 (__m128i a)
|
||||
{
|
||||
__m256i res;
|
||||
int16x8_t x = int16x8_t(a);
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
res.lo[i] = x[i];
|
||||
res.hi[i] = x[i+4];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a)
|
||||
{
|
||||
_mm_maskstore_epi32(mem_addr,mask.lo,a.lo);
|
||||
_mm_maskstore_epi32(mem_addr + 4,mask.hi,a.hi);
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_slli_epi32 (__m256i a, int imm8)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = _mm_slli_epi32(a.lo,imm8);
|
||||
res.hi = _mm_slli_epi32(a.hi,imm8);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_srli_epi32 (__m256i a, int imm8)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = _mm_srli_epi32(a.lo,imm8);
|
||||
res.hi = _mm_srli_epi32(a.hi,imm8);
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_srai_epi32 (__m256i a, int imm8)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = _mm_srai_epi32(a.lo,imm8);
|
||||
res.hi = _mm_srai_epi32(a.hi,imm8);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_sllv_epi32 (__m256i a, __m256i count)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = vshlq_s32(a.lo,count.lo);
|
||||
res.hi = vshlq_s32(a.hi,count.hi);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_srav_epi32 (__m256i a, __m256i count)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = vshlq_s32(a.lo,vnegq_s32(count.lo));
|
||||
res.hi = vshlq_s32(a.hi,vnegq_s32(count.hi));
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_srlv_epi32 (__m256i a, __m256i count)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = __m128i(vshlq_u32(uint32x4_t(a.lo),vnegq_s32(count.lo)));
|
||||
res.hi = __m128i(vshlq_u32(uint32x4_t(a.hi),vnegq_s32(count.hi)));
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8)
|
||||
{
|
||||
return __m256i(_mm256_permute2f128_ps(__m256(a),__m256(b),imm8));
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m128i _mm256_extractf128_si256 (__m256i a, const int imm8)
|
||||
{
|
||||
if (imm8 & 1) return a.hi;
|
||||
return a.lo;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_set1_ps(float x)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = res.hi = vdupq_n_f32(x);
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_set_ps (float e7, float e6, float e5, float e4, float e3, float e2, float e1, float e0)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = _mm_set_ps(e3,e2,e1,e0);
|
||||
res.hi = _mm_set_ps(e7,e6,e5,e4);
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_broadcast_ps (__m128 const * mem_addr)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = res.hi = *mem_addr;
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_cvtepi32_ps (__m256i a)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = _mm_cvtepi32_ps(a.lo);
|
||||
res.hi = _mm_cvtepi32_ps(a.hi);
|
||||
return res;
|
||||
}
|
||||
AVX2NEON_ABI
|
||||
void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a)
|
||||
{
|
||||
for (int i=0;i<4;i++) {
|
||||
if (mask.lo[i] & 0x80000000) mem_addr[i] = a.lo[i];
|
||||
if (mask.hi[i] & 0x80000000) mem_addr[i+4] = a.hi[i];
|
||||
}
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256d _mm256_andnot_pd (__m256d a, __m256d b)
|
||||
{
|
||||
__m256d res;
|
||||
res.lo = float64x2_t(_mm_andnot_ps(__m128(a.lo),__m128(b.lo)));
|
||||
res.hi = float64x2_t(_mm_andnot_ps(__m128(a.hi),__m128(b.hi)));
|
||||
return res;
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256 _mm256_blend_ps (__m256 a, __m256 b, const int imm8)
|
||||
{
|
||||
__m256 res;
|
||||
res.lo = _mm_blend_ps(a.lo,b.lo,imm8 & 0xf);
|
||||
res.hi = _mm_blend_ps(a.hi,b.hi,imm8 >> 4);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8)
|
||||
{
|
||||
__m256i res;
|
||||
res.lo = _mm_blend_epi32(a.lo,b.lo,imm8 & 0xf);
|
||||
res.hi = _mm_blend_epi32(a.hi,b.hi,imm8 >> 4);
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale)
|
||||
{
|
||||
__m256i res;
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
res.lo[i] = *(int *)((char *) base_addr + (vindex.lo[i]*scale));
|
||||
res.hi[i] = *(int *)((char *) base_addr + (vindex.hi[i]*scale));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
AVX2NEON_ABI
|
||||
__m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale)
|
||||
{
|
||||
__m256i res = _mm256_setzero_si256();
|
||||
for (int i=0;i<4;i++)
|
||||
{
|
||||
if (mask.lo[i] >> 31) res.lo[i] = *(int *)((char *) base_addr + (vindex.lo[i]*scale));
|
||||
if (mask.hi[i] >> 31) res.hi[i] = *(int *)((char *) base_addr + (vindex.hi[i]*scale));
|
||||
}
|
||||
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#include "../sys/platform.h"
|
||||
#include "../sys/alloc.h"
|
||||
#include "../sys/barrier.h"
|
||||
#include "../sys/thread.h"
|
||||
#include "../sys/mutex.h"
|
||||
#include "../sys/condition.h"
|
||||
#include "../sys/ref.h"
|
||||
|
||||
#include <dispatch/dispatch.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct TaskScheduler
|
||||
{
|
||||
/*! initializes the task scheduler */
|
||||
static void create(size_t numThreads, bool set_affinity, bool start_threads);
|
||||
|
||||
/*! destroys the task scheduler again */
|
||||
static void destroy() {}
|
||||
|
||||
/* returns the ID of the current thread */
|
||||
static __forceinline size_t threadID()
|
||||
{
|
||||
return threadIndex();
|
||||
}
|
||||
|
||||
/* returns the index (0..threadCount-1) of the current thread */
|
||||
static __forceinline size_t threadIndex()
|
||||
{
|
||||
currentThreadIndex = (currentThreadIndex + 1) % GCDNumThreads;
|
||||
return currentThreadIndex;
|
||||
}
|
||||
|
||||
/* returns the total number of threads */
|
||||
static __forceinline size_t threadCount()
|
||||
{
|
||||
return GCDNumThreads;
|
||||
}
|
||||
|
||||
private:
|
||||
static size_t GCDNumThreads;
|
||||
static size_t currentThreadIndex;
|
||||
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
@ -0,0 +1,21 @@
|
||||
// Copyright 2020 Light Transport Entertainment Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "curve_intersector_virtual.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
void AddVirtualCurveBezierCurveInterector4i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveBezierCurveInterector4v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveBezierCurveInterector4iMB(VirtualCurveIntersector &prim);
|
||||
#if defined(__AVX__)
|
||||
void AddVirtualCurveBezierCurveInterector8i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveBezierCurveInterector8v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveBezierCurveInterector8iMB(VirtualCurveIntersector &prim);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
// Copyright 2020 Light Transport Entertainment Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "curve_intersector_virtual.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
void AddVirtualCurveBSplineCurveInterector4i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveBSplineCurveInterector4v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveBSplineCurveInterector4iMB(VirtualCurveIntersector &prim);
|
||||
#if defined(__AVX__)
|
||||
void AddVirtualCurveBSplineCurveInterector8i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveBSplineCurveInterector8v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveBSplineCurveInterector8iMB(VirtualCurveIntersector &prim);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
// Copyright 2020 Light Transport Entertainment Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "curve_intersector_virtual.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
void AddVirtualCurveCatmullRomCurveInterector4i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveCatmullRomCurveInterector4v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveCatmullRomCurveInterector4iMB(VirtualCurveIntersector &prim);
|
||||
#if defined(__AVX__)
|
||||
void AddVirtualCurveCatmullRomCurveInterector8i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveCatmullRomCurveInterector8v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveCatmullRomCurveInterector8iMB(VirtualCurveIntersector &prim);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
// Copyright 2020 Light Transport Entertainment Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "curve_intersector_virtual.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
void AddVirtualCurveHermiteCurveInterector4i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveHermiteCurveInterector4v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveHermiteCurveInterector4iMB(VirtualCurveIntersector &prim);
|
||||
#if defined(__AVX__)
|
||||
void AddVirtualCurveHermiteCurveInterector8i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveHermiteCurveInterector8v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveHermiteCurveInterector8iMB(VirtualCurveIntersector &prim);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
// Copyright 2020 Light Transport Entertainment Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "curve_intersector_virtual.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
void AddVirtualCurveLinearCurveInterector4i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveLinearCurveInterector4v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveLinearCurveInterector4iMB(VirtualCurveIntersector &prim);
|
||||
#if defined(__AVX__)
|
||||
void AddVirtualCurveLinearCurveInterector8i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveLinearCurveInterector8v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurveLinearCurveInterector8iMB(VirtualCurveIntersector &prim);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,22 @@
|
||||
// Copyright 2020 Light Transport Entertainment Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "curve_intersector_virtual.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace isa
|
||||
{
|
||||
void AddVirtualCurvePointInterector4i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurvePointInterector4v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurvePointInterector4iMB(VirtualCurveIntersector &prim);
|
||||
|
||||
#if defined (__AVX__)
|
||||
void AddVirtualCurvePointInterector8i(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurvePointInterector8v(VirtualCurveIntersector &prim);
|
||||
void AddVirtualCurvePointInterector8iMB(VirtualCurveIntersector &prim);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -1,215 +1,630 @@
|
||||
diff --git a/common/math/math.h b/common/math/math.h
|
||||
index 5af0691a2..1982c27c1 100644
|
||||
--- a/common/math/math.h
|
||||
+++ b/common/math/math.h
|
||||
@@ -13,7 +13,7 @@
|
||||
#include <immintrin.h>
|
||||
diff --git a/thirdparty/embree/common/algorithms/parallel_for.h b/thirdparty/embree/common/algorithms/parallel_for.h
|
||||
index 76c6b740aa..51d296fb16 100644
|
||||
--- a/thirdparty/embree/common/algorithms/parallel_for.h
|
||||
+++ b/thirdparty/embree/common/algorithms/parallel_for.h
|
||||
@@ -27,7 +27,10 @@ namespace embree
|
||||
func(r.begin());
|
||||
});
|
||||
if (!TaskScheduler::wait())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
}
|
||||
#elif defined(TASKING_GCD) && defined(BUILD_IOS)
|
||||
|
||||
@@ -55,13 +58,19 @@ namespace embree
|
||||
func(i);
|
||||
},context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
});
|
||||
if (tbb::task::self().is_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__)
|
||||
-#if (__MSV_VER <= 1700)
|
||||
+#if defined(_MSC_VER) && (_MSC_VER <= 1700)
|
||||
namespace std
|
||||
{
|
||||
__forceinline bool isinf ( const float x ) { return _finite(x) == 0; }
|
||||
@@ -86,7 +86,7 @@
|
||||
return _mm_cvtss_f32(c);
|
||||
#elif defined(TASKING_PPL)
|
||||
@@ -81,7 +90,10 @@ namespace embree
|
||||
#if defined(TASKING_INTERNAL)
|
||||
TaskScheduler::spawn(first,last,minStepSize,func);
|
||||
if (!TaskScheduler::wait())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
|
||||
#elif defined(TASKING_GCD) && defined(BUILD_IOS)
|
||||
|
||||
@@ -109,13 +121,19 @@ namespace embree
|
||||
func(range<Index>(r.begin(),r.end()));
|
||||
},context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#else
|
||||
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
|
||||
func(range<Index>(r.begin(),r.end()));
|
||||
});
|
||||
if (tbb::task::self().is_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#endif
|
||||
|
||||
#elif defined(TASKING_PPL)
|
||||
@@ -147,13 +165,19 @@ namespace embree
|
||||
func(i);
|
||||
},tbb::simple_partitioner(),context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},tbb::simple_partitioner());
|
||||
if (tbb::task::self().is_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#endif
|
||||
}
|
||||
|
||||
-#if defined(__WIN32__) && (__MSC_VER <= 1700)
|
||||
+#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
|
||||
__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
|
||||
__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
|
||||
__forceinline int roundf(float f) { return (int)(f + 0.5f); }
|
||||
diff --git a/common/sys/intrinsics.h b/common/sys/intrinsics.h
|
||||
index 3f0619cac..58f5c3bb4 100644
|
||||
--- a/common/sys/intrinsics.h
|
||||
+++ b/common/sys/intrinsics.h
|
||||
@@ -11,6 +11,12 @@
|
||||
@@ -168,13 +192,19 @@ namespace embree
|
||||
func(i);
|
||||
},ap,context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},ap);
|
||||
if (tbb::task::self().is_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#endif
|
||||
}
|
||||
|
||||
#include <immintrin.h>
|
||||
diff --git a/thirdparty/embree/common/algorithms/parallel_reduce.h b/thirdparty/embree/common/algorithms/parallel_reduce.h
|
||||
index d444b6a2e4..0daf94e50e 100644
|
||||
--- a/thirdparty/embree/common/algorithms/parallel_reduce.h
|
||||
+++ b/thirdparty/embree/common/algorithms/parallel_reduce.h
|
||||
@@ -58,15 +58,19 @@ namespace embree
|
||||
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
|
||||
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
|
||||
reduction,context);
|
||||
- if (context.is_group_execution_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // if (context.is_group_execution_cancelled())
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT end --
|
||||
return v;
|
||||
#else
|
||||
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
|
||||
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
|
||||
reduction);
|
||||
- if (tbb::task::self().is_cancelled())
|
||||
- throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT start --
|
||||
+ // if (tbb::task::self().is_cancelled())
|
||||
+ // throw std::runtime_error("task cancelled");
|
||||
+ // -- GODOT end --
|
||||
return v;
|
||||
#endif
|
||||
#else // TASKING_PPL
|
||||
diff --git a/thirdparty/embree/common/lexers/stringstream.cpp b/thirdparty/embree/common/lexers/stringstream.cpp
|
||||
index 7e7b9faef8..98dc80ad59 100644
|
||||
--- a/thirdparty/embree/common/lexers/stringstream.cpp
|
||||
+++ b/thirdparty/embree/common/lexers/stringstream.cpp
|
||||
@@ -39,7 +39,10 @@ namespace embree
|
||||
std::vector<char> str; str.reserve(64);
|
||||
while (cin->peek() != EOF && !isSeparator(cin->peek())) {
|
||||
int c = cin->get();
|
||||
- if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
|
||||
+ // -- GODOT start --
|
||||
+ // if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
|
||||
+ if (!isValidChar(c)) abort();
|
||||
+ // -- GODOT end --
|
||||
str.push_back((char)c);
|
||||
}
|
||||
str.push_back(0);
|
||||
diff --git a/thirdparty/embree/common/sys/alloc.cpp b/thirdparty/embree/common/sys/alloc.cpp
|
||||
index 4e8928242e..12f143f131 100644
|
||||
--- a/thirdparty/embree/common/sys/alloc.cpp
|
||||
+++ b/thirdparty/embree/common/sys/alloc.cpp
|
||||
@@ -21,7 +21,10 @@ namespace embree
|
||||
void* ptr = _mm_malloc(size,align);
|
||||
|
||||
+// -- GODOT start --
|
||||
+#if defined(__WIN32__) && defined(__MINGW32__)
|
||||
+#include <unistd.h>
|
||||
+#endif
|
||||
+// -- GODOT end --
|
||||
+
|
||||
#if defined(__BMI__) && defined(__GNUC__) && !defined(__INTEL_COMPILER)
|
||||
#if !defined(_tzcnt_u32)
|
||||
#define _tzcnt_u32 __tzcnt_u32
|
||||
@@ -30,8 +36,14 @@
|
||||
#endif
|
||||
if (size != 0 && ptr == nullptr)
|
||||
- throw std::bad_alloc();
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::bad_alloc();
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
|
||||
return ptr;
|
||||
}
|
||||
@@ -128,7 +131,10 @@ namespace embree
|
||||
/* fall back to 4k pages */
|
||||
int flags = MEM_COMMIT | MEM_RESERVE;
|
||||
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
|
||||
- if (ptr == nullptr) throw std::bad_alloc();
|
||||
+ // -- GODOT start --
|
||||
+ // if (ptr == nullptr) throw std::bad_alloc();
|
||||
+ if (ptr == nullptr) abort();
|
||||
+ // -- GODOT end --
|
||||
hugepages = false;
|
||||
return ptr;
|
||||
}
|
||||
@@ -145,7 +151,10 @@ namespace embree
|
||||
return bytesOld;
|
||||
|
||||
#if defined(__WIN32__)
|
||||
-# define NOMINMAX
|
||||
-# include <windows.h>
|
||||
+// -- GODOT start --
|
||||
+#if !defined(NOMINMAX)
|
||||
+// -- GODOT end --
|
||||
+#define NOMINMAX
|
||||
+// -- GODOT start --
|
||||
+#endif
|
||||
+#include "windows.h"
|
||||
+// -- GODOT end --
|
||||
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
|
||||
- throw std::bad_alloc();
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::bad_alloc();
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
|
||||
return bytesNew;
|
||||
}
|
||||
@@ -156,7 +165,10 @@ namespace embree
|
||||
return;
|
||||
|
||||
if (!VirtualFree(ptr,0,MEM_RELEASE))
|
||||
- throw std::bad_alloc();
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::bad_alloc();
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
}
|
||||
|
||||
void os_advise(void *ptr, size_t bytes)
|
||||
@@ -260,7 +272,10 @@ namespace embree
|
||||
|
||||
/* fallback to 4k pages */
|
||||
void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
- if (ptr == MAP_FAILED) throw std::bad_alloc();
|
||||
+ // -- GODOT start --
|
||||
+ // if (ptr == MAP_FAILED) throw std::bad_alloc();
|
||||
+ if (ptr == MAP_FAILED) abort();
|
||||
+ // -- GODOT end --
|
||||
hugepages = false;
|
||||
|
||||
/* advise huge page hint for THP */
|
||||
@@ -277,7 +292,10 @@ namespace embree
|
||||
return bytesOld;
|
||||
|
||||
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
|
||||
- throw std::bad_alloc();
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::bad_alloc();
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
|
||||
return bytesNew;
|
||||
}
|
||||
@@ -291,7 +309,10 @@ namespace embree
|
||||
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
|
||||
bytes = (bytes+pageSize-1) & ~(pageSize-1);
|
||||
if (munmap(ptr,bytes) == -1)
|
||||
- throw std::bad_alloc();
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::bad_alloc();
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
}
|
||||
|
||||
/* hint for transparent huge pages (THP) */
|
||||
diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h
|
||||
index 7914eb7a52..737f14aa6e 100644
|
||||
--- a/thirdparty/embree/common/sys/platform.h
|
||||
+++ b/thirdparty/embree/common/sys/platform.h
|
||||
@@ -174,11 +174,19 @@
|
||||
#define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
|
||||
|
||||
#if defined(DEBUG) // only report file and line in debug mode
|
||||
+ // -- GODOT start --
|
||||
+ // #define THROW_RUNTIME_ERROR(str)
|
||||
+ // throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
|
||||
#define THROW_RUNTIME_ERROR(str) \
|
||||
- throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
|
||||
+ printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort();
|
||||
+ // -- GODOT end --
|
||||
#else
|
||||
+ // -- GODOT start --
|
||||
+ // #define THROW_RUNTIME_ERROR(str)
|
||||
+ // throw std::runtime_error(str);
|
||||
#define THROW_RUNTIME_ERROR(str) \
|
||||
- throw std::runtime_error(str);
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#endif
|
||||
|
||||
/* normally defined in pmmintrin.h, but we always need this */
|
||||
@@ -413,8 +425,16 @@ namespace embree
|
||||
|
||||
__forceinline void pause_cpu(const size_t N = 8)
|
||||
{
|
||||
+// -- GODOT start --
|
||||
for (size_t i=0; i<N; i++)
|
||||
+#if !(defined(__WIN32__) && defined(__MINGW32__))
|
||||
+// -- GODOT end --
|
||||
_mm_pause();
|
||||
#define FATAL(x) THROW_RUNTIME_ERROR(x)
|
||||
diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.cpp b/thirdparty/embree/common/tasking/taskschedulerinternal.cpp
|
||||
index 98d7fb9249..ebf656d1a0 100644
|
||||
--- a/thirdparty/embree/common/tasking/taskschedulerinternal.cpp
|
||||
+++ b/thirdparty/embree/common/tasking/taskschedulerinternal.cpp
|
||||
@@ -48,13 +48,15 @@ namespace embree
|
||||
{
|
||||
Task* prevTask = thread.task;
|
||||
thread.task = this;
|
||||
- try {
|
||||
- if (thread.scheduler->cancellingException == nullptr)
|
||||
+ // -- GODOT start --
|
||||
+ // try {
|
||||
+ // if (thread.scheduler->cancellingException == nullptr)
|
||||
closure->execute();
|
||||
- } catch (...) {
|
||||
- if (thread.scheduler->cancellingException == nullptr)
|
||||
- thread.scheduler->cancellingException = std::current_exception();
|
||||
- }
|
||||
+ // } catch (...) {
|
||||
+ // if (thread.scheduler->cancellingException == nullptr)
|
||||
+ // thread.scheduler->cancellingException = std::current_exception();
|
||||
+ // }
|
||||
+ // -- GODOT end --
|
||||
thread.task = prevTask;
|
||||
add_dependencies(-1);
|
||||
}
|
||||
@@ -297,8 +299,11 @@ namespace embree
|
||||
size_t threadIndex = allocThreadIndex();
|
||||
condition.wait(mutex, [&] () { return hasRootTask.load(); });
|
||||
mutex.unlock();
|
||||
- std::exception_ptr except = thread_loop(threadIndex);
|
||||
- if (except != nullptr) std::rethrow_exception(except);
|
||||
+ // -- GODOT start --
|
||||
+ // std::exception_ptr except = thread_loop(threadIndex);
|
||||
+ // if (except != nullptr) std::rethrow_exception(except);
|
||||
+ thread_loop(threadIndex);
|
||||
+ // -- GODOT end --
|
||||
}
|
||||
|
||||
void TaskScheduler::reset() {
|
||||
@@ -330,7 +335,10 @@ namespace embree
|
||||
return thread->scheduler->cancellingException == nullptr;
|
||||
}
|
||||
|
||||
- std::exception_ptr TaskScheduler::thread_loop(size_t threadIndex)
|
||||
+// -- GODOT start --
|
||||
+#else
|
||||
+ usleep(1);
|
||||
+#endif
|
||||
+// std::exception_ptr TaskScheduler::thread_loop(size_t threadIndex)
|
||||
+ void TaskScheduler::thread_loop(size_t threadIndex)
|
||||
+// -- GODOT end --
|
||||
}
|
||||
|
||||
/* prefetches */
|
||||
diff --git a/common/sys/library.cpp b/common/sys/library.cpp
|
||||
index e448b195d..8ec918660 100644
|
||||
--- a/common/sys/library.cpp
|
||||
+++ b/common/sys/library.cpp
|
||||
@@ -27,7 +27,9 @@ namespace embree
|
||||
|
||||
/* returns address of a symbol from the library */
|
||||
void* getSymbol(lib_t lib, const std::string& sym) {
|
||||
- return GetProcAddress(HMODULE(lib),sym.c_str());
|
||||
{
|
||||
/* allocate thread structure */
|
||||
std::unique_ptr<Thread> mthread(new Thread(threadIndex,this)); // too large for stack allocation
|
||||
@@ -353,9 +361,10 @@ namespace embree
|
||||
swapThread(oldThread);
|
||||
|
||||
/* remember exception to throw */
|
||||
- std::exception_ptr except = nullptr;
|
||||
- if (cancellingException != nullptr) except = cancellingException;
|
||||
-
|
||||
+ // -- GODOT start --
|
||||
+ return (void*) GetProcAddress(HMODULE(lib),sym.c_str());
|
||||
+ // std::exception_ptr except = nullptr;
|
||||
+ // if (cancellingException != nullptr) except = cancellingException;
|
||||
+ // -- GODOT end --
|
||||
/* wait for all threads to terminate */
|
||||
threadCounter--;
|
||||
#if defined(__WIN32__)
|
||||
@@ -373,7 +382,10 @@ namespace embree
|
||||
yield();
|
||||
#endif
|
||||
}
|
||||
- return except;
|
||||
+ // -- GODOT start --
|
||||
+ // return except;
|
||||
+ return;
|
||||
+ // -- GODOT end --
|
||||
}
|
||||
|
||||
/* closes the shared library */
|
||||
diff --git a/common/sys/mutex.h b/common/sys/mutex.h
|
||||
index 1164210f2..f0f55340a 100644
|
||||
--- a/common/sys/mutex.h
|
||||
+++ b/common/sys/mutex.h
|
||||
@@ -47,8 +47,17 @@ namespace embree
|
||||
bool TaskScheduler::steal_from_other_threads(Thread& thread)
|
||||
diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h
|
||||
index c2a9391aea..8bd70b2b8c 100644
|
||||
--- a/thirdparty/embree/common/tasking/taskschedulerinternal.h
|
||||
+++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h
|
||||
@@ -123,7 +123,10 @@ namespace embree
|
||||
{
|
||||
while (flag.load())
|
||||
{
|
||||
+// -- GODOT start --
|
||||
+#if !(defined (__WIN32__) && defined (__MINGW32__))
|
||||
+// -- GODOT end --
|
||||
_mm_pause();
|
||||
_mm_pause();
|
||||
+// -- GODOT start --
|
||||
+#else
|
||||
+ __builtin_ia32_pause();
|
||||
+ __builtin_ia32_pause();
|
||||
+#endif
|
||||
+// -- GODOT end --
|
||||
}
|
||||
|
||||
bool expected = false;
|
||||
@@ -74,8 +82,17 @@ namespace embree
|
||||
{
|
||||
while(flag.load())
|
||||
size_t ofs = bytes + ((align - stackPtr) & (align-1));
|
||||
if (stackPtr + ofs > CLOSURE_STACK_SIZE)
|
||||
- throw std::runtime_error("closure stack overflow");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("closure stack overflow");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
stackPtr += ofs;
|
||||
return &stack[stackPtr-bytes];
|
||||
}
|
||||
@@ -132,7 +135,10 @@ namespace embree
|
||||
__forceinline void push_right(Thread& thread, const size_t size, const Closure& closure)
|
||||
{
|
||||
+// -- GODOT start --
|
||||
+#if !(defined (__WIN32__) && defined(__MINGW32__))
|
||||
+// -- GODOT end --
|
||||
_mm_pause();
|
||||
_mm_pause();
|
||||
+// -- GODOT start --
|
||||
+#else
|
||||
+ __builtin_ia32_pause();
|
||||
+ __builtin_ia32_pause();
|
||||
+#endif
|
||||
+// -- GODOT end --
|
||||
if (right >= TASK_STACK_SIZE)
|
||||
- throw std::runtime_error("task stack overflow");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("task stack overflow");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
|
||||
/* allocate new task on right side of stack */
|
||||
size_t oldStackPtr = stackPtr;
|
||||
@@ -239,7 +245,10 @@ namespace embree
|
||||
void wait_for_threads(size_t threadCount);
|
||||
|
||||
/*! thread loop for all worker threads */
|
||||
- std::exception_ptr thread_loop(size_t threadIndex);
|
||||
+ // -- GODOT start --
|
||||
+ // std::exception_ptr thread_loop(size_t threadIndex);
|
||||
+ void thread_loop(size_t threadIndex);
|
||||
+ // -- GODOT end --
|
||||
|
||||
/*! steals a task from a different thread */
|
||||
bool steal_from_other_threads(Thread& thread);
|
||||
diff --git a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
|
||||
index 20cdd2d320..aa56035026 100644
|
||||
--- a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
|
||||
+++ b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
|
||||
@@ -150,7 +150,10 @@ namespace embree
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/common/sys/platform.h b/common/sys/platform.h
|
||||
index 96f9aab01..08617452f 100644
|
||||
--- a/common/sys/platform.h
|
||||
+++ b/common/sys/platform.h
|
||||
@@ -141,6 +141,9 @@
|
||||
#define DELETED = delete
|
||||
else {
|
||||
- throw std::runtime_error("not supported node type in bvh_statistics");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("not supported node type in bvh_statistics");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
}
|
||||
return s;
|
||||
}
|
||||
diff --git a/thirdparty/embree/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp
|
||||
index ee5c37b238..625fbf6d4f 100644
|
||||
--- a/thirdparty/embree/kernels/common/rtcore.cpp
|
||||
+++ b/thirdparty/embree/kernels/common/rtcore.cpp
|
||||
@@ -230,7 +230,10 @@ RTC_NAMESPACE_BEGIN;
|
||||
if (quality != RTC_BUILD_QUALITY_LOW &&
|
||||
quality != RTC_BUILD_QUALITY_MEDIUM &&
|
||||
quality != RTC_BUILD_QUALITY_HIGH)
|
||||
- throw std::runtime_error("invalid build quality");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("invalid build quality");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
scene->setBuildQuality(quality);
|
||||
RTC_CATCH_END2(scene);
|
||||
}
|
||||
@@ -1383,7 +1386,10 @@ RTC_NAMESPACE_BEGIN;
|
||||
quality != RTC_BUILD_QUALITY_MEDIUM &&
|
||||
quality != RTC_BUILD_QUALITY_HIGH &&
|
||||
quality != RTC_BUILD_QUALITY_REFIT)
|
||||
- throw std::runtime_error("invalid build quality");
|
||||
+ // -- GODOT start --
|
||||
+ // throw std::runtime_error("invalid build quality");
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
geometry->setBuildQuality(quality);
|
||||
RTC_CATCH_END2(geometry);
|
||||
}
|
||||
diff --git a/thirdparty/embree/kernels/common/rtcore.h b/thirdparty/embree/kernels/common/rtcore.h
|
||||
index 6583d12d57..4b070e122b 100644
|
||||
--- a/thirdparty/embree/kernels/common/rtcore.h
|
||||
+++ b/thirdparty/embree/kernels/common/rtcore.h
|
||||
@@ -25,52 +25,58 @@ namespace embree
|
||||
#endif
|
||||
|
||||
/*! Macros used in the rtcore API implementation */
|
||||
-#define RTC_CATCH_BEGIN try {
|
||||
+// -- GODOT start --
|
||||
+#if !defined(likely)
|
||||
+// -- GODOT end --
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
#define likely(expr) (expr)
|
||||
#define unlikely(expr) (expr)
|
||||
@@ -148,6 +151,9 @@
|
||||
#define likely(expr) __builtin_expect((bool)(expr),true )
|
||||
#define unlikely(expr) __builtin_expect((bool)(expr),false)
|
||||
#endif
|
||||
+// -- GODOT start --
|
||||
+#endif
|
||||
+// #define RTC_CATCH_BEGIN try {
|
||||
+#define RTC_CATCH_BEGIN
|
||||
|
||||
-#define RTC_CATCH_END(device) \
|
||||
- } catch (std::bad_alloc&) { \
|
||||
- Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
- } catch (rtcore_error& e) { \
|
||||
- Device::process_error(device,e.error,e.what()); \
|
||||
- } catch (std::exception& e) { \
|
||||
- Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
- } catch (...) { \
|
||||
- Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
- }
|
||||
+// #define RTC_CATCH_END(device) \
|
||||
+// } catch (std::bad_alloc&) { \
|
||||
+// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
+// } catch (rtcore_error& e) { \
|
||||
+// Device::process_error(device,e.error,e.what()); \
|
||||
+// } catch (std::exception& e) { \
|
||||
+// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
+// } catch (...) { \
|
||||
+// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
+// }
|
||||
+#define RTC_CATCH_END(device)
|
||||
|
||||
-#define RTC_CATCH_END2(scene) \
|
||||
- } catch (std::bad_alloc&) { \
|
||||
- Device* device = scene ? scene->device : nullptr; \
|
||||
- Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
- } catch (rtcore_error& e) { \
|
||||
- Device* device = scene ? scene->device : nullptr; \
|
||||
- Device::process_error(device,e.error,e.what()); \
|
||||
- } catch (std::exception& e) { \
|
||||
- Device* device = scene ? scene->device : nullptr; \
|
||||
- Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
- } catch (...) { \
|
||||
- Device* device = scene ? scene->device : nullptr; \
|
||||
- Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
- }
|
||||
+// #define RTC_CATCH_END2(scene) \
|
||||
+// } catch (std::bad_alloc&) { \
|
||||
+// Device* device = scene ? scene->device : nullptr; \
|
||||
+// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
+// } catch (rtcore_error& e) { \
|
||||
+// Device* device = scene ? scene->device : nullptr; \
|
||||
+// Device::process_error(device,e.error,e.what()); \
|
||||
+// } catch (std::exception& e) { \
|
||||
+// Device* device = scene ? scene->device : nullptr; \
|
||||
+// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
+// } catch (...) { \
|
||||
+// Device* device = scene ? scene->device : nullptr; \
|
||||
+// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
+// }
|
||||
+#define RTC_CATCH_END2(scene)
|
||||
|
||||
-#define RTC_CATCH_END2_FALSE(scene) \
|
||||
- } catch (std::bad_alloc&) { \
|
||||
- Device* device = scene ? scene->device : nullptr; \
|
||||
- Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
- return false; \
|
||||
- } catch (rtcore_error& e) { \
|
||||
- Device* device = scene ? scene->device : nullptr; \
|
||||
- Device::process_error(device,e.error,e.what()); \
|
||||
- return false; \
|
||||
- } catch (std::exception& e) { \
|
||||
- Device* device = scene ? scene->device : nullptr; \
|
||||
- Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
- return false; \
|
||||
- } catch (...) { \
|
||||
- Device* device = scene ? scene->device : nullptr; \
|
||||
- Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
- return false; \
|
||||
- }
|
||||
+// #define RTC_CATCH_END2_FALSE(scene) \
|
||||
+// } catch (std::bad_alloc&) { \
|
||||
+// Device* device = scene ? scene->device : nullptr; \
|
||||
+// Device::process_error(device,RTC_ERROR_OUT_OF_MEMORY,"out of memory"); \
|
||||
+// return false; \
|
||||
+// } catch (rtcore_error& e) { \
|
||||
+// Device* device = scene ? scene->device : nullptr; \
|
||||
+// Device::process_error(device,e.error,e.what()); \
|
||||
+// return false; \
|
||||
+// } catch (std::exception& e) { \
|
||||
+// Device* device = scene ? scene->device : nullptr; \
|
||||
+// Device::process_error(device,RTC_ERROR_UNKNOWN,e.what()); \
|
||||
+// return false; \
|
||||
+// } catch (...) { \
|
||||
+// Device* device = scene ? scene->device : nullptr; \
|
||||
+// Device::process_error(device,RTC_ERROR_UNKNOWN,"unknown exception caught"); \
|
||||
+// return false; \
|
||||
+// }
|
||||
+#define RTC_CATCH_END2_FALSE(scene) return false;
|
||||
+// -- GODOT end --
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Error handling and debugging
|
||||
diff --git a/common/sys/sysinfo.cpp b/common/sys/sysinfo.cpp
|
||||
index eb0a10eaf..74438260d 100644
|
||||
--- a/common/sys/sysinfo.cpp
|
||||
+++ b/common/sys/sysinfo.cpp
|
||||
@@ -233,7 +233,7 @@ namespace embree
|
||||
#define RTC_VERIFY_HANDLE(handle) \
|
||||
if (handle == nullptr) { \
|
||||
@@ -97,28 +103,38 @@ namespace embree
|
||||
#define RTC_TRACE(x)
|
||||
#endif
|
||||
|
||||
__noinline int64_t get_xcr0()
|
||||
{
|
||||
-#if defined (__WIN32__)
|
||||
+#if defined (__WIN32__) /* -- GODOT start -- */ && !defined (__MINGW32__) /* -- GODOT end -- */
|
||||
int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32
|
||||
xcr0 = _xgetbv(0);
|
||||
return xcr0;
|
||||
diff --git a/common/tasking/taskschedulerinternal.cpp b/common/tasking/taskschedulerinternal.cpp
|
||||
index 2152e92f4..923d62f83 100644
|
||||
--- a/common/tasking/taskschedulerinternal.cpp
|
||||
+++ b/common/tasking/taskschedulerinternal.cpp
|
||||
@@ -361,7 +361,15 @@ namespace embree
|
||||
if ((loopIndex % LOOP_YIELD_THRESHOLD) == 0)
|
||||
yield();
|
||||
else
|
||||
+// -- GODOT start --
|
||||
+#if !defined(__MINGW32__)
|
||||
- /*! used to throw embree API errors */
|
||||
- struct rtcore_error : public std::exception
|
||||
- {
|
||||
- __forceinline rtcore_error(RTCError error, const std::string& str)
|
||||
- : error(error), str(str) {}
|
||||
-
|
||||
- ~rtcore_error() throw() {}
|
||||
-
|
||||
- const char* what () const throw () {
|
||||
- return str.c_str();
|
||||
- }
|
||||
-
|
||||
- RTCError error;
|
||||
- std::string str;
|
||||
- };
|
||||
+// -- GODOT begin --
|
||||
+// /*! used to throw embree API errors */
|
||||
+// struct rtcore_error : public std::exception
|
||||
+// {
|
||||
+// __forceinline rtcore_error(RTCError error, const std::string& str)
|
||||
+// : error(error), str(str) {}
|
||||
+//
|
||||
+// ~rtcore_error() throw() {}
|
||||
+//
|
||||
+// const char* what () const throw () {
|
||||
+// return str.c_str();
|
||||
+// }
|
||||
+//
|
||||
+// RTCError error;
|
||||
+// std::string str;
|
||||
+// };
|
||||
+// -- GODOT end --
|
||||
_mm_pause();
|
||||
+// -- GODOT start --
|
||||
+#else
|
||||
+ usleep(1);
|
||||
+#endif
|
||||
+// -- GODOT end --
|
||||
loopIndex++;
|
||||
#else
|
||||
yield();
|
||||
diff --git a/common/tasking/taskschedulertbb.h b/common/tasking/taskschedulertbb.h
|
||||
index 98dba2687..369e5edf0 100644
|
||||
--- a/common/tasking/taskschedulertbb.h
|
||||
+++ b/common/tasking/taskschedulertbb.h
|
||||
@@ -12,7 +12,13 @@
|
||||
#include "../sys/ref.h"
|
||||
|
||||
#if defined(__WIN32__)
|
||||
+// -- GODOT start --
|
||||
+#if !defined(NOMINMAX)
|
||||
+// -- GODOT end --
|
||||
# define NOMINMAX
|
||||
+// -- GODOT start --
|
||||
+#endif
|
||||
+// -- GODOT end --
|
||||
#if defined(DEBUG) // only report file and line in debug mode
|
||||
+ // -- GODOT begin --
|
||||
+ // #define throw_RTCError(error,str) \
|
||||
+ // throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
|
||||
#define throw_RTCError(error,str) \
|
||||
- throw rtcore_error(error,std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
|
||||
+ printf(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)), abort();
|
||||
+ // -- GODOT end --
|
||||
#else
|
||||
+ // -- GODOT begin --
|
||||
+ // #define throw_RTCError(error,str) \
|
||||
+ // throw rtcore_error(error,str);
|
||||
#define throw_RTCError(error,str) \
|
||||
- throw rtcore_error(error,str);
|
||||
+ abort();
|
||||
+ // -- GODOT end --
|
||||
#endif
|
||||
|
||||
// We need to define these to avoid implicit linkage against
|
||||
diff a/include/embree3/rtcore_common.h b/include/embree3/rtcore_common.h
|
||||
--- a/include/embree3/rtcore_common.h
|
||||
+++ b/include/embree3/rtcore_common.h
|
||||
@@ -19,7 +19,7 @@
|
||||
#endif
|
||||
#endif
|
||||
#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
|
||||
diff --git a/thirdparty/embree/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp
|
||||
index e75aa968f9..1e23aeb415 100644
|
||||
--- a/thirdparty/embree/kernels/common/scene.cpp
|
||||
+++ b/thirdparty/embree/kernels/common/scene.cpp
|
||||
@@ -800,16 +800,18 @@ namespace embree
|
||||
}
|
||||
|
||||
-#ifdef _WIN32
|
||||
+#if defined(_WIN32) && defined(_MSC_VER)
|
||||
# define RTC_ALIGN(...) __declspec(align(__VA_ARGS__))
|
||||
#else
|
||||
# define RTC_ALIGN(...) __attribute__((aligned(__VA_ARGS__)))
|
||||
/* initiate build */
|
||||
- try {
|
||||
+ // -- GODOT start --
|
||||
+ // try {
|
||||
scheduler->spawn_root([&]() { commit_task(); Lock<MutexSys> lock(schedulerMutex); this->scheduler = nullptr; }, 1, !join);
|
||||
- }
|
||||
- catch (...) {
|
||||
- accels_clear();
|
||||
- updateInterface();
|
||||
- Lock<MutexSys> lock(schedulerMutex);
|
||||
- this->scheduler = nullptr;
|
||||
- throw;
|
||||
- }
|
||||
+ // }
|
||||
+ // catch (...) {
|
||||
+ // accels_clear();
|
||||
+ // updateInterface();
|
||||
+ // Lock<MutexSys> lock(schedulerMutex);
|
||||
+ // this->scheduler = nullptr;
|
||||
+ // throw;
|
||||
+ // }
|
||||
+ // -- GODOT end --
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Loading…
Reference in New Issue