// 🤖 This file is assembled from several headers to compile scripts and shaders at runtime #define VFX_IS_UBER_HEADER #define VFX_USES_RE_SHADERS 0 #ifdef __METAL_VERSION__ #include #include #include #include #else #include #include #include #include #include #ifdef __OBJC__ #include #endif // __OBJC__ #ifdef __cplusplus #include #include #include using namespace simd; #endif #include #endif // !__METAL_VERSION__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-function" #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wglobal-constructors" #pragma clang diagnostic ignored "-Wunreachable-code" #pragma clang diagnostic ignored "-Wundef" // MARK: - vfx_base.h #ifndef VFX_BASE #define VFX_BASE #define VFX_MATH_DEBUG_CHECK 0 #if !defined(VFX_IS_UBER_HEADER) && !defined(__METAL_VERSION__) && VFX_MATH_DEBUG_CHECK #define VFX_MATH_ASSERT(e) (assert(e)) #else #define VFX_MATH_ASSERT(e) ((void)0) #endif #ifndef __METAL_VERSION__ #else using namespace metal; #endif #ifdef __cplusplus #define VFX_EXTERN extern "C" #define VFX_EXPORT_SIMD extern "C" __attribute__((visibility("default"))) #define VFX_EXPORT_C __attribute__((visibility("default"))) extern "C" #define VFX_EXPORT_C_BEGIN extern "C" { #define VFX_EXPORT_C_END } #else #define VFX_EXTERN extern #define VFX_EXPORT_SIMD extern __attribute__((visibility("default"))) #define VFX_EXPORT_C #define VFX_EXPORT_C_BEGIN #define VFX_EXPORT_C_END #endif #ifdef DEBUG #define VFX_ASSERT(a) assert(a) #else #define VFX_ASSERT(a) #endif #ifndef NS_ASSUME_NONNULL_BEGIN #define NS_ASSUME_NONNULL_BEGIN _Pragma("clang assume_nonnull begin") #endif #ifndef NS_ASSUME_NONNULL_END #define NS_ASSUME_NONNULL_END _Pragma("clang assume_nonnull end") #endif #ifdef __METAL_VERSION__ #define VFX_OVERLOAD #define VFX_SIMD #define VFX_SIMD_UNPURE #define vfx_out thread #define vfx_device_out device #define vfx_ref thread #define vfx_constant_ref constant #else #define VFX_OVERLOAD 
__attribute__((overloadable)) // This is for pure functions. no side-effect allowed #define VFX_SIMD VFX_OVERLOAD __attribute__((always_inline, __const__, __nodebug__)) #define VFX_SIMD_UNPURE VFX_OVERLOAD __attribute__((always_inline, __nodebug__)) #define vfx_out #define vfx_device_out #define vfx_ref #define vfx_constant_ref #endif #define VFX_ENUM(type) enum __attribute__((enum_extensibility(closed))) : type // Packed types typedef struct { float x, y; } vfx_packed_float2; typedef struct { float x, y, z; } vfx_packed_float3; typedef struct { float x, y, z, w; } vfx_packed_float4; typedef struct { uint16_t x, y; } vfx_packed_half2; typedef struct { uint16_t x, y, z; } vfx_packed_half3; typedef struct { uint16_t x, y, z, w; } vfx_packed_half4; // Axis Aligned Bounding Box (need to maintain [center.w = 1.f & half_size.w = 0.f]) typedef struct { simd_float4 center_w1; simd_float4 half_size_w0; } vfx_aabb; // use this instead of simd_float2 when you have no parallelism in operations typedef struct { float lower_bound, upper_bound; } vfx_float_range; typedef struct { size_t lower_bound, upper_bound; } vfx_int_range; typedef struct { float scale, bias; } vfx_scale_bias; // 3D Ray / Segment typedef struct { simd_float4 origin; // w = 1 simd_float4 direction; // xyz = normalized, w = 0 simd_float4 invdir_length; // xyz = 1 / dir, w = length, INF for ray, >0 for segment } vfx_ray; typedef struct { simd_float3 a, b, c; } vfx_triangle; typedef struct { simd_float4 vector; } vfx_plane; // normal(x, y, z) + d typedef struct { simd_float4 vector; } vfx_sphere; // xyz: center w: radius // frustum (most of the time a truncated pyramid) consisting of 6 planes (pointing inward?) 
// order of the planes : right(0) - left(1) - bottom(2) - top(3) - far(4) - near(5) typedef struct { vfx_plane planes[6]; } vfx_frustum; // Oriented Bounding Box typedef struct { simd_float4 center; // center.w should always be 1 simd_float3x4 axis; simd_float4 halfSize; // we could store the halfSize in the w of axis... } vfx_obb; typedef struct { simd_float3 apex; // Tip of the cone simd_float3 direction; float height; float radius; float cosAngle; float tanAngle; } vfx_cone; typedef struct { simd_float3 center; simd_float3 normal; simd_float3 tangent; simd_packed_float2 halfExtents; } vfx_ellipse; typedef struct { simd_float3 center; simd_float3 halfExtents; } vfx_ellipsoid; typedef VFX_ENUM(int8_t) { vfx_classify_inside = 0, vfx_classify_outside = 1, vfx_classify_crossing = 2 } vfx_classify_state; #endif // VFX_BASE // MARK: - vfx_math.h #ifndef VFX_MATH_H #define VFX_MATH_H #define VFX_DEBUG_INTERSECTION 0 #define VFX_INTERSECTION_ERROR_THRESHOLD 0.01 NS_ASSUME_NONNULL_BEGIN // base math functions #ifdef __METAL_VERSION__ // constants #define VFX_PI M_PI_F #define VFX_1_PI M_1_PI_F #define VFX_PI_2 M_PI_2_F #define VFX_2PI (2.f * M_PI_F) #define VFX_EPSILON FLT_EPSILON #define VFX_MIN FLT_MIN #define VFX_INFINITY INFINITY #define VFX_NAN NAN // make are just ctors in C++/Metal static inline simd_float2 VFX_SIMD vfx_make_float2(float x, float y) { return float2(x, y); } static inline simd_float3 VFX_SIMD vfx_make_float3(float x, float y, float z) { return float3(x, y, z); } static inline simd_float3 VFX_SIMD vfx_make_float3(simd_float2 xy, float z) { return float3(xy, z); } static inline simd_float3 VFX_SIMD vfx_make_float3(float x, simd_float2 yz) { return float3(x, yz); } static inline simd_float4 VFX_SIMD vfx_make_float4(float x, float y, float z, float w) { return float4(x, y, z, w); } static inline simd_float4 VFX_SIMD vfx_make_float4(simd_float2 xy, float z, float w) { return float4(xy, z, w); } static inline simd_float4 VFX_SIMD 
vfx_make_float4(simd_float3 xyz, float w) { return float4(xyz, w); } static inline simd_float4 VFX_SIMD vfx_make_float4(simd_float3 xyz) { float4 r = 0; r.xyz = xyz; return r; } static inline simd_float4 VFX_SIMD vfx_make_float4_undef(simd_float3 xyz) { float4 r; r.xyz = xyz; return r; } #define vfx_bool4 bool4 #define vfx_convert_char4 char4 #define vfx_convert_uchar4 uchar4 #define vfx_convert_short4 short4 #define vfx_convert_ushort4 ushort4 #define vfx_convert_int3 int3 #define vfx_convert_int4 int4 #define vfx_convert_uint4 uint4 #define vfx_convert_float4 float4 #define vfx_any(a) any(a) #define vfx_all(a) all(a) #define vfx_select(a, b, c) select(a, b, c) static inline float VFX_SIMD vfx_min(float a, float b) { return fmin(a, b); } static inline simd_float2 VFX_SIMD vfx_min(simd_float2 a, simd_float2 b) { return fmin(a, b); } static inline simd_float3 VFX_SIMD vfx_min(simd_float3 a, simd_float3 b) { return fmin(a, b); } static inline simd_float4 VFX_SIMD vfx_min(simd_float4 a, simd_float4 b) { return fmin(a, b); } static inline float VFX_SIMD vfx_max(float a, float b) { return fmax(a, b); } static inline simd_float2 VFX_SIMD vfx_max(simd_float2 a, simd_float2 b) { return fmax(a, b); } static inline simd_float3 VFX_SIMD vfx_max(simd_float3 a, simd_float3 b) { return fmax(a, b); } static inline simd_float4 VFX_SIMD vfx_max(simd_float4 a, simd_float4 b) { return fmax(a, b); } #define vfx_abs(a) fabs(a) #define vfx_copysign(a, b) copysign((a), (b)) #define vfx_ceil(a) ceil(a) #define vfx_floor(a) floor(a) #define vfx_rint(a) rint(a) #define vfx_trunc(a) trunc(a) #define vfx_sign(a) sign(a) #define vfx_step(a,b) step(a, b) #define vfx_fract(a) fract(a) #define vfx_fmod(a, b) fmod(a, b) #define vfx_recip(a) (1/(a)) #define vfx_sqrt(a) sqrt(a) #define vfx_rsqrt(a) rsqrt(a) #define vfx_precise_rsqrt(a) precise::rsqrt(a) #define vfx_cbrt(a) pow(a, 1.f/3.f) #define vfx_cos(a) cos(a) #define vfx_sin(a) sin(a) #define vfx_tan(a) tan(a) #define vfx_acos(a) acos(a) 
#define vfx_asin(a) asin(a) #define vfx_atan(a) atan(a) #define vfx_atan2(a, b) atan2(a, b) #define vfx_acosh(a) acosh(a) #define vfx_asinh(a) asinh(a) #define vfx_atanh(a) atanh(a) #define vfx_sinpi(a) sinpi(a) #define vfx_cospi(a) cospi(a) #define vfx_tanpi(a) tanpi(a) #define vfx_exp(a) exp(a) #define vfx_exp2(a) exp2(a) #define vfx_exp10(a) exp10(a) #define vfx_log(a) log(a) #define vfx_pow(a,b) pow(a, b) #define vfx_length(a) length(a) #define vfx_length_squared(a) length_squared(a) #define vfx_distance(a, b) distance(a, b) #define vfx_distance_squared(a, b) distance_squared(a, b) #define vfx_normalize(a) normalize(a) #define vfx_cross(a, b) cross(a, b) #define vfx_dot(a, b) dot(a, b) #define vfx_transpose(a) transpose(a) #define vfx_is_infinite(a) isinf(a) #define vfx_muladd fma #define simd_bitselect select template static inline T VFX_SIMD vfx_reduce_min(vec a) { return fmin(a.x, a.y); } template static inline T VFX_SIMD vfx_reduce_min(vec a) { return fmin3(a.x, a.y, a.z); } template static inline T VFX_SIMD vfx_reduce_min(vec a) { return vfx_reduce_min(min(a.xy, a.zw)); } template static inline T VFX_SIMD vfx_reduce_max(vec a) { return fmax(a.x, a.y); } template static inline T VFX_SIMD vfx_reduce_max(vec a) { return fmax3(a.x, a.y, a.z); } template static inline T VFX_SIMD vfx_reduce_max(vec a) { return vfx_reduce_max(max(a.xy, a.zw)); } template static inline T VFX_SIMD vfx_reduce_add(vec a) { return a.x + a.y; } template static inline T VFX_SIMD vfx_reduce_add(vec a) { return a.x + a.y + a.z; } template static inline T VFX_SIMD vfx_reduce_add(vec a) { return a.x + a.y + a.z + a.w; } #else // constants #define VFX_PI (float)(M_PI) #define VFX_1_PI (float)(M_1_PI) #define VFX_PI_2 (float)(M_PI_2) #define VFX_2PI (2.f * (float)(M_PI)) #define VFX_EPSILON FLT_EPSILON // 1.19209290E-07F #define VFX_MIN 1.17549435e-38f // FLT_MIN #define VFX_INFINITY INFINITY #define VFX_NAN NAN // Macros are not exported correctly in swift so we have to do static inline 
wrapping functions static inline simd_float2 VFX_SIMD vfx_make_float2(float x, float y) { return simd_make_float2(x, y); } static inline simd_float3 VFX_SIMD vfx_make_float3(float x, float y, float z) { return simd_make_float3(x, y, z); } static inline simd_float3 VFX_SIMD vfx_make_float3(simd_float2 xy, float z) { return simd_make_float3(xy, z); } static inline simd_float3 VFX_SIMD vfx_make_float3(float x, simd_float2 yz) { return simd_make_float3(x, yz); } static inline simd_float4 VFX_SIMD vfx_make_float4(float x, float y, float z, float w) { return simd_make_float4(x, y, z, w); } static inline simd_float4 VFX_SIMD vfx_make_float4(simd_float3 xyz, float w) { return simd_make_float4(xyz, w); } static inline simd_float4 VFX_SIMD vfx_make_float4(simd_float3 xyz) { return simd_make_float4(xyz); } static inline simd_float4 VFX_SIMD vfx_make_float4(simd_float2 xy, float z, float w) { return simd_make_float4(xy, z, w); } static inline simd_float4 VFX_SIMD vfx_make_float4_undef(simd_float3 xyz) { return simd_make_float4_undef(xyz); } #define vfx_bool4 simd_int4 #define vfx_convert_char4 simd_char #define vfx_convert_uchar4 simd_uchar #define vfx_convert_short4 simd_short #define vfx_convert_ushort4 simd_ushort #define vfx_convert_int3 simd_int #define vfx_convert_int4 simd_int #define vfx_convert_uint4 simd_uint #define vfx_convert_float4 simd_float #define vfx_any(a) simd_any(a) #define vfx_all(a) simd_all(a) static inline float VFX_SIMD vfx_select(float a, float b, int c) { return (c < 0) ? b : a; } static inline simd_float2 VFX_SIMD vfx_select(simd_float2 a, simd_float2 b, simd_int2 c) { return simd_select(a, b, c); } static inline simd_float3 VFX_SIMD vfx_select(simd_float3 a, simd_float3 b, simd_int3 c) { return simd_select(a, b, c); } static inline simd_float4 VFX_SIMD vfx_select(simd_float4 a, simd_float4 b, simd_int4 c) { return simd_select(a, b, c); } static inline int64_t VFX_SIMD vfx_min(int64_t a, int64_t b) { return a < b ? 
a : b; } static inline uint64_t VFX_SIMD vfx_min(uint64_t a, uint64_t b) { return a < b ? a : b; } static inline float VFX_SIMD vfx_min(float a, float b) { return __tg_fmin(a, b); } static inline simd_float2 VFX_SIMD vfx_min(simd_float2 a, simd_float2 b) { return __tg_fmin(a, b); } static inline simd_float3 VFX_SIMD vfx_min(simd_float3 a, simd_float3 b) { return __tg_fmin(a, b); } static inline simd_float4 VFX_SIMD vfx_min(simd_float4 a, simd_float4 b) { return __tg_fmin(a, b); } static inline int64_t VFX_SIMD vfx_max(int64_t a, int64_t b) { return a > b ? a : b; } static inline uint64_t VFX_SIMD vfx_max(uint64_t a, uint64_t b) { return a > b ? a : b; } static inline float VFX_SIMD vfx_max(float a, float b) { return __tg_fmax(a, b); } static inline simd_float2 VFX_SIMD vfx_max(simd_float2 a, simd_float2 b) { return __tg_fmax(a, b); } static inline simd_float3 VFX_SIMD vfx_max(simd_float3 a, simd_float3 b) { return __tg_fmax(a, b); } static inline simd_float4 VFX_SIMD vfx_max(simd_float4 a, simd_float4 b) { return __tg_fmax(a, b); } static inline float VFX_SIMD vfx_abs(float a) { return __tg_fabs(a); } static inline simd_float2 VFX_SIMD vfx_abs(simd_float2 a) { return __tg_fabs(a); } static inline simd_float3 VFX_SIMD vfx_abs(simd_float3 a) { return __tg_fabs(a); } static inline simd_float4 VFX_SIMD vfx_abs(simd_float4 a) { return __tg_fabs(a); } static inline float VFX_SIMD vfx_copysign(float a, float b) { return __tg_copysign(a, b); } static inline simd_float2 VFX_SIMD vfx_copysign(simd_float2 a, simd_float2 b) { return __tg_copysign(a, b); } static inline simd_float3 VFX_SIMD vfx_copysign(simd_float3 a, simd_float3 b) { return __tg_copysign(a, b); } static inline simd_float4 VFX_SIMD vfx_copysign(simd_float4 a, simd_float4 b) { return __tg_copysign(a, b); } static inline float VFX_SIMD vfx_ceil(float a) { return __tg_ceil(a); } static inline simd_float2 VFX_SIMD vfx_ceil(simd_float2 a) { return __tg_ceil(a); } static inline simd_float3 VFX_SIMD 
vfx_ceil(simd_float3 a) { return __tg_ceil(a); } static inline simd_float4 VFX_SIMD vfx_ceil(simd_float4 a) { return __tg_ceil(a); } static inline float VFX_SIMD vfx_floor(float a) { return __tg_floor(a); } static inline simd_float2 VFX_SIMD vfx_floor(simd_float2 a) { return __tg_floor(a); } static inline simd_float3 VFX_SIMD vfx_floor(simd_float3 a) { return __tg_floor(a); } static inline simd_float4 VFX_SIMD vfx_floor(simd_float4 a) { return __tg_floor(a); } static inline float VFX_SIMD vfx_rint(float a) { return __tg_rint(a); } static inline simd_float2 VFX_SIMD vfx_rint(simd_float2 a) { return __tg_rint(a); } static inline simd_float3 VFX_SIMD vfx_rint(simd_float3 a) { return __tg_rint(a); } static inline simd_float4 VFX_SIMD vfx_rint(simd_float4 a) { return __tg_rint(a); } static inline float VFX_SIMD vfx_trunc(float a) { return __tg_trunc(a); } static inline simd_float2 VFX_SIMD vfx_trunc(simd_float2 a) { return __tg_trunc(a); } static inline simd_float3 VFX_SIMD vfx_trunc(simd_float3 a) { return __tg_trunc(a); } static inline simd_float4 VFX_SIMD vfx_trunc(simd_float4 a) { return __tg_trunc(a); } static inline float VFX_SIMD vfx_sign(float a) { return simd_sign(a); } static inline simd_float2 VFX_SIMD vfx_sign(simd_float2 a) { return simd_sign(a); } static inline simd_float3 VFX_SIMD vfx_sign(simd_float3 a) { return simd_sign(a); } static inline simd_float4 VFX_SIMD vfx_sign(simd_float4 a) { return simd_sign(a); } static inline float VFX_SIMD vfx_step(float a, float b) { return a >= b; } static inline simd_float2 VFX_SIMD vfx_step(simd_float2 a, simd_float2 b) { return simd_step(a, b); } static inline simd_float3 VFX_SIMD vfx_step(simd_float3 a, simd_float3 b) { return simd_step(a, b); } static inline simd_float4 VFX_SIMD vfx_step(simd_float4 a, simd_float4 b) { return simd_step(a, b); } static inline float VFX_SIMD vfx_fract(float a) { return simd_fract(a); } static inline simd_float2 VFX_SIMD vfx_fract(simd_float2 a) { return simd_fract(a); } static inline 
simd_float3 VFX_SIMD vfx_fract(simd_float3 a) { return simd_fract(a); } static inline simd_float4 VFX_SIMD vfx_fract(simd_float4 a) { return simd_fract(a); } static inline float VFX_SIMD vfx_fmod(float a, float b) { return __tg_fmod(a, b); } static inline simd_float2 VFX_SIMD vfx_fmod(simd_float2 a, simd_float2 b) { return __tg_fmod(a, b); } static inline simd_float3 VFX_SIMD vfx_fmod(simd_float3 a, simd_float3 b) { return __tg_fmod(a, b); } static inline simd_float4 VFX_SIMD vfx_fmod(simd_float4 a, simd_float4 b) { return __tg_fmod(a, b); } static inline float VFX_SIMD vfx_recip(float a) { return simd_recip(a); } static inline simd_float2 VFX_SIMD vfx_recip(simd_float2 a) { return simd_recip(a); } static inline simd_float3 VFX_SIMD vfx_recip(simd_float3 a) { return simd_recip(a); } static inline simd_float4 VFX_SIMD vfx_recip(simd_float4 a) { return simd_recip(a); } static inline float VFX_SIMD vfx_sqrt(float a) { return __tg_sqrt(a); } static inline simd_float2 VFX_SIMD vfx_sqrt(simd_float2 a) { return __tg_sqrt(a); } static inline simd_float3 VFX_SIMD vfx_sqrt(simd_float3 a) { return __tg_sqrt(a); } static inline simd_float4 VFX_SIMD vfx_sqrt(simd_float4 a) { return __tg_sqrt(a); } static inline float VFX_SIMD vfx_rsqrt(float a) { return simd_rsqrt(a); } static inline simd_float2 VFX_SIMD vfx_rsqrt(simd_float2 a) { return simd_rsqrt(a); } static inline simd_float3 VFX_SIMD vfx_rsqrt(simd_float3 a) { return simd_rsqrt(a); } static inline simd_float4 VFX_SIMD vfx_rsqrt(simd_float4 a) { return simd_rsqrt(a); } static inline float VFX_SIMD vfx_precise_rsqrt(float a) { return simd_precise_rsqrt(a); } static inline simd_float2 VFX_SIMD vfx_precise_rsqrt(simd_float2 a) { return simd_precise_rsqrt(a); } static inline simd_float3 VFX_SIMD vfx_precise_rsqrt(simd_float3 a) { return simd_precise_rsqrt(a); } static inline simd_float4 VFX_SIMD vfx_precise_rsqrt(simd_float4 a) { return simd_precise_rsqrt(a); } static inline float VFX_SIMD vfx_cbrt(float a) { return 
__tg_cbrt(a); } static inline simd_float2 VFX_SIMD vfx_cbrt(simd_float2 a) { return __tg_cbrt(a); } static inline simd_float3 VFX_SIMD vfx_cbrt(simd_float3 a) { return __tg_cbrt(a); } static inline simd_float4 VFX_SIMD vfx_cbrt(simd_float4 a) { return __tg_cbrt(a); } static inline float VFX_SIMD vfx_cos(float a) { return __tg_cos(a); } static inline simd_float2 VFX_SIMD vfx_cos(simd_float2 a) { return __tg_cos(a); } static inline simd_float3 VFX_SIMD vfx_cos(simd_float3 a) { return __tg_cos(a); } static inline simd_float4 VFX_SIMD vfx_cos(simd_float4 a) { return __tg_cos(a); } static inline float VFX_SIMD vfx_sin(float a) { return __tg_sin(a); } static inline simd_float2 VFX_SIMD vfx_sin(simd_float2 a) { return __tg_sin(a); } static inline simd_float3 VFX_SIMD vfx_sin(simd_float3 a) { return __tg_sin(a); } static inline simd_float4 VFX_SIMD vfx_sin(simd_float4 a) { return __tg_sin(a); } static inline float VFX_SIMD vfx_tan(float a) { return __tg_tan(a); } static inline simd_float2 VFX_SIMD vfx_tan(simd_float2 a) { return __tg_tan(a); } static inline simd_float3 VFX_SIMD vfx_tan(simd_float3 a) { return __tg_tan(a); } static inline simd_float4 VFX_SIMD vfx_tan(simd_float4 a) { return __tg_tan(a); } static inline float VFX_SIMD vfx_acos(float a) { return __tg_acos(a); } static inline simd_float2 VFX_SIMD vfx_acos(simd_float2 a) { return __tg_acos(a); } static inline simd_float3 VFX_SIMD vfx_acos(simd_float3 a) { return __tg_acos(a); } static inline simd_float4 VFX_SIMD vfx_acos(simd_float4 a) { return __tg_acos(a); } static inline float VFX_SIMD vfx_asin(float a) { return __tg_asin(a); } static inline simd_float2 VFX_SIMD vfx_asin(simd_float2 a) { return __tg_asin(a); } static inline simd_float3 VFX_SIMD vfx_asin(simd_float3 a) { return __tg_asin(a); } static inline simd_float4 VFX_SIMD vfx_asin(simd_float4 a) { return __tg_asin(a); } static inline float VFX_SIMD vfx_atan(float a) { return __tg_atan(a); } static inline simd_float2 VFX_SIMD vfx_atan(simd_float2 a) { 
return __tg_atan(a); } static inline simd_float3 VFX_SIMD vfx_atan(simd_float3 a) { return __tg_atan(a); } static inline simd_float4 VFX_SIMD vfx_atan(simd_float4 a) { return __tg_atan(a); } static inline float VFX_SIMD vfx_atan2(float a, float b) { return __tg_atan2(a, b); } static inline simd_float2 VFX_SIMD vfx_atan2(simd_float2 a, simd_float2 b) { return __tg_atan2(a, b); } static inline simd_float3 VFX_SIMD vfx_atan2(simd_float3 a, simd_float3 b) { return __tg_atan2(a, b); } static inline simd_float4 VFX_SIMD vfx_atan2(simd_float4 a, simd_float4 b) { return __tg_atan2(a, b); } static inline float VFX_SIMD vfx_acosh(float a) { return __tg_acosh(a); } static inline simd_float2 VFX_SIMD vfx_acosh(simd_float2 a) { return __tg_acosh(a); } static inline simd_float3 VFX_SIMD vfx_acosh(simd_float3 a) { return __tg_acosh(a); } static inline simd_float4 VFX_SIMD vfx_acosh(simd_float4 a) { return __tg_acosh(a); } static inline float VFX_SIMD vfx_asinh(float a) { return __tg_asinh(a); } static inline simd_float2 VFX_SIMD vfx_asinh(simd_float2 a) { return __tg_asinh(a); } static inline simd_float3 VFX_SIMD vfx_asinh(simd_float3 a) { return __tg_asinh(a); } static inline simd_float4 VFX_SIMD vfx_asinh(simd_float4 a) { return __tg_asinh(a); } static inline float VFX_SIMD vfx_atanh(float a) { return __tg_atanh(a); } static inline simd_float2 VFX_SIMD vfx_atanh(simd_float2 a) { return __tg_atanh(a); } static inline simd_float3 VFX_SIMD vfx_atanh(simd_float3 a) { return __tg_atanh(a); } static inline simd_float4 VFX_SIMD vfx_atanh(simd_float4 a) { return __tg_atanh(a); } static inline float VFX_SIMD vfx_sinpi(float a) { return __sinpif(a); } static inline float VFX_SIMD vfx_cospi(float a) { return __cospif(a); } static inline float VFX_SIMD vfx_tanpi(float a) { return __tanpif(a); } static inline float VFX_SIMD vfx_exp(float a) { return __tg_exp(a); } static inline simd_float2 VFX_SIMD vfx_exp(simd_float2 a) { return __tg_exp(a); } static inline simd_float3 VFX_SIMD 
vfx_exp(simd_float3 a) { return __tg_exp(a); } static inline simd_float4 VFX_SIMD vfx_exp(simd_float4 a) { return __tg_exp(a); } static inline float VFX_SIMD vfx_exp2(float a) { return __tg_exp2(a); } static inline simd_float2 VFX_SIMD vfx_exp2(simd_float2 a) { return __tg_exp2(a); } static inline simd_float3 VFX_SIMD vfx_exp2(simd_float3 a) { return __tg_exp2(a); } static inline simd_float4 VFX_SIMD vfx_exp2(simd_float4 a) { return __tg_exp2(a); } static inline float VFX_SIMD vfx_exp10(float a) { return __tg_exp10(a); } static inline simd_float2 VFX_SIMD vfx_exp10(simd_float2 a) { return __tg_exp10(a); } static inline simd_float3 VFX_SIMD vfx_exp10(simd_float3 a) { return __tg_exp10(a); } static inline simd_float4 VFX_SIMD vfx_exp10(simd_float4 a) { return __tg_exp10(a); } static inline float VFX_SIMD vfx_log(float a) { return __tg_log(a); } static inline simd_float2 VFX_SIMD vfx_log(simd_float2 a) { return __tg_log(a); } static inline simd_float3 VFX_SIMD vfx_log(simd_float3 a) { return __tg_log(a); } static inline simd_float4 VFX_SIMD vfx_log(simd_float4 a) { return __tg_log(a); } static inline float VFX_SIMD vfx_pow(float a, float b) { return __tg_pow(a, b); } static inline simd_float2 VFX_SIMD vfx_pow(simd_float2 a, simd_float2 b) { return __tg_pow(a, b); } static inline simd_float3 VFX_SIMD vfx_pow(simd_float3 a, simd_float3 b) { return __tg_pow(a, b); } static inline simd_float4 VFX_SIMD vfx_pow(simd_float4 a, simd_float4 b) { return __tg_pow(a, b); } //static inline bool VFX_SIMD vfx_is_normal(float a) { return __libcpp_isnormal(a); } static inline simd_int2 VFX_SIMD vfx_is_normal(simd_float2 a) { return __tg_isnormal(a); } static inline simd_int3 VFX_SIMD vfx_is_normal(simd_float3 a) { return __tg_isnormal(a); } static inline simd_int4 VFX_SIMD vfx_is_normal(simd_float4 a) { return __tg_isnormal(a); } static inline bool VFX_SIMD vfx_is_infinite(float a) { return isinf(a); } static inline simd_int2 VFX_SIMD vfx_is_infinite(simd_float2 a) { return 
__tg_isinf(a); } static inline simd_int3 VFX_SIMD vfx_is_infinite(simd_float3 a) { return __tg_isinf(a); } static inline simd_int4 VFX_SIMD vfx_is_infinite(simd_float4 a) { return __tg_isinf(a); } #define vfx_length(a) simd_length(a) #define vfx_length_squared(a) simd_length_squared(a) #define vfx_distance(a, b) simd_distance(a, b) #define vfx_distance_squared(a, b) simd_distance_squared(a, b) #define vfx_normalize(a) simd_normalize(a) #define vfx_cross(a, b) simd_cross(a, b) #define vfx_dot(a, b) simd_dot(a, b) static inline simd_float2x2 VFX_SIMD vfx_transpose(simd_float2x2 m) { return simd_transpose(m); } static inline simd_float3x3 VFX_SIMD vfx_transpose(simd_float3x3 m) { return simd_transpose(m); } static inline simd_float4x4 VFX_SIMD vfx_transpose(simd_float4x4 m) { return simd_transpose(m); } // Not yet available in metal :/ static inline simd_float2x2 VFX_SIMD vfx_inverse(simd_float2x2 m) { return simd_inverse(m); } static inline simd_float3x3 VFX_SIMD vfx_inverse(simd_float3x3 m) { return simd_inverse(m); } static inline simd_float4x4 VFX_SIMD vfx_inverse(simd_float4x4 m) { return simd_inverse(m); } static inline simd_float4 VFX_SIMD vfx_muladd(simd_float4 a, simd_float4 b, simd_float4 c) { return simd_muladd(a, b, c); } static inline simd_float3 VFX_SIMD vfx_muladd(simd_float3 a, simd_float3 b, simd_float3 c) { return simd_muladd(a, b, c); } static inline simd_float2 VFX_SIMD vfx_muladd(simd_float2 a, simd_float2 b, simd_float2 c) { return simd_muladd(a, b, c); } static inline float VFX_SIMD vfx_muladd(float a, float b, float c) { return simd_muladd(a, b, c); } static inline float VFX_SIMD vfx_reduce_min(simd_float2 a) { return simd_reduce_min(a); } static inline float VFX_SIMD vfx_reduce_min(simd_float3 a) { return simd_reduce_min(a); } static inline float VFX_SIMD vfx_reduce_min(simd_float4 a) { return simd_reduce_min(a); } static inline float VFX_SIMD vfx_reduce_max(simd_float2 a) { return simd_reduce_max(a); } static inline float VFX_SIMD 
vfx_reduce_max(simd_float3 a) { return simd_reduce_max(a); } static inline float VFX_SIMD vfx_reduce_max(simd_float4 a) { return simd_reduce_max(a); } static inline float VFX_SIMD vfx_reduce_add(simd_float2 a) { return simd_reduce_add(a); } static inline float VFX_SIMD vfx_reduce_add(simd_float3 a) { return simd_reduce_add(a); } static inline float VFX_SIMD vfx_reduce_add(simd_float4 a) { return simd_reduce_add(a); } #endif static inline int32_t VFX_SIMD vfx_min(int32_t a, int32_t b) { return a < b ? a : b; } static inline uint32_t VFX_SIMD vfx_min(uint32_t a, uint32_t b) { return a < b ? a : b; } static inline int32_t VFX_SIMD vfx_max(int32_t a, int32_t b) { return a > b ? a : b; } static inline uint32_t VFX_SIMD vfx_max(uint32_t a, uint32_t b) { return a > b ? a : b; } static inline simd_float3 VFX_SIMD vfx_matrix_get_x_axis(simd_float4x4 m) { return m.columns[0].xyz; } static inline simd_float3 VFX_SIMD vfx_matrix_get_y_axis(simd_float4x4 m) { return m.columns[1].xyz; } static inline simd_float3 VFX_SIMD vfx_matrix_get_z_axis(simd_float4x4 m) { return m.columns[2].xyz; } static inline simd_float3 VFX_SIMD vfx_matrix_get_position(simd_float4x4 m) { return m.columns[3].xyz; } static inline void VFX_SIMD_UNPURE vfx_matrix_set_row(vfx_out simd_float4x4* m, int32_t i, simd_float4 v) { m->columns[0][i] = v.x; m->columns[1][i] = v.y; m->columns[2][i] = v.z; m->columns[3][i] = v.w; } static inline simd_float4x4 VFX_SIMD vfx_matrix_scale(simd_float4x4 x, simd_float4 a) { x.columns[0] *= a; x.columns[1] *= a; x.columns[2] *= a; x.columns[3] *= a; return x; } static inline simd_float3x4 VFX_SIMD vfx_matrix_scale(simd_float3x4 x, simd_float4 a) { x.columns[0] *= a; x.columns[1] *= a; x.columns[2] *= a; return x; } static inline simd_float4x4 VFX_SIMD vfx_matrix_blend(simd_float4x4 x, float a, simd_float4x4 y) { x.columns[0] += a * y.columns[0]; x.columns[1] += a * y.columns[1]; x.columns[2] += a * y.columns[2]; x.columns[3] += a * y.columns[3]; return x; } static inline 
simd_float3x4 VFX_SIMD vfx_matrix_blend(simd_float3x4 x, float a, simd_float3x4 y) { x.columns[0] += a * y.columns[0]; x.columns[1] += a * y.columns[1]; x.columns[2] += a * y.columns[2]; return x; } static inline float VFX_SIMD vfx_pow2(float a) { return a * a; } static inline simd_float2 VFX_SIMD vfx_pow2(simd_float2 a) { return a * a; } static inline simd_float3 VFX_SIMD vfx_pow2(simd_float3 a) { return a * a; } static inline simd_float4 VFX_SIMD vfx_pow2(simd_float4 a) { return a * a; } static inline float VFX_SIMD vfx_pow3(float a) { return a * a * a; } static inline simd_float2 VFX_SIMD vfx_pow3(simd_float2 a) { return a * a * a; } static inline simd_float3 VFX_SIMD vfx_pow3(simd_float3 a) { return a * a * a; } static inline simd_float4 VFX_SIMD vfx_pow3(simd_float4 a) { return a * a * a; } static inline float VFX_SIMD vfx_pow4(float a) { return a * a * a * a; } static inline simd_float2 VFX_SIMD vfx_pow4(simd_float2 a) { return a * a * a * a; } static inline simd_float3 VFX_SIMD vfx_pow4(simd_float3 a) { return a * a * a * a; } static inline simd_float4 VFX_SIMD vfx_pow4(simd_float4 a) { return a * a * a * a; } static inline float VFX_SIMD vfx_pow5(float a) { return a * a * a * a * a; } static inline simd_float2 VFX_SIMD vfx_pow5(simd_float2 a) { return a * a * a * a * a; } static inline simd_float3 VFX_SIMD vfx_pow5(simd_float3 a) { return a * a * a * a * a; } static inline simd_float4 VFX_SIMD vfx_pow5(simd_float4 a) { return a * a * a * a * a; } static inline int VFX_SIMD vfx_max_index(simd_float2 a) { return a.x >= a.y ? 0 : 1; } static inline int VFX_SIMD vfx_max_index(simd_float3 a) { return a.x >= a.y ? (a.x >= a.z ? 0 : 2) : (a.y >= a.z ? 1 : 2); } // 20 inst (15 with AVX) static inline int VFX_SIMD vfx_max_index(simd_float4 v) { simd_int4 ind = { 0, 1, 2, 3 }; simd_float4 min_lh = vfx_max(v, v.zwxy); simd_int4 min_ind_lh = simd_bitselect(ind, ind.zwxy, v < v.zwxy); return (min_lh.x > min_lh.y) ? 
min_ind_lh.x : min_ind_lh.y; }

// Index of the smallest component (0 = x, 1 = y). x wins on a tie.
static inline int VFX_SIMD vfx_min_index(simd_float2 a) { return a.x <= a.y ? 0 : 1; }

// Index of the smallest component (0 = x, 1 = y, 2 = z). The lowest index wins on a tie.
static inline int VFX_SIMD vfx_min_index(simd_float3 a) { return a.x <= a.y ? (a.x <= a.z ? 0 : 2) : (a.y <= a.z ? 1 : 2); }

// 20 inst (15 with AVX)
// Index of the smallest component of a float4.
// Lanes are first reduced pairwise (x against z, y against w), carrying the
// index of the winner along, then the two remaining candidates are compared.
// If both candidates are equal, the index carried by the second one is returned.
static inline int VFX_SIMD vfx_min_index(simd_float4 v) {
    simd_int4 ind = { 0, 1, 2, 3 };
    simd_float4 min_lh = vfx_min(v, v.zwxy);
    // keep the lane's own index unless the swizzled lane is strictly smaller
    simd_int4 min_ind_lh = simd_bitselect(ind, ind.zwxy, v > v.zwxy);
    return (min_lh.x < min_lh.y) ? min_ind_lh.x : min_ind_lh.y;
}

// Compute the min/max values and indices of four vectors (given as columns of a matrix)
// The matrix is transposed first, so the four-way reduction below runs across
// the columns of the transposed matrix, one result per lane.
// *outIndices receives, per lane, which transposed column (0..3) supplied the value.
static inline simd_float4 VFX_SIMD_UNPURE vfx_min_and_indices(simd_float4x4 v, vfx_out simd_int4 *outIndices) {
    const simd_int4 i0 = ( 0 );
    const simd_int4 i1 = ( 1 );
    const simd_int4 i2 = ( 2 );
    const simd_int4 i3 = ( 3 );
    simd_float4x4 t = vfx_transpose(v);
    // reduce columns 0/1 and 2/3 independently, then merge the two partial results
    simd_float4 min_xy = vfx_min(t.columns[0], t.columns[1]);
    simd_int4 ind_xy = simd_bitselect(i0, i1, t.columns[0] > t.columns[1]);
    simd_float4 min_zw = vfx_min(t.columns[2], t.columns[3]);
    simd_int4 ind_zw = simd_bitselect(i2, i3, t.columns[2] > t.columns[3]);
    *outIndices = simd_bitselect(ind_xy, ind_zw, min_xy > min_zw);
    return vfx_min(min_xy, min_zw);
}

// Same reduction as vfx_min_and_indices, with the comparisons reversed to track the maximum.
static inline simd_float4 VFX_SIMD_UNPURE vfx_max_and_indices(simd_float4x4 v, vfx_out simd_int4 *outIndices) {
    const simd_int4 i0 = ( 0 );
    const simd_int4 i1 = ( 1 );
    const simd_int4 i2 = ( 2 );
    const simd_int4 i3 = ( 3 );
    simd_float4x4 t = vfx_transpose(v);
    simd_float4 max_xy = vfx_max(t.columns[0], t.columns[1]);
    simd_int4 ind_xy = simd_bitselect(i0, i1, t.columns[0] < t.columns[1]);
    simd_float4 max_zw = vfx_max(t.columns[2], t.columns[3]);
    simd_int4 ind_zw = simd_bitselect(i2, i3, t.columns[2] < t.columns[3]);
    *outIndices = simd_bitselect(ind_xy, ind_zw, max_xy < max_zw);
    return vfx_max(max_xy, max_zw);
}

// normalize 4 float3 stored interleaved (xxxx, yyyy, zzzz)
// The squared length is computed per lane from the three columns and the whole
// matrix is scaled by its reciprocal square root. There is no guard against a
// zero-length input here.
static inline simd_float3x4 VFX_SIMD vfx_vector_fast_normalize_interleaved(simd_float3x4 m) {
    simd_float4 x2 = m.columns[0] * m.columns[0];
    simd_float4 y2 = m.columns[1] * m.columns[1];
    simd_float4 z2 = m.columns[2] * m.columns[2];
    simd_float4 len2 = x2 + y2 + z2;
    return vfx_matrix_scale(m, vfx_rsqrt(len2));
}

// Broadcast a scalar to every component of a vector.
static inline simd_float2 VFX_SIMD vfx_splat2(float x) { return vfx_make_float2(x, x); }
static inline simd_float3 VFX_SIMD vfx_splat3(float x) { return vfx_make_float3(x, x, x); }
static inline simd_float4 VFX_SIMD vfx_splat4(float x) { return vfx_make_float4(x, x, x, x); }

// Matrix construction helpers. The same set of overloads is provided twice:
// once on top of the Metal matrix constructors, once on top of the host simd API.
#ifdef __METAL_VERSION__
// Diagonal matrices built from a vector of diagonal entries.
static inline simd_float2x2 VFX_SIMD vfx_diagonal(simd_float2 d) { return float2x2(float2(d.x, 0), float2(0, d.y)); }
static inline simd_float3x3 VFX_SIMD vfx_diagonal(simd_float3 d) { return float3x3(float3(d.x, 0, 0), float3(0, d.y, 0), float3(0, 0, d.z)); }
static inline simd_float4x4 VFX_SIMD vfx_diagonal(simd_float4 d) { return float4x4(float4(d.x, 0, 0, 0), float4(0, d.y, 0, 0), float4(0, 0, d.z, 0), float4(0, 0, 0, d.w)); }
// Upper-left 3x3 of a 4x4 (xyz of the first three columns).
static inline simd_float3x3 VFX_SIMD vfx_float3x3(simd_float4x4 m) { return float3x3(m[0].xyz, m[1].xyz, m[2].xyz); }
// Matrices from columns.
static inline simd_float2x2 VFX_SIMD vfx_float2x2(simd_float2 c0, simd_float2 c1) { return float2x2(c0, c1); }
static inline simd_float2x3 VFX_SIMD vfx_float2x3(simd_float3 c0, simd_float3 c1) { return float2x3(c0, c1); }
static inline simd_float2x4 VFX_SIMD vfx_float2x4(simd_float4 c0, simd_float4 c1) { return float2x4(c0, c1); }
static inline simd_float3x2 VFX_SIMD vfx_float3x2(simd_float2 c0, simd_float2 c1, simd_float2 c2) { return float3x2(c0, c1, c2); }
static inline simd_float3x3 VFX_SIMD vfx_float3x3(simd_float3 c0, simd_float3 c1, simd_float3 c2) { return float3x3(c0, c1, c2); }
static inline simd_float3x4 VFX_SIMD vfx_float3x4(simd_float4 c0, simd_float4 c1, simd_float4 c2) { return float3x4(c0, c1, c2); }
static inline simd_float4x2 VFX_SIMD vfx_float4x2(simd_float2 c0, simd_float2 c1, simd_float2 c2, simd_float2 c3) { return float4x2(c0, c1, c2, c3); }
static inline simd_float4x3 VFX_SIMD vfx_float4x3(simd_float3 c0, simd_float3 c1, simd_float3 c2, simd_float3 c3) { return float4x3(c0, c1, c2, c3); }
static inline simd_float4x4 VFX_SIMD vfx_float4x4(simd_float4 c0, simd_float4 c1, simd_float4 c2, simd_float4 c3) { return float4x4(c0, c1, c2, c3); }
// Matrices from rows: build the column-wise matrix, then transpose it.
// (The parameters are named c0.. but hold rows.)
static inline simd_float3x2 VFX_SIMD vfx_float3x2_from_rows(simd_float3 c0, simd_float3 c1) { return transpose(float2x3(c0, c1)); }
static inline simd_float3x3 VFX_SIMD vfx_float3x3_from_rows(simd_float3 c0, simd_float3 c1, simd_float3 c2) { return transpose(float3x3(c0, c1, c2)); }
static inline simd_float3x4 VFX_SIMD vfx_float3x4_from_rows(simd_float3 c0, simd_float3 c1, simd_float3 c2, simd_float3 c3) { return transpose(float4x3(c0, c1, c2, c3)); }
static inline simd_float4x2 VFX_SIMD vfx_float4x2_from_rows(simd_float4 c0, simd_float4 c1) { return transpose(float2x4(c0, c1)); }
static inline simd_float4x3 VFX_SIMD vfx_float4x3_from_rows(simd_float4 c0, simd_float4 c1, simd_float4 c2) { return transpose(float3x4(c0, c1, c2)); }
static inline simd_float4x4 VFX_SIMD vfx_float4x4_from_rows(simd_float4 c0, simd_float4 c1, simd_float4 c2, simd_float4 c3) { return transpose(float4x4(c0, c1, c2, c3)); }
#else
// Host build: forward to simd_diagonal_matrix / simd_matrix / simd_matrix_from_rows.
static inline simd_float2x2 VFX_SIMD vfx_diagonal(simd_float2 d) { return simd_diagonal_matrix(d); }
static inline simd_float3x3 VFX_SIMD vfx_diagonal(simd_float3 d) { return simd_diagonal_matrix(d); }
static inline simd_float4x4 VFX_SIMD vfx_diagonal(simd_float4 d) { return simd_diagonal_matrix(d); }
// Upper-left 3x3 of a 4x4 (xyz of the first three columns).
static inline simd_float3x3 VFX_SIMD vfx_float3x3(simd_float4x4 m) { return simd_matrix(m.columns[0].xyz, m.columns[1].xyz, m.columns[2].xyz); }
// Matrices from columns.
static inline simd_float2x2 VFX_SIMD vfx_float2x2(simd_float2 c0, simd_float2 c1) { return simd_matrix(c0, c1); }
static inline simd_float2x3 VFX_SIMD vfx_float2x3(simd_float3 c0, simd_float3 c1) { return simd_matrix(c0, c1); }
static inline simd_float2x4 VFX_SIMD vfx_float2x4(simd_float4 c0, simd_float4 c1) { return simd_matrix(c0, c1); }
static inline simd_float3x2 VFX_SIMD vfx_float3x2(simd_float2 c0, simd_float2 c1, simd_float2 c2) { return simd_matrix(c0, c1, c2); }
static inline simd_float3x3 VFX_SIMD vfx_float3x3(simd_float3 c0, simd_float3 c1, simd_float3 c2) { return simd_matrix(c0, c1, c2); }
static inline simd_float3x4 VFX_SIMD vfx_float3x4(simd_float4 c0, simd_float4 c1, simd_float4 c2) { return simd_matrix(c0, c1, c2); }
static inline simd_float4x2 VFX_SIMD vfx_float4x2(simd_float2 c0, simd_float2 c1, simd_float2 c2, simd_float2 c3) { return simd_matrix(c0, c1, c2, c3); }
static inline simd_float4x3 VFX_SIMD vfx_float4x3(simd_float3 c0, simd_float3 c1, simd_float3 c2, simd_float3 c3) { return simd_matrix(c0, c1, c2, c3); }
static inline simd_float4x4 VFX_SIMD vfx_float4x4(simd_float4 c0, simd_float4 c1, simd_float4 c2, simd_float4 c3) { return simd_matrix(c0, c1, c2, c3); }
// Matrices from rows. (The parameters are named c0.. but hold rows.)
static inline simd_float3x2 VFX_SIMD vfx_float3x2_from_rows(simd_float3 c0, simd_float3 c1) { return simd_matrix_from_rows(c0, c1); }
static inline simd_float3x3 VFX_SIMD vfx_float3x3_from_rows(simd_float3 c0, simd_float3 c1, simd_float3 c2) { return simd_matrix_from_rows(c0, c1, c2); }
static inline simd_float3x4 VFX_SIMD vfx_float3x4_from_rows(simd_float3 c0, simd_float3 c1, simd_float3 c2, simd_float3 c3) { return simd_matrix_from_rows(c0, c1, c2, c3); }
static inline simd_float4x2 VFX_SIMD vfx_float4x2_from_rows(simd_float4 c0, simd_float4 c1) { return simd_matrix_from_rows(c0, c1); }
static inline simd_float4x3 VFX_SIMD vfx_float4x3_from_rows(simd_float4 c0, simd_float4 c1, simd_float4 c2) { return simd_matrix_from_rows(c0, c1, c2); }
static inline simd_float4x4 VFX_SIMD vfx_float4x4_from_rows(simd_float4 c0, simd_float4 c1, simd_float4 c2, simd_float4 c3) { return simd_matrix_from_rows(c0, c1, c2, c3); }
#endif

// Frequently used vector constants, exposed as functions.
static inline simd_float2 VFX_SIMD vfx_float2_zero(void) { return (simd_float2){ 0, 0 }; }
static inline simd_float2 VFX_SIMD vfx_float2_half_one(void) { return (simd_float2){ 0.5, 0.5 }; }
static inline simd_float2 VFX_SIMD vfx_float2_one(void) { return (simd_float2){ 1, 1 }; }
static inline simd_float2 VFX_SIMD vfx_float2_two(void) { return (simd_float2){ 2, 2 }; }
static inline simd_float3 VFX_SIMD vfx_float3_zero(void) { return (simd_float3){ 0, 0, 0 }; }
static inline simd_float3 VFX_SIMD vfx_float3_half_one(void) { return (simd_float3){ 0.5, 0.5, 0.5 }; }
static inline simd_float3 VFX_SIMD vfx_float3_one(void) { return (simd_float3){ 1, 1, 1 }; }
static inline simd_float3 VFX_SIMD vfx_float3_two(void) { return (simd_float3){ 2, 2, 2 }; }
static inline simd_float3 VFX_SIMD vfx_float3_unit_x(void) { return (simd_float3){ 1, 0, 0 }; }
static inline simd_float3 VFX_SIMD vfx_float3_unit_y(void) { return (simd_float3){ 0, 1, 0 }; }
static inline simd_float3 VFX_SIMD vfx_float3_unit_z(void) { return (simd_float3){ 0, 0, 1 }; }
static inline simd_float4 VFX_SIMD vfx_float4_zero(void) { return (simd_float4){ 0, 0, 0, 0 }; }
static inline simd_float4 VFX_SIMD vfx_float4_half_one(void) { return (simd_float4){ 0.5, 0.5, 0.5, 0.5 }; }
static inline simd_float4 VFX_SIMD vfx_float4_one(void) { return (simd_float4){ 1, 1, 1, 1 }; }
static inline simd_float4 VFX_SIMD vfx_float4_two(void) { return (simd_float4){ 2, 2, 2, 2 }; }
static inline simd_float4 VFX_SIMD vfx_float4_unit_x(void) { return (simd_float4){ 1, 0, 0, 0 }; }
static inline simd_float4 VFX_SIMD vfx_float4_unit_y(void) { return (simd_float4){ 0, 1, 0, 0 }; }
static inline simd_float4 VFX_SIMD vfx_float4_unit_z(void) { return (simd_float4){ 0, 0, 1, 0 }; }
static inline simd_float4 VFX_SIMD vfx_float4_unit_w(void) { return (simd_float4){ 0, 0, 0, 1 }; }
// Identity matrices assembled from the unit vectors above.
static inline simd_float3x3 VFX_SIMD vfx_float3x3_identity(void) { return vfx_float3x3(vfx_float3_unit_x(), vfx_float3_unit_y(), vfx_float3_unit_z()); }
static inline simd_float4x4 VFX_SIMD vfx_float4x4_identity(void) { return vfx_float4x4(vfx_float4_unit_x(), vfx_float4_unit_y(), vfx_float4_unit_z(), vfx_float4_unit_w()); }
// 0x3c00 is the IEEE 754 half-precision bit pattern of 1.0.
static inline vfx_packed_half4 VFX_SIMD vfx_packed_half4_one(void) { return (vfx_packed_half4){ 0x3c00, 0x3c00, 0x3c00, 0x3c00 }; }

// Embed a 3x3 into a 4x4: the three columns get w = 0 and the last column is (0, 0, 0, 1).
static inline simd_float4x4 VFX_SIMD vfx_float4x4(simd_float3x3 m) {
    return vfx_float4x4(vfx_make_float4(m.columns[0].xyz, 0),
                        vfx_make_float4(m.columns[1].xyz, 0),
                        vfx_make_float4(m.columns[2].xyz, 0),
                        vfx_float4_unit_w());
}

// NaN detection and removal. The vector tests rely on NaN being the only value
// for which `a != a` holds.
#if !defined(__FAST_MATH__)
static inline bool VFX_SIMD vfx_is_nan(float a) { return isnan(a); }
static inline bool VFX_SIMD vfx_contains_nan(simd_float2 a) { return vfx_any(a != a); }
static inline bool VFX_SIMD vfx_contains_nan(simd_float3 a) { return vfx_any(a != a); }
static inline bool VFX_SIMD vfx_contains_nan(simd_float4 a) { return vfx_any(a != a); }
static inline bool VFX_SIMD vfx_contains_nan(simd_float4x4 a) { return vfx_contains_nan(a.columns[0]) || vfx_contains_nan(a.columns[1]) || vfx_contains_nan(a.columns[2]) || vfx_contains_nan(a.columns[3]); }
// Replace every NaN component with 0.
static inline simd_float3 VFX_SIMD vfx_clear_nan(simd_float3 a) { return vfx_select(a, 0.f, a != a); }
static inline simd_float4 VFX_SIMD vfx_clear_nan(simd_float4 a) { return vfx_select(a, 0.f, a != a); }
#else
// With __FAST_MATH__ defined the helpers become no-ops: the tests always report
// "no NaN" and vfx_clear_nan returns its argument untouched.
// NOTE(review): presumably because fast-math lets the compiler assume NaN never occurs — confirm.
static inline bool VFX_SIMD vfx_is_nan(float a) { return false; }
static inline bool VFX_SIMD vfx_contains_nan(simd_float2 a) { return false; }
static inline bool VFX_SIMD vfx_contains_nan(simd_float3 a) { return false; }
static inline bool VFX_SIMD vfx_contains_nan(simd_float4 a) { return false; }
static inline bool VFX_SIMD vfx_contains_nan(simd_float4x4 a) { return false; }
static inline simd_float3 VFX_SIMD vfx_clear_nan(simd_float3 a) { return a; }
static inline simd_float4 VFX_SIMD vfx_clear_nan(simd_float4 a) { return a; }
#endif

// True when every component lies in [0, 1], written as |p - 0.5| <= 0.5.
// The alternative below is faster but breaks when fed with NaN (no comparison
// fires, so a NaN would be reported as inside):
// return !simd_any((coord < 0.f) | (coord > 1.f));
static inline bool VFX_SIMD vfx_is_in_01(simd_float2 p) { return vfx_all(vfx_abs(p - 0.5f) <= 0.5f); }
static inline bool VFX_SIMD vfx_is_in_01(simd_float3 p) { return vfx_all(vfx_abs(p - 0.5f) <= 0.5f); }
static inline bool VFX_SIMD vfx_is_in_01(simd_float4 p) { return vfx_all(vfx_abs(p - 0.5f) <= 0.5f); }
// True when at least one component is < 0 or > 1. A NaN component triggers
// neither comparison, so it does not count as outside.
static inline bool VFX_SIMD vfx_is_outside_01(simd_float2 p) { return vfx_any((p < 0.f) | (p > 1.f)); }
static inline bool VFX_SIMD vfx_is_outside_01(simd_float3 p) { return vfx_any((p < 0.f) | (p > 1.f)); }
static inline bool VFX_SIMD vfx_is_outside_01(simd_float4 p) { return vfx_any((p < 0.f) | (p > 1.f)); }

// a * b + c
// Scalar-b overloads: b is widened to a vector and forwarded to the all-vector vfx_muladd.
static inline simd_float4 VFX_SIMD vfx_muladd(simd_float4 a, float b, simd_float4 c) { return vfx_muladd(a, (simd_float4)(b), c); }
static inline simd_float3 VFX_SIMD vfx_muladd(simd_float3 a, float b, simd_float3 c) { return vfx_muladd(a, (simd_float3)(b), c); }
static inline simd_float2 VFX_SIMD vfx_muladd(simd_float2 a, float b, simd_float2 c) { return vfx_muladd(a, (simd_float2)(b), c); }

// Component accessors. Setters take the vector by value and return the modified copy.
static inline float VFX_SIMD vfx_get_x(simd_float2 a) { return a.x; }
static inline simd_float2 VFX_SIMD vfx_set_x(simd_float2 a, float x) { a.x = x; return a; }
static inline float VFX_SIMD vfx_get_y(simd_float2 a) { return a.y; }
static inline simd_float2 VFX_SIMD vfx_set_y(simd_float2 a, float y) { a.y = y; return a; }
static inline float VFX_SIMD vfx_get_x(simd_float3 a) { return a.x; }
static inline simd_float3 VFX_SIMD vfx_set_x(simd_float3 a, float x) { a.x = x; return a; }
static inline float VFX_SIMD vfx_get_y(simd_float3 a) { return a.y; }
static inline simd_float3 VFX_SIMD vfx_set_y(simd_float3 a, float y) { a.y = y; return a; }
static inline float VFX_SIMD vfx_get_z(simd_float3 a) { return a.z; }
static inline simd_float3 VFX_SIMD vfx_set_z(simd_float3 a, float z) { a.z = z; return a; }
static inline float VFX_SIMD vfx_get_x(simd_float4 a) { return a.x; }
static inline simd_float4 VFX_SIMD vfx_set_x(simd_float4 a, float x) { a.x = x; return a; }
static inline float VFX_SIMD vfx_get_y(simd_float4 a) { return a.y; }
static inline simd_float4 VFX_SIMD vfx_set_y(simd_float4 a, float y) { a.y = y; return a; }
static inline float VFX_SIMD vfx_get_z(simd_float4 a) { return a.z; }
static inline simd_float4 VFX_SIMD vfx_set_z(simd_float4 a, float z) { a.z = z; return a; } static inline float VFX_SIMD vfx_get_w(simd_float4 a) { return a.w; } static inline simd_float2 VFX_SIMD vfx_get_xy(simd_float3 a) { return a.xy; } static inline simd_float2 VFX_SIMD vfx_get_xy(simd_float4 a) { return a.xy; } static inline simd_float3 VFX_SIMD vfx_get_xyz(simd_float4 a) { return a.xyz; } static inline simd_float4 VFX_SIMD vfx_set_xyz(simd_float4 a, simd_float3 xyz) { a.xyz = xyz; return a; } static inline simd_float2 VFX_SIMD vfx_remap_n11_to_01(simd_float2 v) { return v * 0.5f + 0.5f; } static inline simd_float3 VFX_SIMD vfx_remap_n11_to_01(simd_float3 v) { return v * 0.5f + 0.5f; } static inline simd_float3 VFX_SIMD vfx_remap_01_to_n11(simd_float3 v) { return v * 2.f - 1.f; } static inline bool VFX_SIMD vfx_is_almost_zero(float a) { return vfx_abs(a) <= VFX_EPSILON; } static inline bool VFX_SIMD vfx_is_almost_zero(simd_float2 a) { return vfx_all(vfx_abs(a) <= vfx_splat2(VFX_EPSILON)); } static inline bool VFX_SIMD vfx_is_almost_zero(simd_float3 a) { return vfx_all(vfx_abs(a) <= vfx_splat3(VFX_EPSILON)); } static inline bool VFX_SIMD vfx_is_almost_zero(simd_float4 a) { return vfx_all(vfx_abs(a) <= vfx_splat4(VFX_EPSILON)); } static inline bool VFX_SIMD vfx_equal(float a, float b) { return a == b; } static inline bool VFX_SIMD vfx_equal(simd_float2 a, simd_float2 b) { return vfx_all(a == b); } static inline bool VFX_SIMD vfx_equal(simd_float3 a, simd_float3 b) { return vfx_all(a == b); } static inline bool VFX_SIMD vfx_equal(simd_float4 a, simd_float4 b) { return vfx_all(a == b); } static inline bool VFX_SIMD vfx_notEqual(float a, float b) { return a != b; } static inline bool VFX_SIMD vfx_notEqual(simd_float2 a, simd_float2 b) { return vfx_any(a != b); } static inline bool VFX_SIMD vfx_notEqual(simd_float3 a, simd_float3 b) { return vfx_any(a != b); } static inline bool VFX_SIMD vfx_notEqual(simd_float4 a, simd_float4 b) { return vfx_any(a != b); } static 
inline bool VFX_SIMD vfx_is_almost_equal(float a, float b) { return vfx_abs(a - b) <= VFX_EPSILON; } static inline bool VFX_SIMD vfx_is_almost_equal(simd_float2 a, simd_float2 b) { return vfx_all(vfx_abs(a - b) <= vfx_splat2(VFX_EPSILON)); } static inline bool VFX_SIMD vfx_is_almost_equal(simd_float3 a, simd_float3 b) { return vfx_all(vfx_abs(a - b) <= vfx_splat3(VFX_EPSILON)); } static inline bool VFX_SIMD vfx_is_almost_equal(simd_float4 a, simd_float4 b) { return vfx_all(vfx_abs(a - b) <= vfx_splat4(VFX_EPSILON)); } static inline bool VFX_SIMD vfx_is_almost_equal(float a, float b, float eps) { return vfx_abs(a - b) <= eps; } static inline bool VFX_SIMD vfx_is_almost_equal(simd_float2 a, simd_float2 b, float eps) { return vfx_all(vfx_abs(a - b) <= vfx_splat2(eps)); } static inline bool VFX_SIMD vfx_is_almost_equal(simd_float3 a, simd_float3 b, float eps) { return vfx_all(vfx_abs(a - b) <= vfx_splat3(eps)); } static inline bool VFX_SIMD vfx_is_almost_equal(simd_float4 a, simd_float4 b, float eps) { return vfx_all(vfx_abs(a - b) <= vfx_splat4(eps)); } static inline bool VFX_SIMD vfx_is_almost_equal(simd_float4x4 m1, simd_float4x4 m2, float eps) { simd_float4 veps = ( eps ); vfx_bool4 b = (vfx_abs(m1.columns[0] - m2.columns[0]) > veps); b |= (vfx_abs(m1.columns[1] - m2.columns[1]) > veps); b |= (vfx_abs(m1.columns[2] - m2.columns[2]) > veps); b |= (vfx_abs(m1.columns[3] - m2.columns[3]) > veps); return !vfx_any(b); } //static inline float VFX_SIMD vfx_max(int32_t a, int32_t b) { return a > b ? a : b; } //static inline float VFX_SIMD vfx_min(int32_t a, int32_t b) { return a < b ? a : b; } // retuns only -1 or 1. 0 is considered as 1 static inline float VFX_SIMD vfx_sign_fast(float a) { return a > 0 ? 
1.f : -1.f; } static inline simd_float2 VFX_SIMD vfx_sign_fast(simd_float2 a) { return vfx_select(-vfx_float2_one(), vfx_float2_one(), a >= 0.f); } static inline simd_float3 VFX_SIMD vfx_sign_fast(simd_float3 a) { return vfx_select(-vfx_float3_one(), vfx_float3_one(), a >= 0.f); } static inline simd_float4 VFX_SIMD vfx_sign_fast(simd_float4 a) { return vfx_select(-vfx_float4_one(), vfx_float4_one(), a >= 0.f); } static inline simd_float2 VFX_SIMD vfx_sincos(float a) { #ifdef __METAL_VERSION__ float cs; float sn = sincos(a, cs); return (simd_float2){ sn, cs }; #else struct __float2 sc = __sincosf_stret(a); return (simd_float2){ sc.__sinval, sc.__cosval }; #endif } static inline VFX_SIMD float vfx_sinc(float x) { if (x == 0.f) return 1.f; return vfx_sin(x) / x; } static inline float VFX_SIMD vfx_mix(float a, float b, float t) { return vfx_muladd(b - a, t, a); } static inline simd_float2 VFX_SIMD vfx_mix(simd_float2 a, simd_float2 b, float t) { return vfx_muladd(b - a, t, a); } static inline simd_float3 VFX_SIMD vfx_mix(simd_float3 a, simd_float3 b, float t) { return vfx_muladd(b - a, t, a); } static inline simd_float4 VFX_SIMD vfx_mix(simd_float4 a, simd_float4 b, float t) { return vfx_muladd(b - a, t, a); } static inline simd_float2 VFX_SIMD vfx_mix(simd_float2 a, simd_float2 b, simd_float2 t) { return vfx_muladd(b - a, t, a); } static inline simd_float3 VFX_SIMD vfx_mix(simd_float3 a, simd_float3 b, simd_float3 t) { return vfx_muladd(b - a, t, a); } static inline simd_float4 VFX_SIMD vfx_mix(simd_float4 a, simd_float4 b, simd_float4 t) { return vfx_muladd(b - a, t, a); } // TODO coherent order with mix/smoothstep => start, end, x static inline int32_t VFX_SIMD vfx_clamp(int32_t v, int32_t mn, int32_t mx) { return vfx_min(vfx_max(v, mn), mx); } static inline float VFX_SIMD vfx_clamp(float v, float mn, float mx) { return vfx_min(vfx_max(v, mn), mx); } static inline simd_float2 VFX_SIMD vfx_clamp(simd_float2 v, simd_float2 mn, simd_float2 mx) { return 
vfx_min(vfx_max(v, mn), mx); } static inline simd_float3 VFX_SIMD vfx_clamp(simd_float3 v, simd_float3 mn, simd_float3 mx) { return vfx_min(vfx_max(v, mn), mx); } static inline simd_float4 VFX_SIMD vfx_clamp(simd_float4 v, simd_float4 mn, simd_float4 mx) { return vfx_min(vfx_max(v, mn), mx); } static inline float VFX_SIMD vfx_saturate(float v) { return vfx_clamp(v, 0.f, 1.f); } static inline simd_float2 VFX_SIMD vfx_saturate(simd_float2 v) { return vfx_clamp(v, 0.f, 1.f); } static inline simd_float3 VFX_SIMD vfx_saturate(simd_float3 v) { return vfx_clamp(v, 0.f, 1.f); } static inline simd_float4 VFX_SIMD vfx_saturate(simd_float4 v) { return vfx_clamp(v, 0.f, 1.f); } // TODO coherent order with mix/smoothstep => start, end, x static inline float VFX_SIMD vfx_remap_01(float v, float start, float end) { return vfx_saturate( (v - start) / (end - start)); } static inline simd_float2 VFX_SIMD vfx_remap_01(simd_float2 v, simd_float2 start, simd_float2 end) { return vfx_saturate( (v - start) / (end - start)); } static inline simd_float3 VFX_SIMD vfx_remap_01(simd_float3 v, simd_float3 start, simd_float3 end) { return vfx_saturate( (v - start) / (end - start)); } static inline simd_float4 VFX_SIMD vfx_remap_01(simd_float4 v, simd_float4 start, simd_float4 end) { return vfx_saturate( (v - start) / (end - start)); } static inline float VFX_SIMD vfx_smoothstep(float a, float b, float x) { float xn = vfx_remap_01(x, a, b); return xn * xn * (3 - 2 * xn); } static inline simd_float2 VFX_SIMD vfx_smoothstep(simd_float2 a, simd_float2 b, simd_float2 x) { simd_float2 xn = vfx_remap_01(x, a, b); return xn * xn * (3 - 2 * xn); } static inline simd_float3 VFX_SIMD vfx_smoothstep(simd_float3 a, simd_float3 b, simd_float3 x) { simd_float3 xn = vfx_remap_01(x, a, b); return xn * xn * (3 - 2 * xn); } static inline simd_float4 VFX_SIMD vfx_smoothstep(simd_float4 a, simd_float4 b, simd_float4 x) { simd_float4 xn = vfx_remap_01(x, a, b); return xn * xn * (3 - 2 * xn); } static inline bool 
VFX_SIMD vfx_approxequal(float a, float b, float eps) { return vfx_abs(a - b) < eps; } static inline bool VFX_SIMD vfx_approxequal(simd_float2 a, simd_float2 b, float eps) { return vfx_all(vfx_abs(a - b) < eps); } static inline bool VFX_SIMD vfx_approxequal(simd_float3 a, simd_float3 b, float eps) { return vfx_all(vfx_abs(a - b) < eps); } static inline bool VFX_SIMD vfx_approxequal(simd_float4 a, simd_float4 b, float eps) { return vfx_all(vfx_abs(a - b) < eps); } static inline float VFX_SIMD vfx_reduce_mul(simd_float2 a) { return a.x * a.y; } static inline float VFX_SIMD vfx_reduce_mul(simd_float3 a) { return a.x * a.y * a.z; } static inline float VFX_SIMD vfx_reduce_mul(simd_float4 a) { return a.x * a.y * a.z * a.w; } static inline int VFX_SIMD vfx_min_index(simd_float4 v, simd_int4 ind) { simd_float4 min_lh = vfx_min(v, v.zwxy); simd_int4 min_ind_lh = simd_bitselect(ind, ind.zwxy, v > v.zwxy); return (min_lh.x < min_lh.y) ? min_ind_lh.x : min_ind_lh.y; } static inline simd_float2 VFX_SIMD vfx_robust_normalize(simd_float2 v) { simd_float2 length2 = vfx_length_squared(v); return vfx_select(v, v * vfx_rsqrt(length2), length2 != vfx_float4_zero().xy); } static inline simd_float4 VFX_SIMD vfx_robust_normalize(simd_float4 v) { simd_float4 length2 = vfx_length_squared(v); return vfx_select(v, v * vfx_rsqrt(length2), length2 != vfx_float4_zero()); } static inline simd_float3 VFX_SIMD vfx_robust_normalize(simd_float3 v) { simd_float3 length2 = vfx_length_squared(v); return vfx_select(v, v * vfx_rsqrt(length2), length2 != vfx_float4_zero().xyz); } static inline simd_float3 VFX_SIMD vfx_rescale(simd_float3 v, float len) { simd_float3 length2 = vfx_length_squared(v); return vfx_select(v, v * (len * vfx_rsqrt(length2)), length2 != vfx_float4_zero().xyz); } static inline void VFX_SIMD_UNPURE vfx_orthonormal_basis(simd_float3 n, vfx_out simd_float3 *t1, vfx_out simd_float3 *t2) { // from http://marc-b-reynolds.github.io/quaternions/2016/07/06/Orthonormal.html float sz = 
vfx_copysign(1.0f, n.z); float a = n.y / (vfx_abs(n.z) + 1.f); float b = n.y * a; float c = -n.x * a; *t1 = (simd_float3){ n.z + sz * b, sz * c, -n.x }; *t2 = (simd_float3){ c, 1.f - b, -sz * n.y }; } static inline float VFX_SIMD vfx_angle(simd_float3 a, simd_float3 b) { float l2 = vfx_length(a) * vfx_length(b); return (l2 > 0.0) ? vfx_acos(vfx_dot(a, b) / l2) : 0.0; } // http://mathworld.wolfram.com/ScalarTripleProduct.html static inline float VFX_SIMD vfx_triple_product(simd_float3 a, simd_float3 b, simd_float3 c) { return vfx_dot(a, vfx_cross(b,c)); } // Returns true if vectors lie on the same plane. static inline bool VFX_SIMD vfx_is_coplanar(simd_float3 v1, simd_float3 v2, simd_float3 v3, float eps) { float tp = vfx_triple_product(v1, v2, v3); return vfx_abs(tp) < eps; } // Returns the vector projected on the plane defined by normal at orgine. // Plane normal is expected to be normalized. static inline simd_float3 VFX_SIMD vfx_tangent(simd_float3 v, simd_float3 normal) { return v - vfx_dot(v, normal) * normal; } // return an arbitrary tangent vector to a given direction. // Warning : this is not necessarily a unit vector static inline simd_float3 VFX_SIMD vfx_arbitrary_tangent(simd_float3 a) { simd_float3 cross_unit_z = { a.y, -a.x, 0.f }; simd_float3 cross_unit_x = { 0.f, a.z,-a.y }; bool z_is_zero = vfx_all(cross_unit_z == (simd_float3)( 0.f )); return vfx_select(cross_unit_z, cross_unit_x, z_is_zero); } // Oriented angle between two vectors around an arbitrary axis, all parameters // are expected to be normalized. The result is undefined if v1, v2, axis are // coplanar vectors. 
static inline float VFX_SIMD vfx_angle(simd_float3 v1, simd_float3 v2, simd_float3 axis) {
    // project both vectors onto the plane orthogonal to axis, then normalize
    simd_float3 planeProjV1 = vfx_robust_normalize(vfx_tangent(v1, axis));
    simd_float3 planeProjV2 = vfx_robust_normalize(vfx_tangent(v2, axis));
    simd_float3 c = vfx_cross(planeProjV1, planeProjV2);
    // clamp keeps the cosine inside the domain of acos
    float diffAngle = vfx_acos(vfx_clamp(vfx_dot(planeProjV1, planeProjV2), -1.f, 1.f));
    // the sign comes from the side of the axis the cross product points to
    return vfx_dot(c, axis) > 0 ? diffAngle : -diffAngle;
}

// Easing functions.
// NOTE(review): presumably `a` is a normalized time in [0, 1]; nothing here clamps it — confirm with callers.
// The InOut variants switch formula at a = 0.5.
static inline float VFX_SIMD vfx_quadraticEaseIn(float a) { return vfx_pow2(a); }
static inline float VFX_SIMD vfx_quadraticEaseOut(float a) { return -a * (a - 2.f); }
static inline float VFX_SIMD vfx_quadraticEaseInOut(float a) { return a < 0.5f ? (2.f * (a * a)) : (-1.f + (4.f - 2.f * a) * a); }
static inline float VFX_SIMD vfx_cubicEaseIn(float a) { return vfx_pow3(a); }
static inline float VFX_SIMD vfx_cubicEaseOut(float a) { return 1 - vfx_pow3(1 - a); }
static inline float VFX_SIMD vfx_cubicEaseInOut(float a) { return a < 0.5f ? (4.f * a * a * a) : ((a - 1.f) * (2.f * a - 2.f) * (2.f * a - 2.f) + 1.f); }
static inline float VFX_SIMD vfx_quarticEaseIn(float a) { return vfx_pow4(a); }
static inline float VFX_SIMD vfx_quarticEaseOut(float a) { return 1 - vfx_pow4(1-a); }
static inline float VFX_SIMD vfx_quarticEaseInOut(float a) { return a < 0.5f ? (8.f * a * a * a * a) : (1.f - vfx_pow4(-2.f * a + 2.f) * 0.5f); }
static inline float VFX_SIMD vfx_quinticEaseIn(float a) { return vfx_pow5(a); }
static inline float VFX_SIMD vfx_quinticEaseOut(float a) { return 1 - vfx_pow5(1 - a); }
static inline float VFX_SIMD vfx_quinticEaseInOut(float a) { return a < 0.5f ? (16.f * a * a * a * a * a) : (1.f - vfx_pow5(-2.f * a + 2.f) * 0.5f); }

// Smooth timing curve with anticipation (ta) and midpoint (tmid) times
// NOTE(review): the end of this function looks damaged. The return statement
// assigns to t (`t=1`), uses `B`, which is not a parameter of this function,
// and calls vfx_physicalOvershoot, which is not defined in the visible part of
// this file. Text appears to be missing between `return t` and `=1`; restore
// it from the upstream header before relying on this function.
static inline float VFX_SIMD vfx_physicalSmooth(float t, float ta, float tmid) {
    float tam = ta - tmid - tmid; // ta - 2tmid
    float xa = (2*t*(ta - t) / (ta*tmid + tam));
    float xd = ((t - 2)*t*tam + (ta - 2)*tmid*tmid);
    return t=1 ? vfx_physicalOvershoot(t,ta,tmid,B) : vfx_physicalSmooth(t,ta,tmid);
}

#pragma mark - Batching

// batch dots
// Each helper computes several dot products at once: the component-wise
// products are laid out as matrix rows, so that adding the matrix columns
// yields one dot product per lane of the result.
static inline simd_float2 VFX_SIMD vfx_dot_x2(simd_float3 v0, simd_float3 w0, simd_float3 v1, simd_float3 w1) {
    simd_float3x2 dot_mat = vfx_float3x2_from_rows(v0 * w0, v1 * w1);
    return dot_mat.columns[0] + dot_mat.columns[1] + dot_mat.columns[2];
}
static inline simd_float2 VFX_SIMD vfx_dot_x2(simd_float4 v0, simd_float4 w0, simd_float4 v1, simd_float4 w1) {
    simd_float4x2 dot_mat = vfx_float4x2_from_rows(v0 * w0, v1 * w1);
    return dot_mat.columns[0] + dot_mat.columns[1] + dot_mat.columns[2] + dot_mat.columns[3];
}
// float4 inputs, but only the xyz products are summed (the w column is left out).
static inline simd_float2 VFX_SIMD vfx_dot3_x2(simd_float4 v0, simd_float4 w0, simd_float4 v1, simd_float4 w1) {
    simd_float4x2 dot_mat = vfx_float4x2_from_rows(v0 * w0, v1 * w1);
    return dot_mat.columns[0] + dot_mat.columns[1] + dot_mat.columns[2];
}
static inline simd_float3 VFX_SIMD vfx_dot_x3(simd_float3 v0, simd_float3 w0, simd_float3 v1, simd_float3 w1, simd_float3 v2, simd_float3 w2) {
    simd_float3x3 mat = vfx_float3x3_from_rows(v0 * w0, v1 * w1, v2 * w2);
    return mat.columns[0] + mat.columns[1] + mat.columns[2];
}
static inline simd_float3 VFX_SIMD vfx_dot_x3(simd_float4 v0, simd_float4 w0, simd_float4 v1, simd_float4 w1, simd_float4 v2, simd_float4 w2) {
    simd_float4x3 mat = vfx_float4x3_from_rows(v0 * w0, v1 * w1, v2 * w2);
    return mat.columns[0] + mat.columns[1] + mat.columns[2] + mat.columns[3];
}
static inline simd_float4 VFX_SIMD vfx_dot_x4(simd_float3 v0, simd_float3 w0, simd_float3 v1, simd_float3 w1, simd_float3 v2, simd_float3 w2, simd_float3 v3, simd_float3 w3) {
    simd_float3x4 dot_mat = vfx_float3x4_from_rows(v0 * w0, v1 * w1, v2 * w2, v3 * w3);
    return dot_mat.columns[0] + dot_mat.columns[1] + dot_mat.columns[2];
}
static inline simd_float4 VFX_SIMD vfx_dot_x4(simd_float4 v0, simd_float4 w0, simd_float4 v1, simd_float4 w1, simd_float4 v2, simd_float4 w2, simd_float4 v3, simd_float4 w3) {
    simd_float4x4 dot_mat = vfx_float4x4_from_rows(v0 * w0, v1 * w1, v2 * w2, v3 * w3);
    return dot_mat.columns[0] + dot_mat.columns[1] + dot_mat.columns[2] + dot_mat.columns[3];
}

// Weighted sum v0 * w0 + v1 * w1.
static inline simd_float3 VFX_SIMD vfx_linear_combination(simd_float3 v0, float w0, simd_float3 v1, float w1) { return v0 * w0 + v1 * w1; }

// post multiplication by a translation transform
// Only the translation column changes: it gains the upper-left 3x3 of m applied to t.
static inline simd_float4x4 VFX_SIMD vfx_post_translate(simd_float4x4 m, simd_float3 t) {
    simd_float3x3 tm = vfx_transpose(vfx_float3x3(m));
    // m[3].x += vfx_dot(tm[0], t); m[3].y += vfx_dot(tm[1], t); m[3].z += vfx_dot(tm[2], t);
    m.columns[3].xyz += vfx_dot_x3(tm.columns[0], t, tm.columns[1], t, tm.columns[2], t);
    return m;
}

#pragma mark - Quaternion

// Quaternions held in a simd_float4 keep the vector part in xyz and the real
// part in w (see vfx_quat_identity_ and vfx_quat_mul). Function names ending in
// '_' work on raw simd_float4 values.
// Normalization is the robust one: an all-zero input is returned unchanged.
static inline simd_float4 VFX_SIMD vfx_quat_normalize(simd_float4 q) { return vfx_robust_normalize(q); }
static inline float VFX_SIMD vfx_quat_length_(simd_float4 q) { return vfx_length(q); }
static inline simd_float4 VFX_SIMD vfx_quat_negate(simd_float4 q) { return -q; }
static inline simd_float4 VFX_SIMD vfx_quat_conjugate(simd_float4 q) { return q * (simd_float4){-1,-1,-1, 1}; }
// conjugate / |q|^2; not guarded against a zero quaternion.
static inline simd_float4 VFX_SIMD vfx_quat_inverse(simd_float4 q) { return vfx_quat_conjugate(q) / vfx_length_squared(q); }
static inline simd_float4 VFX_SIMD vfx_quat_identity_(void) { return vfx_float4_unit_w(); }
// Every component must be within 1e-6 of (0, 0, 0, 1).
static inline bool VFX_SIMD vfx_quat_is_identity(simd_float4 q) { return vfx_approxequal(q, vfx_quat_identity_(), 0.000001f); }
// Hamilton product q1 * q2.
static inline simd_float4 VFX_SIMD vfx_quat_mul(simd_float4 q1, simd_float4 q2) {
    return vfx_make_float4(q2.xyz * q1.w + q1.xyz * q2.w + vfx_cross(q1.xyz, q2.xyz),
                           q1.w * q2.w - vfx_dot(q1.xyz, q2.xyz));
}

#ifndef __METAL_VERSION__
// simd_quatf wrappers (host only): each one forwards to the simd_float4 version above.
static inline simd_quatf VFX_SIMD vfx_quat_zero(void) { return (simd_quatf){ vfx_float4_zero() }; }
static inline simd_quatf VFX_SIMD _vfx_simd_quatf(simd_float4 v) { return (simd_quatf){ .vector = v }; }
static inline simd_quatf VFX_SIMD _vfx_simd_quatf(float x, float y, float z, float w) { return (simd_quatf){ .vector = vfx_make_float4(x, y, z, w) }; }
static inline simd_quatf VFX_SIMD vfx_quat_normalize(simd_quatf q) { return _vfx_simd_quatf(vfx_robust_normalize(q.vector)); }
static inline float VFX_SIMD vfx_quat_length(simd_quatf q) { return vfx_quat_length_(q.vector); }
static inline simd_quatf VFX_SIMD vfx_quat_negate(simd_quatf q) { return _vfx_simd_quatf(vfx_quat_negate(q.vector)); }
static inline simd_quatf VFX_SIMD vfx_quat_conjugate(simd_quatf q) { return _vfx_simd_quatf(vfx_quat_conjugate(q.vector)); }
static inline simd_quatf VFX_SIMD vfx_quat_inverse(simd_quatf q) { return _vfx_simd_quatf(vfx_quat_inverse(q.vector)); }
static inline simd_quatf VFX_SIMD vfx_quat_identity(void) { return _vfx_simd_quatf(vfx_quat_identity_()); }
static inline bool VFX_SIMD vfx_quat_is_identity(simd_quatf q) { return vfx_quat_is_identity(q.vector); }
static inline simd_quatf VFX_SIMD vfx_quat_mul(simd_quatf q1, simd_quatf q2) { return _vfx_simd_quatf(vfx_quat_mul(q1.vector, q2.vector)); }
#endif

// Quaternion (xyz = vector part, w = real part) from a 3x3 matrix.
// NOTE(review): presumably the matrix must be a pure rotation; see vfx_extract_orthogonal for scaled input.
// The branch is picked from the trace and, failing that, from the largest
// diagonal entry, so that the square root is taken of the largest candidate.
static inline VFX_SIMD simd_float4 vfx_quat_(simd_float3x3 mat) {
    const simd_float3 vfx_out *m = mat.columns;
    float num8 = m[0][0] + m[1][1] + m[2][2]; // trace
    if (num8 > 0.0) {
        float num = vfx_sqrt(num8 + 1.f);
        return vfx_make_float4(m[1][2] - m[2][1], m[2][0] - m[0][2], m[0][1] - m[1][0], num * num) * (0.5f / num);
    } else if ((m[0][0] >= m[1][1]) && (m[0][0] >= m[2][2])) {
        // m[0][0] is the largest diagonal entry
        float num7 = vfx_sqrt(((1.f + m[0][0]) - m[1][1]) - m[2][2]);
        return vfx_make_float4(num7 * num7, m[0][1] + m[1][0], m[0][2] + m[2][0], m[1][2] - m[2][1]) * (0.5f / num7);
    } else if (m[1][1] > m[2][2]) {
        // m[1][1] is the largest diagonal entry
        float num6 = vfx_sqrt(((1.f + m[1][1]) - m[0][0]) - m[2][2]);
        return vfx_make_float4(m[1][0] + m[0][1], num6 * num6, m[2][1] + m[1][2], m[2][0] - m[0][2]) * (0.5f / num6);
    }
    // m[2][2] is the largest diagonal entry
    float num5 = vfx_sqrt(((1.f + m[2][2]) - m[0][0]) - m[1][1]);
    return vfx_make_float4(m[2][0] + m[0][2], m[2][1] + m[1][2], num5 * num5, m[0][1] - m[1][0]) * (0.5f / num5);
}
// 4x4 overload: uses the upper-left 3x3 only.
static inline simd_float4 VFX_SIMD vfx_quat_(simd_float4x4 mat) { return vfx_quat_(vfx_float3x3(mat)); }

// Rotation taking unit vector a onto unit vector b.
// For a == -b the un-normalized quaternion is all zero, and the robust
// normalization returns it as is.
static inline simd_float4 VFX_SIMD vfx_quat_rotation_between_unitvector_(simd_float3 a, simd_float3 b) {
    return vfx_quat_normalize(vfx_make_float4(vfx_cross(a, b), 1.f + vfx_dot(a, b)));
}

#ifdef __METAL_VERSION__
// custom versions
// rotation is p' = q * p * conjugate(q);
// simplification : https://blog.molecular-matters.com/2013/05/24/a-faster-quaternion-vector-multiplication/
// (the code below evaluates the full q * p * conjugate(q) product)
static inline VFX_SIMD simd_float3 vfx_quat_rotate_vector(simd_float4 q, simd_float3 v) {
    return vfx_quat_mul(q, vfx_quat_mul(vfx_make_float4(v, 0), vfx_quat_conjugate(q))).xyz;
}

// TODO should be a matrix ctor !
// Rotation matrix of q, filled column by column.
// NOTE(review): presumably q is expected to be unit length — confirm.
static inline float3x3 vfx_quat_to_matrix3x3(simd_float4 q) {
    float3 q2 = q.xyz * q.xyz * 2.f;
    float3 wq = q.w * q.xyz;
    float xy = q.x * q.y;
    float yz = q.y * q.z;
    float xz = q.x * q.z;
    float3x3 m;
    m[0][0] = 1.0f - q2.y - q2.z;
    m[0][1] = 2.0f * (xy + wq.z);
    m[0][2] = 2.0f * (xz - wq.y);
    m[1][0] = 2.0f * (xy - wq.z);
    m[1][1] = 1.0f - q2.x - q2.z;
    m[1][2] = 2.0f * (yz + wq.x);
    m[2][0] = 2.0f * (xz + wq.y);
    m[2][1] = 2.0f * (yz - wq.x);
    m[2][2] = 1.0f - q2.x - q2.y;
    return m;
}
static inline float4x4 vfx_quat_to_matrix4x4(simd_float4 q) { return vfx_float4x4(vfx_quat_to_matrix3x3(q)); }

// Rotation taking vector a onto vector b; the inputs need not be unit length
// (the real part is scaled by |a||b|).
// NOTE(review): the host build names the simd_float4-returning equivalents
// vfx_quat_rotation_between_ (trailing underscore); here they have none.
static inline simd_float4 VFX_SIMD vfx_quat_rotation_between(simd_float3 a, simd_float3 b) {
    float4 q;
    q.xyz = vfx_cross(a, b);
    q.w = vfx_sqrt(vfx_length_squared(a) * vfx_length_squared(b)) + vfx_dot(a, b);
    return vfx_quat_normalize(q);
}
// Quaternion taking orientation a to orientation b: b * inverse(a).
static inline simd_float4 VFX_SIMD vfx_quat_rotation_between(simd_float4 a, simd_float4 b) { return vfx_quat_mul(b, vfx_quat_inverse(a)); }
#else
// native SIMD version
static inline simd_float3 VFX_SIMD vfx_quat_rotate_vector(simd_float4 q, simd_float3 v) { return simd_act(_vfx_simd_quatf(q), v); }
static inline simd_float3x3 VFX_SIMD vfx_quat_to_matrix3x3(simd_float4 q) { return simd_matrix3x3(_vfx_simd_quatf(q)); }
static inline simd_float4x4 VFX_SIMD vfx_quat_to_matrix4x4(simd_float4 q) { return simd_matrix4x4(_vfx_simd_quatf(q)); }
static inline simd_float4 VFX_SIMD vfx_quat_rotation_between_(simd_float3 a, simd_float3 b) { return simd_quaternion(a, b).vector; }
// Quaternion taking orientation a to orientation b: b * inverse(a).
static inline simd_float4 VFX_SIMD vfx_quat_rotation_between_(simd_float4 a, simd_float4 b) { return vfx_quat_mul(b, vfx_quat_inverse(a)); }

// simd_quatf versions
static inline VFX_SIMD simd_quatf vfx_quat(simd_float3x3 matrix) { return _vfx_simd_quatf(vfx_quat_(matrix)); }
static inline VFX_SIMD simd_quatf vfx_quat(simd_float4x4 matrix) { return _vfx_simd_quatf(vfx_quat_(matrix)); }
static inline simd_float3 VFX_SIMD vfx_quat_rotate_vector(simd_quatf q, simd_float3 v) { return vfx_quat_rotate_vector(q.vector, v); }
static inline simd_float3x3 VFX_SIMD vfx_quat_to_matrix3x3(simd_quatf q) { return vfx_quat_to_matrix3x3(q.vector); }
static inline simd_float4x4 VFX_SIMD vfx_quat_to_matrix4x4(simd_quatf q) { return vfx_quat_to_matrix4x4(q.vector); }
static inline simd_quatf VFX_SIMD vfx_quat_rotation_between(simd_float3 a, simd_float3 b) { return _vfx_simd_quatf(vfx_quat_rotation_between_(a, b)); }
static inline simd_quatf VFX_SIMD vfx_quat_rotation_between(simd_quatf a, simd_quatf b) { return _vfx_simd_quatf(vfx_quat_rotation_between_(a.vector, b.vector)); }
#endif

// Matrix
// Robust extraction of a rotation (3x3) matrix from a (scaled) 4x4.
// Supports one zero axis. With more, the matrix won't be orthogonal
// but there will be no NaN
static inline simd_float3x3 VFX_SIMD vfx_extract_orthogonal(simd_float3x3 m) {
    // squared length of the three columns, computed in one batch
    simd_float3x3 dot_mat = vfx_float3x3_from_rows(m.columns[0] * m.columns[0], m.columns[1] * m.columns[1], m.columns[2] * m.columns[2]);
    simd_float3 len_sqr = dot_mat.columns[0] + dot_mat.columns[1] + dot_mat.columns[2];
#ifdef __METAL_VERSION__
    simd_bool3 len_is_zero = len_sqr <= VFX_MIN;
#else
    simd_int3 len_is_zero = len_sqr <= VFX_MIN;
#endif
    // degenerate axes keep their (tiny) squared length as scale factor instead of 1/sqrt
    simd_float3 inv_len = vfx_select(vfx_precise_rsqrt(len_sqr), len_sqr, len_is_zero) ;
    simd_float3 vx = m.columns[0] * inv_len.x;
    simd_float3 vy = m.columns[1] * inv_len.y;
    simd_float3 vz = m.columns[2] * inv_len.z;
    // a degenerate axis is rebuilt as the cross product of the two others
    return vfx_float3x3(
        len_is_zero.x ? vfx_cross(vy, vz) : vx,
        len_is_zero.y ? vfx_cross(vz, vx) : vy,
        len_is_zero.z ? vfx_cross(vx, vy) : vz
    );
}
// 4x4 overload: works on the upper-left 3x3.
static inline simd_float3x3 VFX_SIMD vfx_extract_orthogonal(simd_float4x4 m) {
    return vfx_extract_orthogonal(vfx_float3x3(m.columns[0].xyz, m.columns[1].xyz, m.columns[2].xyz) );
}

// Quaternion from a possibly scaled matrix: the scale is removed first.
static inline simd_float4 VFX_SIMD vfx_quat_from_matrix3x3_(simd_float3x3 m) { return vfx_quat_(vfx_extract_orthogonal(m)); }
static inline simd_float4 VFX_SIMD vfx_quat_from_matrix4x4_(simd_float4x4 m) { return vfx_quat_(vfx_extract_orthogonal(m)); }

// Quaternion of the basis (t, b, n) built around the normalized n, so that n
// becomes the third column of the matrix handed to vfx_quat_.
static inline simd_float4 VFX_SIMD vfx_quat_from_normal_(simd_float3 n) {
    n = vfx_robust_normalize(n);
    simd_float3 t,b;
    vfx_orthonormal_basis(n, &t, &b);
    return vfx_quat_(vfx_float3x3(t, b, n));
}
#ifndef __METAL_VERSION__
static inline simd_quatf VFX_SIMD vfx_quat_from_normal(simd_float3 n) { return _vfx_simd_quatf(vfx_quat_from_normal_(n)); }
#endif
// Third column of the rotation matrix of q.
static inline simd_float3 VFX_SIMD vfx_quat_to_normal(simd_float4 q) { return vfx_quat_to_matrix3x3(q).columns[2]; }
#ifndef __METAL_VERSION__
static inline simd_float3 VFX_SIMD vfx_quat_to_normal(simd_quatf q) { return vfx_quat_to_normal(q.vector); }
#endif

// this function will produce random sign for 180deg rotation
// use only when you know that you deal with small rotation matrices
static inline simd_float4 VFX_SIMD vfx_quat_from_matrix4x4_describing_small_rotation_(simd_float4x4 m) {
    // based on Branchless Matrix to Quaternion Conversion
    // from http://www.thetenthplanet.de/archives/1994
    // warning : we store real in w, not in x
    simd_float4 sign = { 1.f, -1.f, -1.f, 1.f };
    simd_float4 mx = sign.xxxx + m.columns[0][0] * sign + m.columns[1][1] * sign.yxzw + m.columns[2][2] * sign.yzxw;
    // max with zero keeps the square root away from slightly negative inputs
    simd_float4 q = 0.5f * vfx_sqrt(vfx_max(vfx_float4_zero(), mx));
    // magnitudes come from the diagonal, signs from the off-diagonal differences
    q.xyz = vfx_copysign(q.xyz, vfx_make_float3(m.columns[1][2] - m.columns[2][1], m.columns[2][0] - m.columns[0][2], m.columns[0][1] - m.columns[1][0]));
    return q;
}
#ifndef __METAL_VERSION__
static inline simd_quatf VFX_SIMD vfx_quat_from_matrix4x4_describing_small_rotation(simd_float4x4 m) { return _vfx_simd_quatf(vfx_quat_from_matrix4x4_describing_small_rotation_(m));}
#endif

// LookAt
// Quaternion of the basis (right, up, forward) derived from the given forward
// and up directions; forward is normalized and up is recomputed.
static inline simd_float4 VFX_SIMD vfx_quat_look_at_(simd_float3 forward, simd_float3 up) {
    forward = vfx_robust_normalize(forward);
    // deal with forward and up colinear -> use forward.yzx (rotation) as up then
    simd_float3 v = vfx_cross(up, forward);
    simd_float3 length2 = vfx_length_squared(v);
    simd_float3 right = vfx_select(forward.yzx, v * vfx_rsqrt(length2), length2 != vfx_float3_zero());
    up = vfx_normalize(vfx_cross(forward, right));
    return vfx_quat_(vfx_float3x3(right, up, forward));
}

// From-To
// Rotation taking vector a onto vector b; the inputs need not be unit length
// (the real part is scaled by |a||b|).
static inline simd_float4 VFX_SIMD vfx_quat_rotation_between_fast_(simd_float3 a, simd_float3 b) {
    simd_float4 q;
    q.xyz = vfx_cross(a, b);
    q.w = vfx_sqrt(vfx_length_squared(a) * vfx_length_squared(b)) + vfx_dot(a, b);
    return vfx_quat_normalize(q);
}
#ifndef __METAL_VERSION__
static inline simd_quatf VFX_SIMD vfx_quat_rotation_between_fast(simd_float3 a, simd_float3 b) { return _vfx_simd_quatf(vfx_quat_rotation_between_fast_(a, b)); }
#endif

// Axis-Angle
static inline VFX_SIMD simd_float4 vfx_quat_from_axisangle_unit_(simd_float3 axis_unit, float angle) { return 
vfx_make_float4(sin(angle/2) * axis_unit, cos(angle/2)); } static inline simd_float4 VFX_SIMD vfx_quat_from_axisangle_(simd_float3 axis, float angle) { return vfx_quat_from_axisangle_unit_(vfx_robust_normalize(axis), angle); } static inline float VFX_SIMD vfx_quat_angle(simd_float4 q) { return 2 * vfx_atan2(vfx_length(q.xyz), q.w); } static inline simd_float3 VFX_SIMD vfx_quat_axis(simd_float4 q) { return vfx_robust_normalize(q.xyz); } static inline simd_float4 VFX_SIMD vfx_quat_from_axisangle_(simd_float4 axis_angle) { return vfx_quat_from_axisangle_(axis_angle.xyz, axis_angle.w); } static inline simd_float4 VFX_SIMD vfx_quat_to_axisangle_(simd_float4 q) { return vfx_make_float4(vfx_quat_axis(q), vfx_quat_angle(q)); } #ifndef __METAL_VERSION__ // simd_quatf versions static inline VFX_SIMD simd_quatf vfx_quat_from_axisangle_unit(simd_float3 axis_unit, float angle) { return _vfx_simd_quatf(vfx_quat_from_axisangle_unit_(axis_unit, angle)); } static inline simd_quatf VFX_SIMD vfx_quat_from_axisangle(simd_float3 axis, float angle) { return _vfx_simd_quatf(vfx_quat_from_axisangle_(axis, angle)); } static inline float VFX_SIMD vfx_quat_angle(simd_quatf q) { return vfx_quat_angle(q.vector); } static inline simd_float3 VFX_SIMD vfx_quat_axis(simd_quatf q) { return vfx_quat_axis(q.vector); } static inline simd_quatf VFX_SIMD vfx_quat_from_axisangle(simd_float4 axis_angle) { return _vfx_simd_quatf(vfx_quat_from_axisangle_(axis_angle)); } static inline simd_float4 VFX_SIMD vfx_quat_to_axisangle(simd_quatf q) { return vfx_quat_to_axisangle_(q.vector); } #endif #ifndef __METAL_VERSION__ static inline simd_quatf VFX_SIMD vfx_quat_look_at(simd_float3 forward, simd_float3 up) { return _vfx_simd_quatf(vfx_quat_look_at_(forward, up)); } // Compute lookat orientation to direction with up vector orientation and local // front direction in transform local space. VFXNode convention defines front to // (0, 0, -1). 
static inline simd_quatf VFX_SIMD vfx_quat_look_at(simd_float3 direction, simd_float3 up, simd_float3 localFront) { simd_quatf orientation = vfx_quat_rotation_between(localFront, direction); // Current world space up vector of the orientation transform. simd_float3 orientationUp = vfx_quat_rotate_vector(orientation, (simd_float3){0, 1, 0}); float rollAngle = vfx_angle(orientationUp, up, direction); return vfx_quat_mul(vfx_quat_from_axisangle(direction, rollAngle), orientation); } #endif // Euler static inline simd_float4 VFX_SIMD vfx_quat_from_euler_(simd_float3 angles) { simd_float3 angles_2 = angles / 2; simd_float3 cs = vfx_cos(angles_2); simd_float3 sn = vfx_sin(angles_2); float cpcy = cs.y * cs.z; float spsy = sn.y * sn.z; return vfx_make_float4(sn.x * cpcy - cs.x * spsy, cs.x * sn.y * cs.z + sn.x * cs.y * sn.z, cs.x * cs.y * sn.z - sn.x * sn.y * cs.z, cs.x * cpcy + sn.x * spsy); } static inline simd_float4 VFX_SIMD vfx_quat_from_x_rotation(float angle) { float angle_2 = angle / 2; float cs = vfx_cos(angle_2); float sn = vfx_sin(angle_2); return vfx_make_float4(sn, 0.f, 0.f, cs); } static inline simd_float4 VFX_SIMD vfx_quat_from_y_rotation(float angle) { float angle_2 = angle / 2; float cs = vfx_cos(angle_2); float sn = vfx_sin(angle_2); return vfx_make_float4(0.f, sn, 0.f, cs); } static inline simd_float4 VFX_SIMD vfx_quat_from_z_rotation(float angle) { float angle_2 = angle / 2; float cs = vfx_cos(angle_2); float sn = vfx_sin(angle_2); return vfx_make_float4(0.f, 0.f, sn, cs); } static inline simd_float3 VFX_SIMD vfx_quat_to_euler(simd_float4 q) { simd_float4 sq = q * q; float unit = vfx_reduce_add(sq); //should be 1 if the quaternion is normalized if (unit == 0.f) return vfx_float4_zero().xyz; float test = (q.w * q.y - q.z * q.x) / unit; if (test > 0.4999) return vfx_make_float3( 2.f * vfx_atan2(q.x, q.w), VFX_PI_2, 0.f ); // singularity at north pole if (test < -0.4999) return vfx_make_float3( 2.f * vfx_atan2(q.x, q.w), -VFX_PI_2, 0.f ); // singularity at 
south pole return vfx_make_float3(vfx_atan2(2.f * (q.y * q.z + q.x * q.w), (-sq.x - sq.y + sq.z + sq.w)), vfx_asin(2.f * test), vfx_atan2(2.f * (q.x * q.y + q.z * q.w), ( sq.x - sq.y - sq.z + sq.w))); } // Math static inline VFX_SIMD simd_float4 vfx_quat_log(simd_float4 q) { float real = vfx_log(vfx_length_squared(q))/2; if (vfx_equal(q.xyz, vfx_float3_zero())) return vfx_make_float4(vfx_float3_zero(), real); simd_float3 imag = vfx_acos(q.w / vfx_length(q)) * vfx_normalize(q.xyz); return vfx_make_float4(imag, real); } static inline simd_float4 VFX_SIMD vfx_quat_ln(simd_float4 q) { float r = vfx_length(q.xyz); float t = r > 10E-5 ? vfx_atan2(r, q.w) / r : 0.0f; return vfx_make_float4(q.xyz * t, 0.5f * vfx_log(vfx_length_squared(q.xyz))); } static inline simd_float4 VFX_SIMD vfx_quat_exp(simd_float4 q) { float r = vfx_length(q.xyz); float et = vfx_exp(q.w); float s = r >= 10E-5 ? et * vfx_sin(r) / r : 0.0f; return vfx_make_float4(q.xyz * s, et * vfx_cos(r)); } static inline simd_float4 VFX_SIMD vfx_quat_pow(simd_float4 q, float p) { return vfx_quat_ln(p * vfx_quat_exp(q)); } // Interpolations // helpers static inline VFX_SIMD float _vfx_quat_angle(simd_float4 p, simd_float4 q) { // alternative version, TO TEST // float d = vfx_dot(p, q); // return d > 1 ? 0 : acos(d < -1 ? 
-1 : d); return 2 * vfx_atan2(vfx_length(p - q), vfx_length(p + q)); } static inline VFX_SIMD simd_float4 _vfx_quat_slerp_internal(simd_float4 q0, simd_float4 q1, float t) { float s = 1 - t; float a = _vfx_quat_angle(q0, q1); float r = vfx_recip(vfx_sinc(a)); return vfx_quat_normalize(vfx_sinc(s*a)*r*s*q0 + vfx_sinc(t*a)*r*t*q1); } // Spherical linear interpolation along the shortest arc between quaternions `q0` and `q1` static inline simd_float4 VFX_SIMD vfx_quat_slerp(simd_float4 from, simd_float4 to, float t) { if (vfx_dot(from, to) >= 0) return _vfx_quat_slerp_internal(from, to, t); return _vfx_quat_slerp_internal(from, vfx_quat_negate(to), t); } // Spherical linear interpolation along the longest arc between quaternions `q0` and `q1` static inline simd_float4 VFX_SIMD vfx_quat_slerp_longest(simd_float4 from, simd_float4 to, float t) { if (vfx_dot(from, to) >= 0) return _vfx_quat_slerp_internal(from, vfx_quat_negate(to), t); return _vfx_quat_slerp_internal(from, to, t); } static inline VFX_SIMD simd_float4 _vfx_quat_squad(simd_float4 q0, simd_float4 qa, simd_float4 qb, simd_float4 q1, float t) { simd_float4 r0 = _vfx_quat_slerp_internal(q0, q1, t); simd_float4 r1 = _vfx_quat_slerp_internal(qa, qb, t); return _vfx_quat_slerp_internal(r0, r1, 2*t*(1 - t)); } static inline VFX_SIMD simd_float4 _vfx_quat_squad_intermediate(simd_float4 q0, simd_float4 q1, simd_float4 q2) { simd_float4 p0 = vfx_quat_log(vfx_quat_mul(q0, vfx_quat_inverse(q1))); simd_float4 p2 = vfx_quat_log(vfx_quat_mul(q2, vfx_quat_inverse(q1))); return vfx_quat_normalize(vfx_quat_mul(q1, vfx_quat_exp(vfx_quat_mul(-0.25f, p0 + p2)))); } // aka Shoemake's Quaternion curves or spline // Interpolate between quaternions along a spherical cubic spline // The function interpolates between q1 and q2. q0 is the left // endpoint of the previous interval, and q3 is the right endpoint of the next // interval. 
Use this function to smoothly interpolate between a sequence of // rotations static inline simd_float4 VFX_SIMD vfx_quat_squad(simd_float4 q0, simd_float4 q1, simd_float4 q2, simd_float4 q3, float t) { simd_float4 qa = _vfx_quat_squad_intermediate(q0, q1, q2); simd_float4 qb = _vfx_quat_squad_intermediate(q1, q2, q3); return _vfx_quat_squad(q1, qa, qb, q2, t); } // Spherical cubic Bezier interpolation between quaternions // The function treats q0 ... q3 as control points and uses slerp // in place of lerp in the De Castlejeau algorithm. The endpoints of // interpolation are thus q0 and q3, and the curve will not generally pass // through q1 or q2. Note that the convex hull property of "standard" Bezier // curve does not hold on the sphere static inline simd_float4 VFX_SIMD vfx_quat_bezier(simd_float4 q0, simd_float4 q1, simd_float4 q2, simd_float4 q3, float t) { simd_float4 q01 = _vfx_quat_slerp_internal(q0, q1, t); simd_float4 q12 = _vfx_quat_slerp_internal(q1, q2, t); simd_float4 q23 = _vfx_quat_slerp_internal(q2, q3, t); simd_float4 q012 = _vfx_quat_slerp_internal(q01, q12, t); simd_float4 q123 = _vfx_quat_slerp_internal(q12, q23, t); return _vfx_quat_slerp_internal(q012, q123, t); } #ifndef __METAL_VERSION__ // simd_quatf versions static inline simd_quatf VFX_SIMD vfx_quat_from_matrix3x3(simd_float3x3 m) { return _vfx_simd_quatf(vfx_quat_from_matrix3x3_(m)); } static inline simd_quatf VFX_SIMD vfx_quat_from_matrix4x4(simd_float4x4 m) { return _vfx_simd_quatf(vfx_quat_from_matrix4x4_(m)); } static inline simd_quatf VFX_SIMD vfx_quat_rotation_between_unitvector(simd_float3 a, simd_float3 b) { return _vfx_simd_quatf(vfx_quat_rotation_between_unitvector_(a,b)); } static inline simd_quatf VFX_SIMD vfx_quat_from_euler(simd_float3 angles) { return _vfx_simd_quatf(vfx_quat_from_euler_(angles)); } static inline simd_float3 VFX_SIMD vfx_quat_to_euler(simd_quatf q) { return vfx_quat_to_euler(q.vector); } static inline VFX_SIMD simd_quatf vfx_quat_log(simd_quatf q) { 
return _vfx_simd_quatf(vfx_quat_log(q.vector)); } static inline VFX_SIMD simd_quatf vfx_quat_ln(simd_quatf q) { return _vfx_simd_quatf(vfx_quat_ln(q.vector)); } static inline VFX_SIMD simd_quatf vfx_quat_exp(simd_quatf q) { return _vfx_simd_quatf(vfx_quat_exp(q.vector)); } static inline VFX_SIMD simd_quatf vfx_quat_pow(simd_quatf q, float p) { return _vfx_simd_quatf(vfx_quat_pow(q.vector, p)); } static inline VFX_SIMD float _vfx_quat_angle(simd_float4 p, simd_quatf q) { return _vfx_quat_angle(p, q.vector); } static inline VFX_SIMD simd_quatf _vfx_quat_slerp_internal(simd_quatf q0, simd_quatf q1, float t) { return _vfx_simd_quatf(_vfx_quat_slerp_internal(q0.vector, q1.vector, t)); } static inline simd_quatf VFX_SIMD vfx_quat_slerp(simd_quatf from, simd_quatf to, float t) { return _vfx_simd_quatf(vfx_quat_slerp(from.vector, to.vector, t)); } static inline simd_quatf VFX_SIMD vfx_quat_slerp_longest(simd_quatf from, simd_quatf to, float t) { return _vfx_simd_quatf(vfx_quat_slerp_longest(from.vector, to.vector, t)); } static inline VFX_SIMD simd_quatf _vfx_quat_squad(simd_quatf q0, simd_quatf qa, simd_quatf qb, simd_quatf q1, float t) { return _vfx_simd_quatf(_vfx_quat_squad(q0.vector, qa.vector, qb.vector, q1.vector, t)); } static inline VFX_SIMD simd_quatf _vfx_quat_squad_intermediate(simd_quatf q0, simd_quatf q1, simd_quatf q2) { return _vfx_simd_quatf(_vfx_quat_squad_intermediate(q0.vector, q1.vector, q2.vector)); } static inline simd_quatf VFX_SIMD vfx_quat_squad(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t) { return _vfx_simd_quatf(vfx_quat_squad(q0.vector, q1.vector, q2.vector, q3.vector, t)); } static inline simd_quatf VFX_SIMD vfx_quat_bezier(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t) { return _vfx_simd_quatf(vfx_quat_bezier(q0.vector, q1.vector, q2.vector, q3.vector, t)); } #endif #pragma mark - Matrix2x2 #ifndef __METAL_VERSION__ static inline simd_float2x2 VFX_SIMD vfx_mul(simd_float2x2 a, simd_float2x2 b) { 
return simd_mul(a, b); } #else static inline simd_float2x2 VFX_SIMD vfx_mul(simd_float2x2 a, simd_float2x2 b) { return a * b; } #endif #pragma mark - Matrix3x3 #ifndef __METAL_VERSION__ static inline simd_float3x3 VFX_SIMD vfx_mul(simd_float3x3 a, simd_float3x3 b) { return simd_mul(a, b); } #else static inline simd_float3x3 VFX_SIMD vfx_mul(simd_float3x3 a, simd_float3x3 b) { return a * b; } #endif // Create a 2D transform with (a)ngle, (t)ranslation and (s)cale static inline simd_float3x3 VFX_SIMD vfx_make_transform(float a, simd_float2 t, simd_float2 s) { float cs = vfx_cos(a); float sn = vfx_sin(a); return vfx_float3x3( s.x * vfx_make_float3( cs, -sn, 0 ), s.y * vfx_make_float3( sn, cs, 0 ), vfx_make_float3( t, 1 ) ); } static inline simd_float2 VFX_SIMD vfx_transform(simd_float3x3 m, simd_float2 p) { simd_float2 result = m.columns[2].xy; result += p.x * m.columns[0].xy; result += p.y * m.columns[1].xy; return result; } static inline simd_float3x3 VFX_SIMD vfx_translate(simd_float3x3 m, simd_float2 t) { m.columns[2].xy += t.xy; return m; } static inline simd_float3x3 VFX_SIMD vfx_scale(simd_float3x3 m, simd_float2 s) { m.columns[0] *= s.x; m.columns[1] *= s.y; return m; } static inline simd_float3x3 VFX_SIMD vfx_scale(simd_float3x3 m, simd_float3 s) { m.columns[0] *= s.x; m.columns[1] *= s.y; m.columns[2] *= s.z; return m; } static inline simd_float2 VFX_SIMD vfx_get_translation_2D(simd_float3x3 m) { return m.columns[2].xy; } static inline simd_float2 VFX_SIMD vfx_get_scale_2D(simd_float3x3 m) { return vfx_make_float2(vfx_length(m.columns[0].xy), vfx_length(m.columns[1].xy)); } static inline simd_float3 VFX_SIMD vfx_get_scale(simd_float3x3 m) { return vfx_make_float3(vfx_length(m.columns[0].xyz), vfx_length(m.columns[1].xyz), vfx_length(m.columns[2].xyz)); } #pragma mark - Matrix4x4 #ifndef __METAL_VERSION__ static inline simd_float4x4 VFX_SIMD vfx_mul(simd_float4x4 a, simd_float4x4 b) { return simd_mul(a, b); } static inline simd_float4 VFX_SIMD 
vfx_mul(simd_float4x4 a, simd_float4 b) { return simd_mul(a, b); } #else static inline simd_float4x4 VFX_SIMD vfx_mul(simd_float4x4 a, simd_float4x4 b) { return a * b; } static inline simd_float4 VFX_SIMD vfx_mul(simd_float4x4 a, simd_float4 b) { return a * b; } #endif // Avoid zero scale. static inline simd_float3 VFX_SIMD vfx_robust_scale(simd_float3 s) { VFX_MATH_ASSERT(!vfx_is_almost_zero(s)); return vfx_copysign(vfx_max(vfx_abs(s), vfx_splat3(VFX_EPSILON)), s); } static inline simd_float3 VFX_SIMD vfx_transform(simd_float4x4 mat, simd_float3 pos) { return pos.xxx * mat.columns[0].xyz + pos.yyy * mat.columns[1].xyz + pos.zzz * mat.columns[2].xyz + mat.columns[3].xyz; } static inline simd_float4x4 VFX_SIMD vfx_translate(simd_float4x4 m, simd_float4 t_w1) { m.columns[3].xyz += t_w1.xyz; return m; } static inline simd_float4x4 VFX_SIMD vfx_translate(simd_float4x4 m, simd_float3 t) { m.columns[3].xyz += t; return m; } static inline simd_float4x4 VFX_SIMD vfx_scale(simd_float4x4 m, simd_float3 s) { m.columns[0] *= s.x; m.columns[1] *= s.y; m.columns[2] *= s.z; return m; } static inline simd_float3 VFX_SIMD vfx_get_translation(simd_float4x4 m) { return m.columns[3].xyz; } static inline simd_float4x4 VFX_SIMD vfx_set_translation(simd_float4x4 m, simd_float3 t) { simd_float4x4 c = m; c.columns[3].xyz = t; return c; } static inline simd_float3 VFX_SIMD vfx_get_scale(simd_float4x4 m) { simd_float3 crs = vfx_cross( m.columns[1].xyz, m.columns[2].xyz ); float sign = vfx_dot( m.columns[0].xyz, crs ) < 0.f ? 
-1.f : 1.f; return sign * (simd_float3){ vfx_length( m.columns[0].xyz ), vfx_length( m.columns[1].xyz ), vfx_length( m.columns[2].xyz ) }; } static inline simd_float3x3 VFX_SIMD vfx_get_rotation(simd_float4x4 m) { return vfx_float3x3(vfx_robust_normalize(m.columns[0].xyz), vfx_robust_normalize(m.columns[1].xyz), vfx_robust_normalize(m.columns[2].xyz)); } static inline float VFX_SIMD vfx_get_length(simd_float4x4 m) { simd_float4 sumsqr = m.columns[0] * m.columns[0]; sumsqr += m.columns[1] * m.columns[1]; sumsqr += m.columns[2] * m.columns[2]; sumsqr += m.columns[3] * m.columns[3]; return vfx_reduce_add(sumsqr); } #ifndef __METAL_VERSION__ static inline bool VFX_SIMD vfx_matrix_is_identity(simd_float4x4 m) { return simd_almost_equal_elements(m, matrix_identity_float4x4, 0.00001f); } // Check wether the matrix contains infinity or NaN. static inline bool VFX_SIMD vfx_matrix_is_valid(simd_float4x4 m) { union { simd_float4 f; simd_uint4 u;}c0 = {m.columns[0]}; union { simd_float4 f; simd_uint4 u;}c1 = {m.columns[1]}; union { simd_float4 f; simd_uint4 u;}c2 = {m.columns[2]}; union { simd_float4 f; simd_uint4 u;}c3 = {m.columns[3]}; // Check NaN or Infinite. 
return !simd_any((c0.u & 0x7fffffff) >> 23 == 0xff || (c1.u & 0x7fffffff) >> 23 == 0xff || (c2.u & 0x7fffffff) >> 23 == 0xff || (c3.u & 0x7fffffff) >> 23 == 0xff); } #endif static inline simd_float4x4 VFX_SIMD vfx_clear_scale(simd_float4x4 m) { simd_float4x4 copy = m; copy.columns[0].xyz = vfx_normalize( m.columns[0].xyz ); copy.columns[1].xyz = vfx_normalize( m.columns[1].xyz ); copy.columns[2].xyz = vfx_normalize( m.columns[2].xyz ); return copy; } static inline simd_float4x4 VFX_SIMD vfx_make_transform(simd_float4 q, simd_float4 t_w1, simd_float3 scale) { scale = vfx_robust_scale(scale); simd_float3 q2 = q.xyz * q.xyz; simd_float3 wq = q.w * q.xyz; float xy = q.x * q.y; float yz = q.y * q.z; float xz = q.x * q.z; simd_float3 s = scale + scale; // faster than 2 * scale return (simd_float4x4){ s.x * vfx_make_float4(0.5f - q2.y - q2.z, xy + wq.z, xz - wq.y, 0), s.y * vfx_make_float4(xy - wq.z, 0.5f - q2.x - q2.z, yz + wq.x, 0), s.z * vfx_make_float4(xz + wq.y, yz - wq.x, 0.5f - q2.x - q2.y, 0), t_w1 }; } static inline simd_float4x4 VFX_SIMD vfx_make_transform(simd_float4 q, simd_float3 trans, simd_float3 scale) { return vfx_make_transform(q, vfx_make_float4(trans, 1.f), scale); } #ifndef __METAL_VERSION__ static inline simd_float4x4 VFX_SIMD vfx_make_transform(simd_quatf q, simd_float3 trans, simd_float3 scale) { return vfx_make_transform(q.vector, vfx_make_float4(trans, 1.f), scale); } #endif static inline simd_float4x4 VFX_SIMD vfx_make_transform_rotation_translation(simd_float4 q, simd_float4 t_w1) { return vfx_make_transform(q, t_w1, vfx_splat3(1.f)); } #ifndef __METAL_VERSION__ static inline simd_float4x4 VFX_SIMD vfx_make_transform_rotation_translation(simd_quatf q, simd_float4 t_w1) { return vfx_make_transform_rotation_translation(q.vector, t_w1); } #endif static inline simd_float4x4 VFX_SIMD vfx_make_transform_rotation_scale(simd_float4 q, simd_float3 scale) { return vfx_make_transform(q, vfx_make_float4(0.f, 0.f, 0.f, 1.f), scale); } #ifndef 
__METAL_VERSION__ static inline simd_float4x4 VFX_SIMD vfx_make_transform_rotation_scale(simd_quatf q, simd_float3 scale) { return vfx_make_transform_rotation_scale(q.vector, scale); } #endif static inline simd_float4x4 VFX_SIMD vfx_make_transform_translation_scale(simd_float4 t_w1, simd_float3 s) { s = vfx_robust_scale(s); return vfx_float4x4( vfx_make_float4( s.x, 0.f, 0.f, 0.f ), vfx_make_float4( 0.f, s.y, 0.f, 0.f ), vfx_make_float4( 0.f, 0.f, s.z, 0.f ), t_w1); } static inline simd_float4x4 VFX_SIMD vfx_make_translation(simd_float4 t_w1) { return (simd_float4x4){ vfx_make_float4( 1.f, 0.f, 0.f, 0.f ), vfx_make_float4( 0.f, 1.f, 0.f, 0.f ), vfx_make_float4( 0.f, 0.f, 1.f, 0.f ), t_w1}; } static inline simd_float4x4 VFX_SIMD vfx_make_translation(simd_float3 t) { return (simd_float4x4){ vfx_make_float4( 1.f, 0.f, 0.f, 0.f ), vfx_make_float4( 0.f, 1.f, 0.f, 0.f ), vfx_make_float4( 0.f, 0.f, 1.f, 0.f ), vfx_make_float4( t, 1.f ) };} static inline simd_float4x4 VFX_SIMD vfx_make_scale(simd_float3 scale) { return vfx_diagonal(vfx_make_float4(vfx_robust_scale(scale), 1.f)); } static inline simd_float4x4 VFX_SIMD vfx_make_homothety(simd_float3 scale, simd_float3 center) { return (simd_float4x4) { vfx_make_float4(scale.x, 0.0, 0.0, 0.0), vfx_make_float4(0.0, scale.y, 0.0, 0.0), vfx_make_float4(0.0, 0.0, scale.z, 0.0), vfx_make_float4((1 - scale.x) * center.x, (1 - scale.y) * center.y, (1 - scale.z) * center.z, 1.0) }; } static inline simd_float4x4 VFX_SIMD vfx_make_rotation(simd_float4 q) { return vfx_make_transform_rotation_translation(q, vfx_float4_unit_w()); } #ifndef __METAL_VERSION__ static inline simd_float4x4 VFX_SIMD vfx_make_rotation(simd_quatf q) { return vfx_make_rotation(q.vector); } #endif static inline simd_float4x4 VFX_SIMD vfx_matrix_float4x4_make_look_at(simd_float3 eye, simd_float3 center, simd_float3 up) { simd_float4 axisZ = vfx_make_float4(vfx_normalize(center - eye)); simd_float4 axisX = vfx_make_float4(vfx_normalize(vfx_cross(axisZ.xyz, 
vfx_normalize(up)))); simd_float4 axisY = vfx_make_float4(vfx_cross(axisX.xyz, axisZ.xyz)); axisX.w = -vfx_dot(axisX.xyz, eye); axisY.w = -vfx_dot(axisY.xyz, eye); axisZ.w = -vfx_dot(axisZ.xyz, eye); return vfx_float4x4_from_rows(axisX, axisY, -axisZ, vfx_float4_unit_w()); } static inline simd_float3x3 VFX_SIMD vfx_inverse_fast(simd_float3x3 m) { simd_float3 a = m.columns[0]; simd_float3 b = m.columns[1]; simd_float3 c = m.columns[2]; simd_float3 r0 = vfx_cross(b, c); simd_float3 r1 = vfx_cross(c, a); simd_float3 r2 = vfx_cross(a, b); float inv_det = 1.f / vfx_dot(r2, c); return vfx_float3x3_from_rows(r0 * inv_det, r1 * inv_det, r2 * inv_det); } // This is only valid with a transform matrix (i.e. rows.3 = 0,0,0,1) static inline simd_float4x4 VFX_SIMD vfx_inverse_fast(simd_float4x4 m) { simd_float3 a = m.columns[0].xyz; simd_float3 b = m.columns[1].xyz; simd_float3 c = m.columns[2].xyz; simd_float3 d = m.columns[3].xyz; float x = m.columns[0].w; float y = m.columns[1].w; float z = m.columns[2].w; float w = m.columns[3].w; simd_float3 s = vfx_cross(a, b); simd_float3 t = vfx_cross(c, d); simd_float3 u = a * y - b * x; simd_float3 v = c * w - d * z; float inv_det = 1.f / (vfx_dot(s, v) + vfx_dot(t, u)); s *= inv_det; t *= inv_det; u *= inv_det; v *= inv_det; simd_float3 r0 = vfx_cross(b, v) + t * y; simd_float3 r1 = vfx_cross(v, a) - t * x; simd_float3 r2 = vfx_cross(d, u) + s * w; simd_float3 r3 = vfx_cross(u, c) - s * z; return vfx_float4x4_from_rows(vfx_make_float4( r0.x, r0.y, r0.z, -vfx_dot(b, t)), vfx_make_float4( r1.x, r1.y, r1.z, vfx_dot(a, t)), vfx_make_float4( r2.x, r2.y, r2.z, -vfx_dot(d, s)), vfx_make_float4( r3.x, r3.y, r3.z, vfx_dot(c, s))); } static inline simd_float4x4 VFX_SIMD vfx_inverse_transpose(simd_float4x4 m) { simd_float3 scale_squared = vfx_dot_x3(m.columns[0].xyz, m.columns[0].xyz, m.columns[1].xyz, m.columns[1].xyz, m.columns[2].xyz, m.columns[2].xyz); // this avoid NaN but still flatten the features (e.g. 
a plane has no more valid normal when scaled with 0) simd_float3 inv_scale_squared = vfx_select(1 / scale_squared, scale_squared, scale_squared == 0); return vfx_scale(m, inv_scale_squared); } // Do we keep? static inline simd_float4x4 VFX_SIMD vfx_matrix_compute_normal(simd_float4x4 m) { // Matrix3x3 determinant // this was the original code but it sems to be the transpose of the one we add before... // float det = simd_determinant(simd_float3x3(m)); float det = m.columns[0][0] * (m.columns[1][1] * m.columns[2][2] - m.columns[1][2] * m.columns[2][1]) - m.columns[1][0] * (m.columns[0][1] * m.columns[2][2] - m.columns[0][2] * m.columns[2][1]) + m.columns[2][0] * (m.columns[0][1] * m.columns[1][2] - m.columns[0][2] * m.columns[1][1]); if (det == 0.) return vfx_float4x4_identity(); det = 1.f / det; // Invert and transpose in a single step. simd_float4x4 out; out.columns[0][0] = (m.columns[1][1] * m.columns[2][2] - m.columns[2][1] * m.columns[1][2]) * det; out.columns[0][1] = -(m.columns[1][0] * m.columns[2][2] - m.columns[1][2] * m.columns[2][0]) * det; out.columns[0][2] = (m.columns[1][0] * m.columns[2][1] - m.columns[1][1] * m.columns[2][0]) * det; out.columns[0][3] = 0.; out.columns[1][0] = -(m.columns[0][1] * m.columns[2][2] - m.columns[2][1] * m.columns[0][2]) * det; out.columns[1][1] = (m.columns[0][0] * m.columns[2][2] - m.columns[0][2] * m.columns[2][0]) * det; out.columns[1][2] = -(m.columns[0][0] * m.columns[2][1] - m.columns[0][1] * m.columns[2][0]) * det; out.columns[1][3] = 0.; out.columns[2][0] = (m.columns[0][1] * m.columns[1][2] - m.columns[0][2] * m.columns[1][1]) * det; out.columns[2][1] = -(m.columns[0][0] * m.columns[1][2] - m.columns[0][2] * m.columns[1][0]) * det; out.columns[2][2] = (m.columns[0][0] * m.columns[1][1] - m.columns[1][0] * m.columns[0][1]) * det; out.columns[2][3] = 0.; out.columns[3][0] = 0.; out.columns[3][1] = 0.; out.columns[3][2] = 0.; out.columns[3][3] = 1.; return out; } static inline simd_float3 VFX_SIMD 
vfx_transform_direction(simd_float4x4 m, simd_float3 d) { return d.x * m.columns[0].xyz + (d.y * m.columns[1].xyz + (d.z * m.columns[2].xyz)); } static inline simd_float3 VFX_SIMD vfx_transform_direction(simd_float3x3 m, simd_float3 d) { return d.x * m.columns[0] + (d.y * m.columns[1] + (d.z * m.columns[2])); } // this will keep original length static inline simd_float3 VFX_SIMD vfx_rotate_direction(simd_float4x4 m, simd_float3 d) { return vfx_rescale(vfx_transform_direction(m, d), vfx_length(d)); } static inline simd_float4 VFX_SIMD vfx_transform_direction(simd_float4x4 m, simd_float4 d) { return d.x * m.columns[0] + (d.y * m.columns[1] + (d.z * m.columns[2])); } static inline simd_float3 VFX_SIMD vfx_transform_normal(simd_float4x4 m, simd_float3 d) { return vfx_transform_direction(vfx_inverse_transpose(m), d); } static inline simd_float3 VFX_SIMD vfx_transform_position(simd_float4x4 m, simd_float3 p) { return p.x * m.columns[0].xyz + (p.y * m.columns[1].xyz + (p.z * m.columns[2].xyz + m.columns[3].xyz)); } static inline simd_float4 VFX_SIMD vfx_transform_position(simd_float4x4 m, simd_float4 p) { return p.x * m.columns[0] + (p.y * m.columns[1] + (p.z * m.columns[2] + m.columns[3])); } static inline simd_float3 VFX_SIMD vfx_project_position(simd_float4x4 m, simd_float3 p) { simd_float4 result = p.x * m.columns[0] + (p.y * m.columns[1] + (p.z * m.columns[2] + m.columns[3])); if (result.w != 0) { result /= result.w; } else { result /= 1e-8f; } return result.xyz; } static inline simd_float4x4 VFX_SIMD vfx_make_rotation_around(simd_float4 q, simd_float3 center) { simd_float3 tr = vfx_quat_rotate_vector(q, -center); return vfx_make_transform_rotation_translation(q, vfx_make_float4(tr + center, 1)); } #ifndef __METAL_VERSION__ static inline simd_float4x4 VFX_SIMD vfx_make_rotation_around(simd_quatf q, simd_float3 center) { return vfx_make_rotation_around(q.vector, center); } #endif #ifndef __METAL_VERSION__ static inline bool vfx_decompose_affine(simd_float4x4 m, 
vfx_out simd_quatf * _Nullable quaternion, vfx_out simd_float3 * _Nullable position, vfx_out simd_float3 * _Nullable scale) { bool success = true; if (scale) { *scale = vfx_get_scale(m); //clear NaNs #if !defined(__FAST_MATH__) if (vfx_contains_nan(*scale)) { success = false; *scale = (simd_float3){0.f, 0.f, 0.f}; } #endif } if (position) { *position = vfx_get_translation(m); } if (quaternion) { simd_float3 s; if (scale) { s = *scale; } else { s = vfx_get_scale(m); //clear NaNs #if !defined(__FAST_MATH__) if (vfx_contains_nan(s)) { success = false; s = (simd_float3){0.f, 0.f, 0.f}; } #endif } const simd_float3 zero = ( 0.f ); if (simd_any(s == zero)) { //invalid matrix success = false; *quaternion = vfx_quat_identity(); if (scale) { *scale = (simd_float3){0.f, 0.f, 0.f}; } } else { m.columns[0].xyz /= s.x; m.columns[1].xyz /= s.y; m.columns[2].xyz /= s.z; *quaternion = vfx_quat_from_matrix4x4(m); //clear NaNs #if !defined(__FAST_MATH__) if (vfx_contains_nan(quaternion->vector)) { success = false; *quaternion = vfx_quat_identity(); } #endif } } return success; } static inline simd_float4x4 VFX_SIMD vfx_matrix_lerp(simd_float4x4 m1, simd_float4x4 m2, float alpha) { return simd_linear_combination(1.f - alpha, m1, alpha, m2); } static inline simd_float4x4 VFX_SIMD vfx_matrix_interpolate(simd_float4x4 m1, simd_float4x4 m2, float alpha) { simd_quatf q1, q2; simd_float3 t1, t2, s1, s2; bool aff = vfx_decompose_affine(m1, &q1, &t1, &s1); aff &= vfx_decompose_affine(m2, &q2, &t2, &s2); if (!aff) // if not affine can't interpolate return m1; return vfx_make_transform(vfx_quat_slerp(q1, q2, alpha), simd_mix(t1, t2, alpha), simd_mix(s1, s2, alpha)); } #endif static inline simd_float3x3 VFX_SIMD vfx_make_rotation(simd_float3 axis, float angle) { axis = vfx_robust_normalize(axis); float cs = vfx_cos(angle); float sn = vfx_sin(angle); simd_float3 a2 = axis * axis; simd_float3 d = (1.f - a2) * cs + a2; float ic = 1 - cs; float xy = axis.x * axis.y * ic; float xz = axis.x * axis.z 
* ic; float yz = axis.y * axis.z * ic; simd_float3 as = axis * sn; return vfx_float3x3(vfx_make_float3(d.x, xy - as.z, xz + as.y), vfx_make_float3(xy + as.z, d.y, yz - as.x), vfx_make_float3(xz - as.y, yz + as.x, d.z)); } static inline simd_float4x4 VFX_SIMD vfx_make4x4_from_axis_angle(simd_float3 axis, float angle) { return vfx_float4x4(vfx_make_rotation(axis, angle)); } static inline simd_float3x3 VFX_SIMD vfx_make_rotation_x(float angle) { float cs = vfx_cos(angle); float sn = vfx_sin(angle); return vfx_float3x3(vfx_make_float3(1, 0, 0), vfx_make_float3(0, cs, sn), vfx_make_float3(0, -sn, cs)); } static inline simd_float3x3 VFX_SIMD vfx_make_rotation_y(float angle) { float cs = vfx_cos(angle); float sn = vfx_sin(angle); return vfx_float3x3(vfx_make_float3( cs, 0, -sn), vfx_make_float3( 0, 1, 0), vfx_make_float3(sn, 0, cs)); } static inline simd_float3x3 VFX_SIMD vfx_make_rotation_z(float angle) { float cs = vfx_cos(angle); float sn = vfx_sin(angle); return vfx_float3x3(vfx_make_float3(cs, sn, 0), vfx_make_float3(-sn, cs, 0), vfx_make_float3(0, 0, 1)); } static inline simd_float4x4 VFX_SIMD vfx_make_rotation(simd_float3 euler) { simd_float3x3 m1 = vfx_make_rotation_x(euler.x); simd_float3x3 m2 = vfx_make_rotation_y(euler.y); simd_float3x3 m3 = vfx_make_rotation_z(euler.z); return vfx_float4x4(vfx_mul(m3, vfx_mul(m2, m1))); } static inline float VFX_SIMD vfx_determinant(simd_float3x3 m) { #ifndef __METAL_VERSION__ return simd_determinant(m); #else return determinant(m); #endif } static inline float VFX_SIMD vfx_determinant_3x3(simd_float4x4 m) { #ifndef __METAL_VERSION__ return simd_determinant(vfx_float3x3(m)); #else return determinant(vfx_float3x3(m)); #endif } #pragma mark - Masks #ifndef __METAL_VERSION__ // Creates a 4-bit mask from the most significant bits of the four 32 bits integer values static inline VFX_SIMD int vfx_reduce_mask(simd_int4 inMask) { #if defined(__SSE__) return _mm_movemask_ps(inMask); #else return simd_reduce_add((inMask < 0) & 
(simd_int4){ 0x1, 0x2, 0x4, 0x8 }); #endif } // Creates a 8-bit mask from the most significant bits of the 8 16 bits integer values static inline int VFX_SIMD vfx_reduce_mask(simd_short8 inMask) { #if defined(__SSE2__) int x = _mm_movemask_epi8(inMask); // compact 1by1 limited (3 shift only because result is not larger than 8 bits) x &= 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0 x = (x ^ (x >> 1)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10 x = (x ^ (x >> 2)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210 return (x ^ (x >> 4)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210 #else return simd_reduce_add(inMask & (simd_short8){ 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 }); #endif } // Creates a 8-bit mask from the most significant bits of the eight 32 bits integer values static inline int VFX_SIMD vfx_reduce_mask(simd_int8 inMask) { #if defined(__AVX__) return _mm256_movemask_ps(inMask); #else return simd_reduce_add((inMask < 0) & (simd_int8){ 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 }); #endif } #endif // Should move to vfx_random.h static inline uint64_t VFX_SIMD vfx_hash_combine64(uint64_t hash, uint64_t v) { return hash ^ (v + 0x9e3779b97f4a7c15 + (hash << 6) + (hash >> 2)); } static inline uint64_t VFX_SIMD vfx_hash_combine(uint64_t hash, int32_t v) { return vfx_hash_combine64(hash, (uint64_t)v); } static inline uint64_t VFX_SIMD vfx_hash_combine(uint64_t hash, uint32_t v) { return vfx_hash_combine64(hash, (uint64_t)v); } static inline uint64_t VFX_SIMD vfx_hash_combine(uint64_t hash, size_t v) { return vfx_hash_combine64(hash, (uint64_t)v); } // Defined for translation to swift Int static inline uint64_t VFX_SIMD vfx_hash_combine(uint64_t hash, int64_t v) { return vfx_hash_combine64(hash, (uint64_t)v); } static inline uint64_t VFX_SIMD vfx_hash_combine(uint64_t hash, int8_t v) { return vfx_hash_combine64(hash, (uint64_t)v); } static inline uint64_t VFX_SIMD vfx_hash_combine(uint64_t hash, uint8_t v) 
{ return vfx_hash_combine64(hash, (uint64_t)v); } #ifndef __METAL_VERSION__ static inline uint64_t VFX_SIMD vfx_hash_combine(uint64_t hash, float v) { return vfx_hash_combine64(hash, (uint64_t)*(uint32_t*)&v); } static inline uint64_t VFX_SIMD vfx_hash_combine(uint64_t hash, simd_float2 v) { return vfx_hash_combine64(hash, *(uint64_t*)&v); } #endif // hash / random // https://www.pcg-random.org/ static inline uint32_t VFX_SIMD_UNPURE vfx_hash_pcg(vfx_out uint32_t* s) { *s = *s * 747796405u + 2891336453u; uint32_t seed = *s; uint32_t word = ((seed >> ((seed >> 28u) + 4u)) ^ seed) * 277803737u; return (word >> 22u) ^ word; } static inline float VFX_SIMD_UNPURE vfx_random01(vfx_out uint32_t* seed) { // takes a properly initialized seed and advance it with every call, // so we don't have to handle it manually uint32_t u = 0x7Fu << 23 | vfx_hash_pcg(seed) >> 9; // convert int32 to float in [0..1] // only generates all dyadic rationals of the form k / 2^-23 equally #ifdef __METAL_VERSION__ return as_type(u) - 1.f; #else return *((float *)(&u)) - 1.f; #endif } static inline simd_float2 VFX_SIMD_UNPURE vfx_random01_float2(vfx_out uint32_t* seed) { return vfx_make_float2(vfx_random01(seed), vfx_random01(seed)); } static inline simd_float3 VFX_SIMD_UNPURE vfx_random01_float3(vfx_out uint32_t* seed) { return vfx_make_float3(vfx_random01(seed), vfx_random01(seed), vfx_random01(seed)); } static inline simd_float4 VFX_SIMD_UNPURE vfx_random01_float4(vfx_out uint32_t* seed) { return vfx_make_float4(vfx_random01(seed), vfx_random01(seed), vfx_random01(seed), vfx_random01(seed)); } static inline bool VFX_SIMD_UNPURE vfx_random_bool(vfx_out uint32_t* seed) { return vfx_random01(seed) < 0.5; } static inline bool VFX_SIMD_UNPURE vfx_random_bool(vfx_out uint32_t* seed, float probability) { return vfx_random01(seed) < probability; } // Only for graph codegen. 
static inline float VFX_SIMD vfx_random01(uint32_t seed) { uint32_t s = seed; return vfx_random01(&s); } static inline simd_float2 VFX_SIMD vfx_random01_float2(uint32_t seed) { uint32_t s = seed; return vfx_random01_float2(&s); } static inline simd_float3 VFX_SIMD vfx_random01_float3(uint32_t seed) { uint32_t s = seed; return vfx_random01_float3(&s); } static inline simd_float4 VFX_SIMD vfx_random01_float4(uint32_t seed) { uint32_t s = seed; return vfx_random01_float4(&s); } static inline vfx_int_range VFX_SIMD vfx_make_int_range(size_t lo, size_t hi) { return (vfx_int_range){ lo, hi }; } static inline vfx_float_range VFX_SIMD vfx_make_float_range(float lo, float hi) { return (vfx_float_range){ lo, hi }; } static inline bool vfx_range_is_empty(vfx_float_range range) { return range.lower_bound == range.upper_bound; } // t must be [0..1] static inline float vfx_range_interpolate(vfx_float_range range, float t) { return range.lower_bound + (range.upper_bound - range.lower_bound) * t; } static inline float VFX_SIMD_UNPURE vfx_random_float_in_range(vfx_out uint32_t* seed, vfx_float_range range) { return vfx_range_is_empty(range) ? 
range.lower_bound : vfx_range_interpolate(range, vfx_random01(seed)); } static inline simd_float2 VFX_SIMD_UNPURE vfx_random_float2_in_range(vfx_out uint32_t* seed, vfx_float_range xRange, vfx_float_range yRange) { return (simd_float2){ vfx_random_float_in_range(seed, xRange), vfx_random_float_in_range(seed, yRange) }; } static inline simd_float3 VFX_SIMD_UNPURE vfx_random_float3_in_range(vfx_out uint32_t* seed, vfx_float_range xRange, vfx_float_range yRange, vfx_float_range zRange) { return (simd_float3){ vfx_random_float_in_range(seed, xRange), vfx_random_float_in_range(seed, yRange), vfx_random_float_in_range(seed, zRange) }; } // scale bias static inline vfx_scale_bias VFX_SIMD vfx_make_scale_bias(float start, float end) { return (vfx_scale_bias){ 1.f / (end - start), -start / (end - start) }; } static inline vfx_scale_bias VFX_SIMD vfx_make_scale_bias(vfx_float_range range) { return vfx_make_scale_bias(range.lower_bound, range.upper_bound); } static inline float VFX_SIMD vfx_apply(float x, vfx_scale_bias scale_bias) { return vfx_muladd(x, scale_bias.scale, scale_bias.bias); } static inline float VFX_SIMD vfx_apply_saturate(float x, vfx_scale_bias scale_bias) { return vfx_saturate(vfx_muladd(x, scale_bias.scale, scale_bias.bias)); } #undef simd_bitselect #ifndef __METAL_VERSION__ // These are dummy functions only there to cope with bad Swift debug code static inline simd_float4 VFX_SIMD vfx_add(simd_float4 a, simd_float4 b) { return a + b; } static inline simd_float3 VFX_SIMD vfx_add(simd_float3 a, simd_float3 b) { return a + b; } static inline simd_float2 VFX_SIMD vfx_add(simd_float2 a, simd_float2 b) { return a + b; } static inline simd_float4 VFX_SIMD vfx_add(simd_float4 a, float b) { return a + b; } static inline simd_float3 VFX_SIMD vfx_add(simd_float3 a, float b) { return a + b; } static inline simd_float2 VFX_SIMD vfx_add(simd_float2 a, float b) { return a + b; } static inline simd_float4 VFX_SIMD vfx_add(float a, simd_float4 b) { return a + b; } static 
inline simd_float3 VFX_SIMD vfx_add(float a, simd_float3 b) { return a + b; } static inline simd_float2 VFX_SIMD vfx_add(float a, simd_float2 b) { return a + b; } static inline simd_float4 VFX_SIMD vfx_sub(simd_float4 a, simd_float4 b) { return a - b; } static inline simd_float3 VFX_SIMD vfx_sub(simd_float3 a, simd_float3 b) { return a - b; } static inline simd_float2 VFX_SIMD vfx_sub(simd_float2 a, simd_float2 b) { return a - b; } static inline simd_float4 VFX_SIMD vfx_sub(simd_float4 a, float b) { return a - b; } static inline simd_float3 VFX_SIMD vfx_sub(simd_float3 a, float b) { return a - b; } static inline simd_float2 VFX_SIMD vfx_sub(simd_float2 a, float b) { return a - b; } static inline simd_float4 VFX_SIMD vfx_sub(float a, simd_float4 b) { return a - b; } static inline simd_float3 VFX_SIMD vfx_sub(float a, simd_float3 b) { return a - b; } static inline simd_float2 VFX_SIMD vfx_sub(float a, simd_float2 b) { return a - b; } static inline simd_float4 VFX_SIMD vfx_mul(simd_float4 a, simd_float4 b) { return a * b; } static inline simd_float3 VFX_SIMD vfx_mul(simd_float3 a, simd_float3 b) { return a * b; } static inline simd_float2 VFX_SIMD vfx_mul(simd_float2 a, simd_float2 b) { return a * b; } static inline simd_float4 VFX_SIMD vfx_mul(simd_float4 a, float b) { return a * b; } static inline simd_float3 VFX_SIMD vfx_mul(simd_float3 a, float b) { return a * b; } static inline simd_float2 VFX_SIMD vfx_mul(simd_float2 a, float b) { return a * b; } static inline simd_float4 VFX_SIMD vfx_mul(float a, simd_float4 b) { return a * b; } static inline simd_float3 VFX_SIMD vfx_mul(float a, simd_float3 b) { return a * b; } static inline simd_float2 VFX_SIMD vfx_mul(float a, simd_float2 b) { return a * b; } static inline simd_float4 VFX_SIMD vfx_div(simd_float4 a, simd_float4 b) { return a / b; } static inline simd_float3 VFX_SIMD vfx_div(simd_float3 a, simd_float3 b) { return a / b; } static inline simd_float2 VFX_SIMD vfx_div(simd_float2 a, simd_float2 b) { return a / b; 
} static inline simd_float4 VFX_SIMD vfx_div(simd_float4 a, float b) { return a / b; } static inline simd_float3 VFX_SIMD vfx_div(simd_float3 a, float b) { return a / b; } static inline simd_float2 VFX_SIMD vfx_div(simd_float2 a, float b) { return a / b; } static inline simd_float4 VFX_SIMD vfx_div(float a, simd_float4 b) { return a / b; } static inline simd_float3 VFX_SIMD vfx_div(float a, simd_float3 b) { return a / b; } static inline simd_float2 VFX_SIMD vfx_div(float a, simd_float2 b) { return a / b; } #endif #pragma mark - #pragma mark Curves static inline simd_float3 VFX_SIMD vfx_bezier(float t, simd_float3 p0, simd_float3 p1, simd_float3 p2) { float omt = 1.0f - t; return (omt * omt) * p0 + (2.f * omt * t) * p1 + (t * t) * p2; } static inline simd_float3 VFX_SIMD vfx_bezier_derivative(float t, simd_float3 p0, simd_float3 p1, simd_float3 p2) { float omt = 1.0f - t; return 2.f * omt * (p1 - p0) + 2.f * t * (p2 - p1); } #pragma mark - #pragma mark Barycentric coordinates static inline simd_float2 VFX_SIMD vfx_barycentric_mix(simd_float2 __x, simd_float2 __y, simd_float2 __z, simd_float3 __t) { return __t.x * __x + __t.y * __y + __t.z * __z; } static inline simd_float3 VFX_SIMD vfx_barycentric_mix(simd_float3 __x, simd_float3 __y, simd_float3 __z, simd_float3 __t) { return __t.x * __x + __t.y * __y + __t.z * __z; } static inline simd_float4 VFX_SIMD vfx_barycentric_mix(simd_float4 __x, simd_float4 __y, simd_float4 __z, simd_float3 __t) { return __t.x * __x + __t.y * __y + __t.z * __z; } #pragma mark - Color helpers // linear / srgb conversion static inline float VFX_SIMD vfx_srgb_to_grayscale(simd_float4 v) { return 0.3f * v.x + 0.59f * v.y + 0.11f * v.z; } static inline simd_float3 VFX_SIMD vfx_srgb_to_linear(simd_float3 srgb) { return vfx_select(vfx_pow((srgb + 0.055f) / 1.055f, 2.4f), srgb / 12.92f, srgb <= 0.04045f); } static inline simd_float3 VFX_SIMD vfx_linear_to_srgb(simd_float3 linear) { return vfx_select(1.055f * vfx_pow(linear, 1.f / 2.4f) - 0.055f, 
linear * 12.92f, linear <= 0.0031308f); } static inline simd_float4 VFX_SIMD vfx_srgb_to_linear(simd_float4 srgb) { srgb.xyz = vfx_srgb_to_linear(srgb.xyz); return srgb; } static inline simd_float4 VFX_SIMD vfx_linear_to_srgb(simd_float4 linear) { linear.xyz = vfx_linear_to_srgb(linear.xyz); return linear; } #ifdef __METAL_VERSION__ // half3 metal only versions static inline half3 VFX_SIMD vfx_srgb_to_linear(half3 srgb) { return vfx_select(vfx_pow((srgb + 0.055h) / 1.055h, 2.4h), srgb / 12.92h, srgb <= 0.04045h); } static inline half3 VFX_SIMD vfx_linear_to_srgb(half3 linear) { return vfx_select(1.055h * vfx_pow(linear, 1.h / 2.4h) - 0.055h, linear * 12.92h, linear <= 0.0031308h); } #endif // hsb static inline simd_float4 VFX_SIMD vfx_rgb_to_hsb(simd_float4 rgba) { float min = vfx_reduce_min(rgba.rgb); float max = vfx_reduce_max(rgba.rgb); float delta = max - min; float hue = 0; if (delta != 0) { if (rgba.r == max) { hue = (rgba.g - rgba.b) / delta; } else if (rgba.g == max) { hue = 2 + (rgba.b - rgba.r) / delta; } else { // color.b == max hue = 4 + (rgba.r - rgba.g) / delta; } hue = (hue < 0) ? (hue * 0.1666666666f + 1.f) : hue * 0.1666666666f; } float brightness = max; float saturation = max != 0 ? (max - min) / max : 0; return (simd_float4){ hue, saturation, brightness, rgba.a }; } static inline simd_float3 VFX_SIMD vfx_rgb_to_hsb(simd_float3 rgb) { return vfx_rgb_to_hsb(vfx_make_float4_undef(rgb)).xyz; } static inline simd_float4 VFX_SIMD vfx_hsb_to_rgb(simd_float4 hsba) { simd_float3 hsb = vfx_saturate(hsba.rgb); float hue = ((hsb.x == 1.f) ? 
0.f : hsb.x) * 6.f; float saturation = hsb.y; float brightness = hsb.z; if (saturation == 0) { return vfx_make_float4(hsb.zzz, hsba.a); } int i = (int)hue; float f = hue - i; simd_float4 bpqt = { brightness, brightness * (1 - saturation), brightness * (1 - saturation * f), brightness * (1 - saturation * (1 - f)) }; switch (i) { case 0: return vfx_make_float4(bpqt.xwy, hsba.a); break; case 1: return vfx_make_float4(bpqt.zxy, hsba.a); break; case 2: return vfx_make_float4(bpqt.yxw, hsba.a); break; case 3: return vfx_make_float4(bpqt.yzx, hsba.a); break; case 4: return vfx_make_float4(bpqt.wyx, hsba.a); break; default:return vfx_make_float4(bpqt.xyz, hsba.a); break; } } static inline simd_float3 VFX_SIMD vfx_hsb_to_rgb(simd_float3 rgb) { return vfx_hsb_to_rgb(vfx_make_float4_undef(rgb)).xyz; } static inline simd_float4x4 vfx_get_local_matrix_for_cube_index(int32_t face) { simd_float4 xAxis = vfx_float4_unit_x(); simd_float4 yAxis = vfx_float4_unit_y(); simd_float4 zAxis = vfx_float4_unit_z(); simd_float4 zero = vfx_float4_unit_w(); switch (face) { case 0: return vfx_float4x4( zAxis, yAxis, -xAxis, zero); // +X case 1: return vfx_float4x4(-zAxis, yAxis, xAxis, zero); // -X case 2: return vfx_float4x4( xAxis, zAxis, -yAxis, zero); // +Y case 3: return vfx_float4x4( xAxis,-zAxis, yAxis, zero); // -Y case 4: return vfx_float4x4(-xAxis, yAxis, -zAxis, zero); // +Z default: /* 5 */ return vfx_float4x4( xAxis, yAxis, zAxis, zero); // -Z } } // legacy, kept for backward compatibility #ifndef __METAL_VERSION__ static inline simd_quatf VFX_SIMD vfx_quat_invert(simd_quatf q) { return vfx_quat_normalize(vfx_quat_conjugate(q)); } #pragma mark Projection Matrices static inline simd_float4x4 VFX_SIMD vfx_make_orthographic_projection(double l, double r, double b, double t, double zn, double zf) { zf = (zf <= zn) ? 
zn + 1.0 : zf; // prevent reversed near/far return simd_matrix(vfx_make_float4( (float)(2 / (r - l)), 0.f, 0.f, 0.f ), vfx_make_float4( 0.f, (float)(2 / (t - b)), 0.f, 0.f ), vfx_make_float4( 0.f, 0.f, (float)(1 / (zn - zf)), 0.f ), // 8, 9, 10, 11 vfx_make_float4((float)(-(l + r) / (r - l)), (float)(-(t + b) / (t - b)), (float)(zn / (zn - zf)), 1.f ) // 12, 13, 14, 15 );} static inline bool vfx_matrix_projection_is_reverseZ(simd_float4x4 mat) { return mat.columns[2][2] > 0.f; } static inline bool VFX_SIMD vfx_matrix_is_orthographic_projection(simd_float4x4 proj) { return proj.columns[2][3] == 0.f; } // angle in degrees // zfar > znear static inline float vfx_robustCoTanInv(double angleInDegree) { double tanFov = __tanpi( angleInDegree / 360.0 ); return (float)(1 / ((tanFov == 0.0) ? 1.0 : tanFov)); // default to tan(pi/4)=1 if tan == 0 } static inline simd_float4x4 vfx_matrix_make_reversez(void) { return simd_matrix( simd_make_float4( 1.f, 0.f, 0.f, 0.f ), simd_make_float4( 0.f, 1.f, 0.f, 0.f ), simd_make_float4( 0.f, 0.f, -1.f, 0.f ), simd_make_float4( 0.f, 0.f, 1.f, 1.f ) ); } static inline simd_float4x4 VFX_SIMD __perspective(float m00, float m11, float m20, float m21, double zn, double zf) { zf = (zf <= zn) ? 
zn + 1.0 : zf; // prevent reversed near/far double dm22 = zf / (zn - zf); float m22 = (float)dm22; double dm32 = zn * dm22; float m32 = (float)dm32; return simd_matrix( simd_make_float4( m00, 0.f, 0.f, 0.f ), // 0, 1, 2, 3 simd_make_float4( 0.f, m11, 0.f, 0.f ), // 4, 5, 6, 7 simd_make_float4( m20, m21, m22, -1.f ), // 8, 9, 10, 11 simd_make_float4( 0.f, 0.f, m32, 0.f ) // 12, 13, 14, 15 ); } static inline simd_float4x4 VFX_SIMD vfx_make_perspective_projection(double fovy, double zn, double zf) { float coTan = vfx_robustCoTanInv(fovy); return __perspective(coTan, coTan, 0, 0, zn, zf); } static inline simd_float4x4 VFX_SIMD vfx_make_perspective_projection(double l, double r, double b, double t, double zn, double zf) { double m00 = 2 * zn / (r - l); double m11 = 2 * zn / (t - b); double m20 = (r + l) / (r - l); double m21 = (t + b) / (t - b); return __perspective((float)m00, (float)m11, (float)m20, (float)m21, zn, zf); } // extract near/far parameters from a perspective or an ortho projectin matrix // This function supports ortho/perspective matrices in GL or MTL convention // reverseZ is not supported! 
static inline simd_float2 VFX_SIMD vfx_matrix_compute_near_far(simd_float4x4 proj) { double c = proj.columns[2][2]; double d = proj.columns[3][2]; simd_float2 nf; if (vfx_matrix_is_orthographic_projection(proj)) { // c = -1 / (f - n) // d = n / (n - f) nf.x = (float)(d / c); nf.y = (float)((d - 1) / c); } else { // c: f / (n - f) // d: n.f / (n - f) nf.x = (float)(d / c); nf.y = (float)(d / (c + 1)); } if (nf.x > nf.y) nf.xy = nf.yx; return nf; } #endif // Legacy TOREMOVE static inline simd_float2 VFX_SIMD vfx_transform_direction(simd_float3x3 m, simd_float2 d) { simd_float2 result = d.x * m.columns[0].xy; result += d.y * m.columns[1].xy; return result; } static inline simd_float2 VFX_SIMD vfx_transform_position(simd_float3x3 m, simd_float2 p) { simd_float2 result = m.columns[2].xy; result += p.x * m.columns[0].xy; result += p.y * m.columns[1].xy; return result; } NS_ASSUME_NONNULL_END #endif // MARK: - vfx_texture.h #ifndef VFX_TEXTURE_H #define VFX_TEXTURE_H NS_ASSUME_NONNULL_BEGIN // This should go in vfx_base.h but can't because of module issues? 
#ifdef __METAL_VERSION__ #define vfx_binding_index int32_t // Used in scripting and generated code struct vfx_sampler1d { const texture1d texture; const sampler sampler; }; static inline VFX_SIMD_UNPURE simd_float4 sample_1d(const vfx_ref vfx_sampler1d* sampler, float uv) { return sampler->texture.sample(sampler->sampler, uv); } static inline VFX_SIMD_UNPURE float vfx_sample_1d_curve(const vfx_ref vfx_sampler1d* sampler, float uv) { float dim = sampler->texture.get_width(); float u = (uv * (dim - 1.0f) + 0.5f ) / dim; return sample_1d(sampler, u).r; } struct vfx_sampler2d { const texture2d texture; const sampler sampler; }; static inline VFX_SIMD_UNPURE simd_float4 vfx_sample_2d(const vfx_ref vfx_sampler2d* sampler, simd_float2 uv) { return sampler->texture.sample(sampler->sampler, uv); } static inline VFX_SIMD_UNPURE simd_float2 vfx_sample_2d_gradient_x(const vfx_ref vfx_sampler2d* sampler, simd_float2 uv) { const float kStep = 0.01f; const float2 xStep = float2(kStep, 0); const float2 yStep = float2(0, kStep); return float2( sampler->texture.sample(sampler->sampler, uv + xStep).x - sampler->texture.sample(sampler->sampler, uv - xStep).x, sampler->texture.sample(sampler->sampler, uv + yStep).x - sampler->texture.sample(sampler->sampler, uv - yStep).x ); } struct vfx_sampler3d { const texture3d texture; const sampler sampler; }; #else // Swift import needs to be converted to Int for convenience of use #define vfx_binding_index size_t typedef VFX_ENUM(int8_t) { pixel_format_r16f, pixel_format_r32f, pixel_format_rgba16f, pixel_format_rgba32f, pixel_format_r8unorm, pixel_format_r8unorm_srgb, pixel_format_rgba8unorm, pixel_format_rgba8unorm_srgb, pixel_format_bgra8unorm, pixel_format_bgra8unorm_srgb, pixel_format_count } pixel_format; // Also see extension in Metal+Extensions typedef VFX_ENUM(int8_t) { vfx_address_mode_clampToZero, vfx_address_mode_clampToEdge, vfx_address_mode_repeat, vfx_address_mode_repeatMirror, vfx_address_mode_count } vfx_address_mode; // Also 
see extension in Metal+Extensions typedef VFX_ENUM(int8_t) { vfx_filter_mode_nearest, vfx_filter_mode_linear, vfx_filter_mode_count } vfx_filter_mode; // Sampler3D struct __vfx_sampler3d; typedef simd_float4 (*sample_3d_func)(const struct __vfx_sampler3d* sampler, simd_float3 uvw); typedef struct __vfx_sampler3d { simd_float4 dimensions; simd_int4 edges; simd_int4 cellStrideBytes; simd_int4 outsideRepeatStrideBytes; void* _Nullable data; sample_3d_func _Nullable sample_func; simd_float3 halfPixel; } vfx_sampler3d; VFX_EXPORT_C vfx_sampler3d vfx_sampler3d_make(void* data, simd_int3 dimensions, pixel_format format, vfx_address_mode address, vfx_filter_mode filter); static inline VFX_SIMD_UNPURE simd_float4 sample_3d(const vfx_sampler3d* sampler, simd_float3 uvw) { return sampler->sample_func(sampler, uvw); } static inline VFX_SIMD_UNPURE simd_float3 sample_3d_gradient_x(const vfx_sampler3d* sampler, simd_float3 uvw) { float kStep = 0.01f; simd_float3 xStep = (simd_float3){ kStep, 0, 0 }; simd_float3 yStep = (simd_float3){ 0, kStep, 0 }; simd_float3 zStep = (simd_float3){ 0, 0, kStep }; return (simd_float3){ sample_3d(sampler, uvw + xStep).x - sample_3d(sampler, uvw - xStep).x, sample_3d(sampler, uvw + yStep).x - sample_3d(sampler, uvw - yStep).x, sample_3d(sampler, uvw + zStep).x - sample_3d(sampler, uvw - zStep).x }; } // vfx_sampler2d struct __vfx_sampler2d; typedef simd_float4 (*sample_2d_func)(const struct __vfx_sampler2d* sampler, simd_float2 uv); typedef struct __vfx_sampler2d { simd_float2 dimensions; simd_int2 edges; simd_int2 cellStrideBytes; simd_int2 outsideRepeatStrideBytes; void* data; sample_2d_func _Nullable sample_func; simd_float2 halfPixel; } vfx_sampler2d; VFX_EXPORT_C vfx_sampler2d vfx_sampler2d_make(void* data, simd_int2 dimensions, pixel_format format, vfx_address_mode address, vfx_filter_mode filter); static inline VFX_SIMD_UNPURE simd_float4 vfx_sample_2d(const vfx_sampler2d* sampler, simd_float2 uv) { return sampler->sample_func(sampler, uv); 
} static inline VFX_SIMD_UNPURE simd_float2 vfx_sample_2d_gradient_x(const vfx_sampler2d* sampler, simd_float2 uv) { float kStep = 0.01f; simd_float2 xStep = (simd_float2){ kStep, 0 }; simd_float2 yStep = (simd_float2){ 0, kStep }; return (simd_float2){ vfx_sample_2d(sampler, uv + xStep).x - vfx_sample_2d(sampler, uv - xStep).x, vfx_sample_2d(sampler, uv + yStep).x - vfx_sample_2d(sampler, uv - yStep).x, }; } // vfx_sampler1d struct __vfx_sampler1d; typedef simd_float4 (*sample_1d_func)(const struct __vfx_sampler1d* sampler, float uv); typedef struct __vfx_sampler1d { float dimensions; int32_t edges; int32_t cellStrideBytes; int32_t outsideRepeatStrideBytes; void* data; sample_1d_func _Nullable sample_func; float halfPixel; } vfx_sampler1d; VFX_EXPORT_C vfx_sampler1d vfx_sampler1d_make(void* data, int32_t dimension, pixel_format format, vfx_address_mode address, vfx_filter_mode filter); static inline VFX_SIMD_UNPURE simd_float4 sample_1d(const vfx_sampler1d* sampler, float uv) { return sampler->sample_func(sampler, uv); } static inline VFX_SIMD_UNPURE float vfx_sample_1d_curve(const vfx_sampler1d* sampler, float uv) { float dim = sampler->dimensions; float u = (uv * (dim - 1.0f) + 0.5f ) / dim; return sample_1d(sampler, u).r; } #endif // __METAL_VERSION__ #pragma mark SDF static inline float VFX_SIMD_UNPURE vfx_ray_sdf_2d(simd_float2 new_pos_ls, simd_float2 old_pos_ls, simd_float2 vel_dt_ls, float threshold, const vfx_ref vfx_sampler2d* sampler, vfx_out simd_float2* normal) { // check that point is inside the bounding box if (vfx_any(vfx_abs(new_pos_ls) > 1.f)) { return -1.f; } // compute the distance to the object simd_float2 new_pos_uv = vfx_remap_n11_to_01(new_pos_ls); float dist = vfx_sample_2d(sampler, new_pos_uv).x; if (dist > threshold) { return -1.f; } // compute old pos simd_float2 old_pos_inside_ls; // check that old_pos_ls is inside the bounding box bool is_old_outside = vfx_any(vfx_abs(old_pos_ls) > 1.f); if (is_old_outside) { // get point on the 
bounding box edge simd_float2 s = vfx_sign(vel_dt_ls); simd_float2 a = vfx_abs(vel_dt_ls); simd_float2 v_m = vfx_max(a, VFX_EPSILON); simd_float2 m = 1.0 / v_m; simd_float2 n = s * m * old_pos_ls; float t = vfx_reduce_max(-n - m); old_pos_inside_ls = old_pos_ls + t * vel_dt_ls; } else { old_pos_inside_ls = old_pos_ls; } simd_float2 old_pos_inside_uv = vfx_remap_n11_to_01(old_pos_inside_ls); // start sphere tracing simd_float2 dir = vfx_robust_normalize(vel_dt_ls); float dist_max_ls = 0.f; const float SPHERE_TRACING_MAX_ITER = 5; const float SPHERE_TRACING_EPSILON = 0.01f; for (int i = 0 ; i < SPHERE_TRACING_MAX_ITER ; ++i) { // we must convert distance from local space to uv space float d_ls = vfx_sample_2d(sampler, old_pos_inside_uv + 0.5f * dist_max_ls * dir).x - threshold; if (d_ls <= SPHERE_TRACING_EPSILON) { break; } dist_max_ls += d_ls; } // correct distance if (is_old_outside) { dist_max_ls += vfx_length(old_pos_inside_ls - old_pos_ls); } // compute t parameter float t = dist_max_ls / vfx_length(new_pos_ls - old_pos_ls); if (t > 1.f) { return -1.f; } // compute normal simd_float2 pt_uv = old_pos_inside_uv + 0.5f * dist_max_ls * dir; *normal = vfx_robust_normalize(vfx_sample_2d_gradient_x(sampler, pt_uv)); return t; } NS_ASSUME_NONNULL_END #endif // MARK: - vfx_shared.h // This should go in vfx_base.h but can't because of module issues? 
#ifdef __METAL_VERSION__ #define vfx_binding_index int32_t #define VFX_PARTICLE_INVALID UINT_MAX typedef uint32_t vfx_script_particle_index_t; #else // Swift import needs to be converted to Int for convenience of use #define vfx_binding_index size_t #define VFX_PARTICLE_INVALID UINT64_MAX #endif #define VFX_BINDING_ENUM VFX_ENUM(vfx_binding_index) // we used to have USE_LINKED_FUNCTIONS=0 on watchOS (not supported on old watches) but it raised compilation issues: // see rdar://139012921 ([Napili] Device Finding bubble ring is missing) #ifndef USE_LINKED_FUNCTIONS # define USE_LINKED_FUNCTIONS 1 #endif // Only used by script (and compute if update_life is updated) struct frame_constants { float dt; float time; bool isFirstFrame; uint32_t simulationIndex; }; typedef VFX_BINDING_ENUM { //compute only vfx_buffer_binding_index_compute_spawn_id = 0, vfx_buffer_binding_index_compute_uniforms = 1, vfx_buffer_binding_index_compute_extra_buffer = 6, //for extra kernel specific buffers, indices >= compute_extra_buffer are available //compute + render vfx_buffer_binding_index_particle_header = 4, vfx_buffer_binding_index_particle_data = 5, //render only vfx_buffer_binding_index_global_constants = 0, vfx_buffer_binding_index_view_constants = 1, vfx_buffer_binding_index_material_constants = 2, vfx_buffer_binding_index_sdf_debug_data = 2, vfx_buffer_binding_index_voxel_debug_data = 2, vfx_buffer_binding_index_scattering = 3, vfx_buffer_binding_index_object_constants = 3, vfx_buffer_binding_index_instance_transforms = 4, vfx_buffer_binding_index_instance_colors = 5, vfx_buffer_binding_index_light_matrix = 5, vfx_buffer_binding_index_pass_constants = 6, vfx_buffer_binding_index_instance_clipping = 7, vfx_buffer_binding_index_ribbon_profile = 8, vfx_buffer_binding_index_data_sort_order = 9, vfx_buffer_binding_index_external_scene_constants = 10, vfx_buffer_binding_index_external_scene_buffers = 11, vfx_buffer_binding_index_external_entity_constants = 12, 
vfx_buffer_binding_index_data_prev_next = 13, vfx_buffer_binding_index_virtual_env_probe_textures = 14, vfx_buffer_binding_index_re_entity_constants = 15, vfx_buffer_binding_index_re_view_constants = 16, vfx_buffer_binding_index_re_global_constants = 17, vfx_buffer_binding_index_re_entity_argument_buffer = 18, vfx_buffer_binding_index_re_scene_argument_buffer = 19, vfx_buffer_binding_index_re_vfx_object_constants = 20, #if TARGET_OS_SIMULATOR vfx_buffer_binding_index_virtual_env_probe_textures_sim = 21, #endif //voxel specific vfx_buffer_binding_index_voxel_cone_tracing = 3, vfx_buffer_binding_index_voxel_lod = 4, vfx_buffer_binding_index_voxel_inv_size = 5, vfx_buffer_binding_index_voxel_data = 22, vfx_buffer_binding_index_voxel_uniforms = 23, vfx_buffer_binding_index_voxel_matrix = 24, } vfx_buffer_binding_index; typedef VFX_BINDING_ENUM { vfx_texture_binding_index_main = 0, vfx_texture_binding_index_linear_zbuffer = 1, vfx_texture_binding_index_gi_tex = 2, vfx_texture_binding_index_opaque_zbuffer = 9, vfx_texture_binding_index_normal_buffer = 10, vfx_texture_binding_index_voxel = 11, vfx_texture_binding_index_noise_3d = 11, // used in surface & volumetric renderer vfx_texture_binding_index_blue_noise_dithering = 13, vfx_texture_binding_index_depth_mitigation_alpha_mask = 14, } vfx_texture_binding_index; typedef VFX_BINDING_ENUM { vfx_sampler_binding_index_main = 0, } vfx_sampler_binding_index; // WARNING: cannot change the existing index because of exported custom shaders typedef VFX_ENUM(int8_t) { particle_data_index_counters = 0, particle_data_index_positions = 4, particle_data_index_colors = 5, particle_data_index_velocities = 6, particle_data_index_sizes = 7, particle_data_index_orientations = 8, particle_data_index_ages = 10, particle_data_index_frames = 11, particle_data_index_angles = 12, particle_data_index_pivots = 13, particle_data_index_targets = 14, particle_data_index_masses = 15, particle_data_index_user_data1s = 16, particle_data_index_user_data2s 
= 17, particle_data_index_user_data3s = 18, particle_data_index_user_data4s = 19, particle_data_index_lifetimes = 20, particle_data_index_parent_ids = 21, particle_data_index_roughness = 22, particle_data_index_metalness = 23, particle_data_index_emission = 24, particle_data_index_ribbon_lengths = 25, particle_data_index_angular_velocities = 26, particle_data_index_angle_velocities = 27, particle_data_index_linear_factors = 28, particle_data_index_angular_factors = 29, particle_data_index_ids = 30, particle_data_index_index_from_id = 31, particle_data_index_free_ids = 32, particle_data_index_neighbor_grid_heads = 33, particle_data_index_neighbor_grid_nexts = 34, particle_data_index_max_count = 35 } particle_data_index; typedef VFX_ENUM(int32_t) { renderer_constants_gamma_blending = 450, renderer_constants_inside_re = 451, renderer_constants_enable_clipping = 452, renderer_constants_enable_dither_fade = 453, // renderer_constants_render_to_composite_layer_LEGACY = 454, renderer_constants_enable_alpha_fade = 455, renderer_constants_enable_depth_mitigation = 456, renderer_constants_enable_auto_alpha = 457, renderer_constants_use_interpolated_crworld = 458, renderer_constants_enable_layered_rendering = 459, renderer_constants_enable_multi_viewport_rendering = 460, renderer_constants_outputs_normal_roughness = 461, renderer_constants_rendering_mode = 462, renderer_constants_enable_portal_clip_plane = 463, renderer_constants_enable_vertex_amplification = 464, renderer_constants_enable_ssao = 465, renderer_constants_outputs_albedo_metalness = 466, renderer_constants_outputs_radiance_ao = 467, renderer_constants_use_api_v2 = 468 } renderer_constants; // DO NOT CHANGE THESE INDICES SINCE THEY CAN BE EXPORTED IN CUSTOM SHADERS typedef VFX_ENUM(int32_t) { // particle_constants_has_position, // particle_constants_has_linear_velocity, particle_constants_has_velocity = 500, // Not stating at 0: rdar://76640645 (VFX custom material compilation issue from added constant 
SupportsCubeArray and EnableSamplerArray) particle_constants_has_age = 501, particle_constants_has_lifetime = 502, particle_constants_uniform_lifetime = 503, particle_constants_has_color = 504, particle_constants_has_orientation = 506, particle_constants_has_angular_velocity = 507, // TODO merge with orientation / angular_velocity particle_constants_has_angle = 508, particle_constants_has_angle_velocity = 509, particle_constants_has_pivot = 510, particle_constants_has_target = 511, particle_constants_has_mass = 512, particle_constants_has_texture_frame = 513, particle_constants_has_linear_factor = 514, particle_constants_has_angular_factor = 515, particle_constants_has_ribbon_length = 516, particle_constants_has_user_data1 = 517, particle_constants_has_user_data2 = 518, particle_constants_has_user_data3 = 519, particle_constants_has_user_data4 = 520, particle_constants_has_size1D = 521, particle_constants_has_size2D = 522, particle_constants_has_size3D = 523, particle_constants_use_half_size = 524, particle_constants_is_local = 526, particle_constants_has_particle_id = 527, particle_constants_has_parent_id = 528, // // Update attribute specific particle_constants_size_over_life_mode = 530, // 0: planar / 1: planar / 2: free particle_constants_drag_use_size, particle_constants_attach_use_over_life, particle_constants_noise_use_transform, particle_constants_plane_collider_infinite, particle_constants_plane_collider_sphere, particle_constants_plane_collider_double_sided, particle_constants_enable_gravity, particle_constants_box_collider_inside, particle_constants_force_field_kind, // // Init attribute specific particle_constants_emitter_shape_type = 540, // 0: point, 1: sphere, 2: box, 3: cylinder, 4: plane, 5: torus, 6: cone particle_constants_emitter_shape_distribution, // 0: random, 1: grid, 2: uniform particle_constants_emitter_shape_orientation, // -1 no orientation otherwise same as direction particle_constants_emitter_shape_direction, // 0: world, 1: local, 2: 
shape, 3: motion, 4: random particle_constants_emitter_shape_has_texture_2D, particle_constants_emitter_pointcache_inherit_position = 550, particle_constants_emitter_pointcache_inherit_color, particle_constants_emitter_pointcache_inherit_velocity, particle_constants_emitter_pointcache_inherit_orientation, particle_constants_emitter_pointcache_inherit_size, particle_constants_emitter_pointcache_has_transform, particle_constants_emitter_pointcache_init_stage, particle_constants_set_color_mode = 560, // TODO Render specific, should be moved particle_constants_render_blending_mode = 600, particle_constants_render_enable_texture_2d = 601, particle_constants_render_enable_texture_3d = 602, particle_constants_render_enable_texture_2d_array = 603, particle_constants_render_enable_texture_cube = 604, particle_constants_render_enable_texture_prelighted = 605, particle_constants_render_texture_is_not_alphapremultiplied = 606, particle_constants_render_texture_channel = 607, particle_constants_render_local_transform_LEGACY = 608, particle_constants_render_has_particle_transform = 609, particle_constants_render_has_pbr_lighting = 610, particle_constants_render_disable_specular = 611, particle_constants_render_animation_mode = 612, particle_constants_render_inter_frame_interpolation = 613, particle_constants_render_enable_scattering = 614, particle_constants_render_orientation_mode = 615, particle_constants_render_shape_mode = 616, // particle_constants_render_enable_velocity_stretch_LEGACY = 617, particle_constants_render_enable_cutoff = 618, particle_constants_render_enable_sorting = 619, particle_constants_render_enable_thick_lines = 620, particle_constants_render_enable_soft_particles, particle_constants_render_need_opaque_zbuffer, particle_constants_render_need_normal_buffer, particle_constants_render_voxelDataChannelCount, particle_constants_render_voxelizeOpacity = 625, particle_constants_render_voxelizeColor, particle_constants_render_enable_per_vertex_color = 627, // 
// particle_constants_render_need_raytracing_LEGACY = 628,
    // particle_constants_render_need_point_size_LEGACY = 629,
    // particle_constants_render_need_ribbon_length_LEGACY = 630,
    // particle_constants_render_need_procedural_uv_LEGACY = 632
    particle_constants_has_ribbon_cap_begin_texture = 633,
    particle_constants_has_ribbon_cap_end_texture = 634,
    particle_constants_render_use_particle_orientation_for_lighting = 635
} particle_constants;

// Element type tag stored in particle_data_description::type.
// Matches MTLDataType
typedef VFX_ENUM(int16_t) {
    particle_data_type_float = 3,
    particle_data_type_float2 = 4,
    particle_data_type_float3 = 5,
    particle_data_type_float4 = 6,
    particle_data_type_int = 29,
    particle_data_type_int2 = 30,
    particle_data_type_int3 = 31,
    particle_data_type_int4 = 32,
    particle_data_type_uint = 33,
    particle_data_type_uint2 = 34,
    particle_data_type_uint3 = 35,
    particle_data_type_uint4 = 36,
} particle_data_type;

// Describes one data stream of the particle buffer.
typedef struct {
    int32_t offset; // added to the uint8_t data pointer by the particle_data getters; `offset > 0` is their "stream present" test
    int16_t type; // particle_data_type
    int16_t stride;
} particle_data_description;

// One description per data index (particle_data_index_*).
struct particle_data_header {
    particle_data_description descriptions[particle_data_index_max_count];
};

#ifdef __METAL_VERSION__
// consider these always there...
// constant bool has_position [[ function_constant(particle_constants_has_position) ]];
// constant bool has_linear_velocity [[ function_constant(particle_constants_has_linear_velocity) ]];

// Function constants: each bool is bound to the particle_constants ID named in its attribute.
// They may be left undefined; the has_data() macro then falls back to the particle data header.
constant bool has_velocity [[ function_constant(particle_constants_has_velocity) ]];
constant bool has_age [[ function_constant(particle_constants_has_age) ]];
constant bool has_lifetime [[ function_constant(particle_constants_has_lifetime) ]];
constant bool has_color [[ function_constant(particle_constants_has_color) ]];
constant bool has_orientation [[ function_constant(particle_constants_has_orientation) ]];
constant bool has_angular_velocity [[ function_constant(particle_constants_has_angular_velocity) ]];
constant bool has_angle [[ function_constant(particle_constants_has_angle) ]];
constant bool has_angle_velocity [[ function_constant(particle_constants_has_angle_velocity) ]];
constant bool has_texture_frame [[ function_constant(particle_constants_has_texture_frame) ]];
constant bool has_particle_id [[ function_constant(particle_constants_has_particle_id) ]];
constant bool has_parent_id [[ function_constant(particle_constants_has_parent_id) ]];
constant bool has_user_data1 [[ function_constant(particle_constants_has_user_data1) ]];
constant bool has_user_data2 [[ function_constant(particle_constants_has_user_data2) ]];
constant bool has_user_data3 [[ function_constant(particle_constants_has_user_data3) ]];
constant bool has_user_data4 [[ function_constant(particle_constants_has_user_data4) ]];
constant bool has_size1D [[ function_constant(particle_constants_has_size1D) ]];
constant bool has_size2D [[ function_constant(particle_constants_has_size2D) ]];
constant bool has_size3D [[ function_constant(particle_constants_has_size3D) ]];
constant bool use_half_size [[ function_constant(particle_constants_use_half_size) ]];
constant bool has_linear_factor [[ function_constant(particle_constants_has_linear_factor) ]];
constant bool has_angular_factor [[
function_constant(particle_constants_has_angular_factor) ]];
constant bool has_ribbon_length [[ function_constant(particle_constants_has_ribbon_length) ]];
constant bool has_ribbon_cap_begin_texture [[ function_constant(particle_constants_has_ribbon_cap_begin_texture) ]];
constant bool has_ribbon_cap_end_texture [[ function_constant(particle_constants_has_ribbon_cap_end_texture) ]];
// Derived constant: true when either ribbon cap texture constant is true.
constant bool has_ribbon_caps_textures = has_ribbon_cap_begin_texture || has_ribbon_cap_end_texture;
constant bool has_pivot [[ function_constant(particle_constants_has_pivot) ]];
constant bool has_target [[ function_constant(particle_constants_has_target) ]];
constant bool has_mass [[ function_constant(particle_constants_has_mass) ]];
constant bool is_local [[ function_constant(particle_constants_is_local) ]];
#endif //__METAL_VERSION__

// Size of every per-spawn-id array in particle_counters.
#define MAX_SPAWNID 16

NS_ASSUME_NONNULL_BEGIN

typedef struct {
    uint32_t threadgroupsPerGrid[3];
} DispatchThreadgroupsIndirectArguments;

// ⚠️ any change here might break already compiled script metallibs as this is used by particle_script_argument_buffer
struct particle_counters {
    uint32_t active_count;
    // dead/live counts are atomics when compiled as Metal and plain uint32_t otherwise.
#ifdef __METAL_VERSION__
    atomic_uint dead_count;
    atomic_uint live_count;
#else
    uint32_t dead_count;
    uint32_t live_count;
#endif
    uint32_t added_count;
    uint32_t generated_count;
    uint32_t allocated_count;
    uint32_t current_seed;
    uint32_t visible_count; // 20
    simd_float4x4 world_from_emitter; // used for space transformation / isLocal
    DispatchThreadgroupsIndirectArguments active_dispatch_args;
    DispatchThreadgroupsIndirectArguments newly_created_dispatch_args;
    bool has_transient_color;
    bool has_transient_size;
    //spawn id
    bool has_spawn_id;
    uint32_t spawn_id_offsets[MAX_SPAWNID];
    uint32_t spawn_id_counts[MAX_SPAWNID];
    uint32_t spawn_id_added_count[MAX_SPAWNID];
    uint32_t spawn_id_previous_offsets[MAX_SPAWNID];
#ifdef __METAL_VERSION__
    atomic_uint spawn_id_dead_count[MAX_SPAWNID];
    atomic_uint spawn_id_live_count[MAX_SPAWNID];
#else
    uint32_t
spawn_id_dead_count[MAX_SPAWNID];
    uint32_t spawn_id_live_count[MAX_SPAWNID];
#endif
    DispatchThreadgroupsIndirectArguments spawn_id_active_dispatch_args[MAX_SPAWNID];
    DispatchThreadgroupsIndirectArguments spawn_id_newly_created_dispatch_args[MAX_SPAWNID];
    uint32_t free_ids_count;
    //neighbor grid
    bool has_neighbor_grid;
    simd_uint3 grid_dimensions;
    simd_float3 grid_origin;
    float grid_cell_size;
    simd_uint3 grid_cell_stride;
    uint32_t max_neighbors_per_cell;

#ifdef __METAL_VERSION__
    // Accessors below are `device`-qualified: they are callable on a counters object in device memory.
    uint32_t get_active_count() device { return active_count; }
    void set_active_count(uint32_t v) device { active_count = v; }
    // live/dead counts are atomics; they are read and written with relaxed ordering.
    uint32_t get_live_count() device { return atomic_load_explicit(&live_count, memory_order_relaxed); }
    void set_live_count(uint32_t v) device { atomic_store_explicit(&live_count, v, memory_order_relaxed); }
    uint32_t get_dead_count() device { return atomic_load_explicit(&dead_count, memory_order_relaxed); }
    void set_dead_count(uint32_t v) device { atomic_store_explicit(&dead_count, v, memory_order_relaxed); }
    uint32_t get_added_count() device {
        return added_count; // Do we need atomicity?
    }
    void set_added_count(uint32_t v) device {
        added_count = v; // Do we need atomicity?
}
    uint32_t get_generated_count() device { return generated_count; }
    void add_generated_count(uint32_t v) device { generated_count += v; }
    void set_generated_count(uint32_t v) device { generated_count = v; }
    uint32_t get_allocated_count() device { return allocated_count; }
    void set_allocated_count(uint32_t v) device { allocated_count = v; }
    uint32_t get_free_ids_count() device { return free_ids_count; }
    void set_free_ids_count(uint32_t v) device { free_ids_count = v; }
    uint32_t get_current_seed() device { return current_seed; }
    void set_current_seed(uint32_t v) device { current_seed = v; }
    uint32_t get_visible_count() device { return visible_count; }
    void set_visible_count(uint32_t v) device { visible_count = v; }

    // Both return the counter value from before the increment (atomic fetch-add, relaxed).
    uint32_t increment_live_count() device { return atomic_fetch_add_explicit(&live_count, 1, memory_order_relaxed); }
    uint32_t increment_dead_count() device { return atomic_fetch_add_explicit(&dead_count, 1, memory_order_relaxed); }

    // True when `index` is not below active_count.
    bool is_outside(uint index) device { return index >= active_count; }

    // Per-spawn-id variant. With spawnid == -1 this is the plain test and *index is untouched.
    // Otherwise *index is tested against that spawn id's count and, when inside, shifted by its offset.
    bool is_outside(thread uint* index, int spawnid) device {
        if (spawnid == -1) {
            return is_outside(*index);
        }
        if (*index >= spawn_id_counts[spawnid]) {
            return true;
        }
        *index += spawn_id_offsets[spawnid];
        return false;
    }

    // Maps an index relative to the added particles onto the tail of the active range.
    uint index_from_added(uint index) device {
        return index + (active_count - added_count);
    }

    // True when *index is not below added_count; otherwise offsets *index into the added area.
    bool newly_created_is_outside(thread uint* index) device {
        if (*index >= added_count) {
            return true;
        }
        *index += active_count - added_count;
        return false;
    }

    // Per-spawn-id variant; spawnid == -1 forwards to the overload above.
    bool newly_created_is_outside(thread uint* index, int spawnid) device {
        if (spawnid == -1) {
            return newly_created_is_outside(index);
        }
        if (*index >= spawn_id_added_count[spawnid]) {
            return true;
        }
        // offset the index to be in the added area of this spawn id
        *index += spawn_id_offsets[spawnid] + spawn_id_counts[spawnid] - spawn_id_added_count[spawnid];
        return false;
    }
    //spawn id
uint32_t get_live_count_spawn_id(uint spawnid) device {
        return atomic_load_explicit(&spawn_id_live_count[spawnid], memory_order_relaxed);
    }
    void set_live_count_spawn_id(uint32_t v, uint spawnid) device {
        atomic_store_explicit(&spawn_id_live_count[spawnid], v, memory_order_relaxed);
    }
    uint32_t get_dead_count_spawn_id(uint spawnid) device {
        return atomic_load_explicit(&spawn_id_dead_count[spawnid], memory_order_relaxed);
    }
    void set_dead_count_spawn_id(uint32_t v, uint spawnid) device {
        atomic_store_explicit(&spawn_id_dead_count[spawnid], v, memory_order_relaxed);
    }
    // Both return the per-spawn-id counter value from before the increment.
    uint32_t increment_live_count_spawn_id(uint spawnid) device {
        return atomic_fetch_add_explicit(&spawn_id_live_count[spawnid], 1, memory_order_relaxed);
    }
    uint32_t increment_dead_count_spawn_id(uint spawnid) device {
        return atomic_fetch_add_explicit(&spawn_id_dead_count[spawnid], 1, memory_order_relaxed);
    }

    // Returns the first slot whose successor's offset lies above `index`,
    // or MAX_SPAWNID - 1 when no successor offset does.
    int32_t get_spawn_id(uint index) device {
        for (int slot = 0; slot + 1 < MAX_SPAWNID; slot++) {
            if (index < spawn_id_offsets[slot + 1]) {
                return slot;
            }
        }
        return MAX_SPAWNID - 1;
    }

    //used in scripts
    // Returns -1 when spawn ids are disabled; otherwise the dispatch spawn id if it is not -1,
    // else the id looked up from `index`. The last slot (MAX_SPAWNID - 1) is reported as -1.
    int32_t get_spawn_id_if_present(uint index, int32_t dispatch_spawn_id) device {
        if (!has_spawn_id) {
            return -1;
        }
        int32_t sid = (dispatch_spawn_id != -1) ? dispatch_spawn_id : get_spawn_id(index);
        return sid == MAX_SPAWNID - 1 ?
-1 : sid;
    }

    //neighbor grid
    uint3 get_grid_dimensions(){ return grid_dimensions; }
    // Every other accessor of this struct is `device`-qualified. Without this overload the getter
    // cannot be called through a `device particle_counters*`, which is how particle_data::counters()
    // exposes the struct. The unqualified version above is kept for backward compatibility.
    uint3 get_grid_dimensions() device { return grid_dimensions; }
    // Product of the three grid dimensions.
    uint32_t get_grid_cell_count() device { return grid_dimensions.x * grid_dimensions.y * grid_dimensions.z; }
#endif //__METAL_VERSION__
};

#ifdef __METAL_VERSION__
// Default values (also used when the data is missing)
#define DEFAULT_POSITION float3(0.f)
#define DEFAULT_VELOCITY float3(0.f)
#define DEFAULT_AGE 0.f
#define DEFAULT_LIFETIME 1.f
#define DEFAULT_COLOR float4(1.f)
#define DEFAULT_COLOR_H half4(1.h)
#define DEFAULT_ORIENTATION vfx_float4_unit_w()
#define DEFAULT_ANGULAR_VELOCITY 0.f
#define DEFAULT_ANGLE 0.f
#define DEFAULT_ANGLE_VELOCITY 0.f
#define DEFAULT_TEXTURE_FRAME 0.f
#define DEFAULT_SIZE 1.0f
#define DEFAULT_LINEAR_FACTOR 1.f
#define DEFAULT_ANGULAR_FACTOR 1.f
#define DEFAULT_PIVOT 0x80808080
#define DEFAULT_PIVOT_F float3(0.5f) // center
#define DEFAULT_TARGET 0.f
#define DEFAULT_MASS 1.f
#define DEFAULT_ROUGHNESS 0.5f
#define DEFAULT_METALNESS 0.f
#define DEFAULT_EMISSION 0.f
#define DEFAULT_USER_DATA 0.f

// True only when function constant `a` is both defined and true.
#define is_defined_and_true(a) (is_function_constant_defined(a) && a)
// Presence test: the function constant when it is defined, otherwise the header lookup has(index).
#define has_data(fc, index) (is_function_constant_defined(fc) ? fc : has(index))
// get_data_*: read stream `index` at the caller's local `pid`, or yield `def` when the stream is absent.
// NOTE: these expand to an unparenthesized conditional with a trailing `;` and require a local named `pid`.
#define get_data_u(fc, index, def) has_data(fc, index) ? get_uint32(index)[pid] : def;
#define get_data_i(fc, index, def) has_data(fc, index) ? get_int32(index)[pid] : def;
#define get_data_f(fc, index, def) has_data(fc, index) ? get_float(index)[pid] : def;
#define get_data_f2(fc, index, def) has_data(fc, index) ? get_float2(index)[pid] : def;
#define get_data_f3(fc, index, def) has_data(fc, index) ? get_float3(index)[pid] : def;
#define get_data_f4(fc, index, def) has_data(fc, index) ? \
get_float4(index)[pid] : def;
// set_data_*: store `val` into stream `index` at the caller's local `pid`, only when has_data() is true.
// NOTE: each expands to a bare `if (...) { ... }` statement and requires a local named `pid`.
#define set_data_u(fc, index, val) if (has_data(fc, index)) { get_uint32(index)[pid] = val; }
#define set_data_i(fc, index, val) if (has_data(fc, index)) { get_int32(index)[pid] = val; }
#define set_data_f(fc, index, val) if (has_data(fc, index)) { get_float(index)[pid] = val; }
#define set_data_f2(fc, index, val) if (has_data(fc, index)) { get_float2(index)[pid] = val; }
#define set_data_f3(fc, index, val) if (has_data(fc, index)) { get_float3(index)[pid] = val; }
#define set_data_f4(fc, index, val) if (has_data(fc, index)) { get_float4(index)[pid] = val; }

// Accessor over a particle buffer: a header of stream descriptions plus the raw bytes they point into.
struct particle_data {
private:
    constant particle_data_header& particle_header;
    device const uint8_t* data;
public:
    // Counters block, found in `data` at the offset recorded for particle_data_index_counters.
    device particle_counters* counters() const {
        // OPTIM: get rid of desc if offset is always 0
        particle_data_description desc = particle_header.descriptions[ particle_data_index_counters ];
        return (device particle_counters *)(data + desc.offset);
    }
    particle_data(constant particle_data_header& particle_header, device const uint8_t* data) : particle_header(particle_header), data(data) { }

    // Counters (each method forwards to the same-named particle_counters method)
    uint32_t get_active_count() const { return counters()->get_active_count(); }
    void set_active_count(uint32_t v) { counters()->set_active_count(v); }
    uint32_t get_live_count() { return counters()->get_live_count(); }
    void set_live_count(uint32_t v) { counters()->set_live_count(v); }
    uint32_t get_added_count() { return counters()->get_added_count(); }
    void set_added_count(uint32_t v) { counters()->set_added_count(v); }
    uint32_t get_dead_count() { return counters()->get_dead_count(); }
    void set_dead_count(uint32_t v) { counters()->set_dead_count(v); }
    uint32_t get_generated_count() { return counters()->get_generated_count(); }
    void add_generated_count(uint32_t v) { counters()->add_generated_count(v); }
    uint32_t get_allocated_count() { return counters()->get_allocated_count(); }
    void set_allocated_count(uint32_t v) { counters()->set_allocated_count(v); }
    uint32_t
get_current_seed() { return counters()->get_current_seed(); }
    void set_current_seed(uint32_t v) { counters()->set_current_seed(v); }
    uint32_t get_visible_count() const { return counters()->get_visible_count(); }
    void set_visible_count(uint32_t v) { counters()->set_visible_count(v); }
    uint32_t increment_live_count() { return counters()->increment_live_count(); }
    uint32_t increment_dead_count() { return counters()->increment_dead_count(); }
    bool is_outside(uint index) const { return counters()->is_outside(index); }
    // NOTE: the pointer overloads may modify *index (see the particle_counters implementations).
    bool is_outside(thread uint* index, int spawnid) { return counters()->is_outside(index, spawnid); }
    uint index_from_added(uint index) { return counters()->index_from_added(index); }
    bool newly_created_is_outside(thread uint* index) { return counters()->newly_created_is_outside(index); }
    bool newly_created_is_outside(thread uint* index, int spawnid) { return counters()->newly_created_is_outside(index, spawnid); }
    int32_t get_spawn_id(uint index){ return counters()->get_spawn_id(index); }
    int32_t get_spawn_id_if_present(uint index, int32_t dispatch_spawn_id){ return counters()->get_spawn_id_if_present(index, dispatch_spawn_id); }

    // Emitter transform stored in the counters block, and values derived from it.
    float4x4 world_from_emitter() { return counters()->world_from_emitter; }
    float3 emitter_scale() { return vfx_get_scale(world_from_emitter()); }
    float4 emitter_orientation() { return vfx_quat_(world_from_emitter()); }

    // Random
    // Seed = current seed + kernel offset + particle offset.
    uint32_t init_kernel_seed(uint32_t kernel_offset, uint32_t particle_offset) { return counters()->get_current_seed() + kernel_offset + particle_offset; }
    uint32_t get_seed(int pid) { return init_kernel_seed(0, pid); }

    // Generic Data
    // Each getter returns `data + offset` of the requested stream cast to the element type.
    // No presence check is made here; callers use has()/has_data() first.
    device uint32_t* get_uint32(int data_index) const {
        particle_data_description desc = particle_header.descriptions[ data_index ];
        return (device uint32_t *)(data + desc.offset);
    }
    device atomic_uint* get_atomic_uint(int data_index) const {
        particle_data_description desc = particle_header.descriptions[ data_index ];
        return (device atomic_uint *)(data + desc.offset);
    }
    device int32_t*
get_int32(int data_index) const {
        particle_data_description desc = particle_header.descriptions[ data_index ];
        return (device int32_t *)(data + desc.offset);
    }
    device float* get_float(int data_index) const {
        particle_data_description desc = particle_header.descriptions[ data_index ];
        return (device float *)(data + desc.offset);
    }
    device float2* get_float2(int data_index) const {
        particle_data_description desc = particle_header.descriptions[ data_index ];
        return (device float2 *)(data + desc.offset);
    }
    device float3* get_float3(int data_index) const {
        particle_data_description desc = particle_header.descriptions[ data_index ];
        return (device float3 *)(data + desc.offset);
    }
    device float4* get_float4(int data_index) const {
        particle_data_description desc = particle_header.descriptions[ data_index ];
        return (device float4 *)(data + desc.offset);
    }

    // Non Function Constant based APIs
    // A stream counts as present when its recorded offset is strictly positive.
    bool has(int data_index) const { return particle_header.descriptions[ data_index ].offset > 0; }
    // Same test, additionally requiring the recorded element type to equal `type`.
    bool has(int data_index, int16_t type) const {
        particle_data_description desc = particle_header.descriptions[ data_index ];
        return desc.offset > 0 && desc.type == type;
    }

    // Data
    // Positions are read and written unconditionally (no presence check).
    float3 get_position(int pid) const { return get_float3(particle_data_index_positions)[pid]; }
    void set_position(int pid, float3 v) { get_float3(particle_data_index_positions)[pid] = v; }
    // The remaining accessors return a DEFAULT_* value, or skip the write, when the stream is absent.
    float3 get_velocity(int pid) const { return get_data_f3(has_velocity, particle_data_index_velocities, DEFAULT_VELOCITY); }
    void set_velocity(int pid, float3 v) { set_data_f3(has_velocity, particle_data_index_velocities, v); }
    float4 get_color(int pid) const { return get_data_f4(has_color, particle_data_index_colors, DEFAULT_COLOR); }
    // Color converted to half4 with the alpha channel saturated.
    half4 get_color_as_half(int pid) const {
        half4 color = half4(get_color(pid));
        color.a = saturate(color.a);
        return color;
    }
    void set_color(int pid, float4 v) { set_data_f4(has_color, particle_data_index_colors, v); }
    float2 get_ribbon_length(int pid) const { return get_data_f2(has_ribbon_length,
particle_data_index_ribbon_lengths, 0.f); }
    void set_ribbon_length(int pid, float2 v) { set_data_f2(has_ribbon_length, particle_data_index_ribbon_lengths, v); }

    // Size as a float3, whatever the stored width:
    //  - float3 stream: returned as is
    //  - float2 stream: z is set to 0.001f
    //  - float stream:  the scalar is replicated on all three components
    // The function constants are consulted first; the header type is the fallback.
    float3 get_size(uint pid) const {
        // TODO: use one FC of all sizes
        if (is_defined_and_true(has_size3D)) {
            return get_float3(particle_data_index_sizes)[ pid ];
        } else if (is_defined_and_true(has_size2D)) {
            return float3(get_float2(particle_data_index_sizes)[ pid ], 0.001f); // the reasoning is that planar means small z
        } else if (is_defined_and_true(has_size1D)) {
            return float3(get_float(particle_data_index_sizes)[ pid ]);
        }
        // all FC are false or undefined -> check header
        particle_data_description desc = particle_header.descriptions[ particle_data_index_sizes ];
        if (desc.offset > 0) {
            switch (desc.type) {
                case particle_data_type_float3: return get_float3(particle_data_index_sizes)[ pid ];
                case particle_data_type_float2: return float3(get_float2(particle_data_index_sizes)[ pid ], 0.001f);
                case particle_data_type_float: return float3(get_float(particle_data_index_sizes)[ pid ]);
            }
        }
        // no size stream (or an unhandled element type)
        return DEFAULT_SIZE;
    }

    // Stores `v` truncated to the stored width (xyz, xy or x). Does nothing when no size stream
    // is found or its header type is not float/float2/float3.
    void set_size(int pid, float3 v) {
        // TODO: use one FC of all sizes
        if (is_defined_and_true(has_size3D)) {
            get_float3(particle_data_index_sizes)[ pid ] = v;
        } else if (is_defined_and_true(has_size2D)) {
            get_float2(particle_data_index_sizes)[ pid ] = v.xy;
        } else if (is_defined_and_true(has_size1D)) {
            get_float(particle_data_index_sizes)[ pid ] = v.x;
        } else {
            // all FC are false or undefined -> check header
            particle_data_description desc = particle_header.descriptions[ particle_data_index_sizes ];
            if (desc.offset > 0) {
                switch (desc.type) {
                    case particle_data_type_float3: get_float3(particle_data_index_sizes)[ pid ] = v; break;
                    case particle_data_type_float2: get_float2(particle_data_index_sizes)[ pid ] = v.xy; break;
                    case particle_data_type_float: get_float(particle_data_index_sizes)[ pid ] = v.x; break;
                }
            }
        }
    }
    float2 get_size2D(uint pid) const { return get_size(pid).xy; }
    void set_size2D(uint pid, float2
size) { set_size(pid, float3(size, 1.f)); } // z is passed as 1.f
    float get_size1D(uint pid) const { return get_size(pid).x; }
    void set_size1D(uint pid, float size) { set_size(pid, float3(size)); }

    // Each getter returns the DEFAULT_* value when the stream is absent; each setter is then a no-op.
    float4 get_orientation(int pid) const { return get_data_f4(has_orientation, particle_data_index_orientations, DEFAULT_ORIENTATION); }
    void set_orientation(int pid, float4 v) { set_data_f4(has_orientation, particle_data_index_orientations, v); }
    float4 get_angular_velocity(int pid) const { return get_data_f4(has_angular_velocity, particle_data_index_angular_velocities, DEFAULT_ANGULAR_VELOCITY); }
    void set_angular_velocity(int pid, float4 v) { set_data_f4(has_angular_velocity, particle_data_index_angular_velocities, v); }
    float get_angle(int pid) const { return get_data_f(has_angle, particle_data_index_angles, DEFAULT_ANGLE); }
    void set_angle(int pid, float v) { set_data_f(has_angle, particle_data_index_angles, v); }
    float get_angle_velocity(int pid) const { return get_data_f(has_angle_velocity, particle_data_index_angle_velocities, DEFAULT_ANGLE_VELOCITY); }
    void set_angle_velocity(int pid, float v) { set_data_f(has_angle_velocity, particle_data_index_angle_velocities, v); }
    // age = normalized age
    float get_age(int pid) const { return get_data_f(has_age, particle_data_index_ages, DEFAULT_AGE); }
    void set_age(int pid, float v) { set_data_f(has_age, particle_data_index_ages, v); }
    float get_lifetime(int pid) const { return get_data_f(has_lifetime, particle_data_index_lifetimes, DEFAULT_LIFETIME); }
    void set_lifetime(int pid, float v) { set_data_f(has_lifetime, particle_data_index_lifetimes, v); }
    float get_texture_frame(int pid) const { return get_data_f(has_texture_frame, particle_data_index_frames, DEFAULT_TEXTURE_FRAME); }
    void set_texture_frame(int pid, float v) { set_data_f(has_texture_frame, particle_data_index_frames, v); }
    float3 get_linear_factor(int pid) const { return get_data_f3(has_linear_factor, particle_data_index_linear_factors, DEFAULT_LINEAR_FACTOR); }
    void
set_linear_factor(int pid, float3 v) { set_data_f3(has_linear_factor, particle_data_index_linear_factors, v); }
    float3 get_angular_factor(int pid) const { return get_data_f3(has_angular_factor, particle_data_index_angular_factors, DEFAULT_ANGULAR_FACTOR); }
    void set_angular_factor(int pid, float3 v) { set_data_f3(has_angular_factor, particle_data_index_angular_factors, v); }

    // pivot values between 0 and 1
    // Stored as a packed unorm4x8 uint32; only xyz is returned.
    float3 get_pivot(int pid) const {
        return has_data(has_pivot, particle_data_index_pivots) ? unpack_unorm4x8_to_float(get_uint32(particle_data_index_pivots)[pid]).xyz : DEFAULT_PIVOT_F; // avoid using uint32 default value (DEFAULT_PIVOT) for precision issues. (0.5039 ~= 0.5)
    }
    // pivot values between 0 and 1
    // Packed with w = 0 before being stored.
    void set_pivot(int pid, float3 v) {
        uint32_t u = pack_float_to_unorm4x8(float4(v, 0.f));
        set_data_u(has_pivot, particle_data_index_pivots, u);
    }
    // pivot values between -1 and 1
    float3 get_signed_pivot(int pid) const { return get_pivot(pid) * 2 - 1; }

    float3 get_target(int pid) const { return get_data_f3(has_target, particle_data_index_targets, DEFAULT_TARGET); }
    void set_target(int pid, float3 v) { set_data_f3(has_target, particle_data_index_targets, v); }
    // mass = 1 / mass
    float get_mass(int pid) const { return get_data_f(has_mass, particle_data_index_masses, DEFAULT_MASS); }
    // No `;` after the macro here: set_data_f expands to a complete `if` statement, so none is needed.
    void set_mass(int pid, float v) { set_data_f(has_mass, particle_data_index_masses, v) }

    // Ids default to 0 when the id stream is absent.
    uint32_t get_id(int pid) const { return get_data_u(has_particle_id, particle_data_index_ids, 0); }
    void set_id(int pid, uint32_t v) { set_data_u(has_particle_id, particle_data_index_ids, v); }
    uint32_t get_parent_id(int pid) const { return get_data_u(has_parent_id, particle_data_index_parent_ids, 0); }
    void set_parent_id(int pid, uint32_t v) { set_data_u(has_parent_id, particle_data_index_parent_ids, v); }

    // Roughness / metalness / emission have no function constant: presence comes from the header only.
    float get_roughness(int pid) const { return has(particle_data_index_roughness) ?
get_float(particle_data_index_roughness)[pid] : DEFAULT_ROUGHNESS; }
    void set_roughness(int pid, float v) { if (has(particle_data_index_roughness)) { get_float(particle_data_index_roughness)[pid] = v; } }
    float get_metalness(int pid) const { return has(particle_data_index_metalness) ? get_float(particle_data_index_metalness)[pid] : DEFAULT_METALNESS; }
    void set_metalness(int pid, float v) { if (has(particle_data_index_metalness)) { get_float(particle_data_index_metalness)[pid] = v; } }
    float get_emission(int pid) const { return has(particle_data_index_emission) ? get_float(particle_data_index_emission)[pid] : DEFAULT_EMISSION; }
    void set_emission(int pid, float v) { if (has(particle_data_index_emission)) { get_float(particle_data_index_emission)[pid] = v; } }

    // Four float4 user data streams; each defaults to DEFAULT_USER_DATA when absent.
    float4 get_user_data1(int pid) const { return get_data_f4(has_user_data1, particle_data_index_user_data1s, DEFAULT_USER_DATA); }
    void set_user_data1(int pid, float4 v) { set_data_f4(has_user_data1, particle_data_index_user_data1s, v); }
    float4 get_user_data2(int pid) const { return get_data_f4(has_user_data2, particle_data_index_user_data2s, DEFAULT_USER_DATA); }
    void set_user_data2(int pid, float4 v) { set_data_f4(has_user_data2, particle_data_index_user_data2s, v); }
    float4 get_user_data3(int pid) const { return get_data_f4(has_user_data3, particle_data_index_user_data3s, DEFAULT_USER_DATA); }
    void set_user_data3(int pid, float4 v) { set_data_f4(has_user_data3, particle_data_index_user_data3s, v); }
    float4 get_user_data4(int pid) const { return get_data_f4(has_user_data4, particle_data_index_user_data4s, DEFAULT_USER_DATA); }
    void set_user_data4(int pid, float4 v) { set_data_f4(has_user_data4, particle_data_index_user_data4s, v); }

    // Reads the index_from_id stream at `pid`; VFX_PARTICLE_INVALID when the id stream is absent.
    // No range check is done on `pid` (see safe_get_index_from_id).
    uint32_t get_index_from_id(int pid) const { return get_data_u(has_particle_id, particle_data_index_index_from_id, VFX_PARTICLE_INVALID); }
    //used for graph scripts
    // Same lookup, but returns VFX_PARTICLE_INVALID when `pid` is negative or not below the allocated count.
    uint32_t safe_get_index_from_id(int32_t pid) const {
        if (pid < 0 || pid >= int(counters()->get_allocated_count())){
return VFX_PARTICLE_INVALID;
        }
        return get_data_u(has_particle_id, particle_data_index_index_from_id, VFX_PARTICLE_INVALID);
    }
    void set_index_from_id(int pid, uint32_t index) const { set_data_u(has_particle_id, particle_data_index_index_from_id, index); }
    uint32_t get_free_id(int pid) const { return get_data_u(has_particle_id, particle_data_index_free_ids, VFX_PARTICLE_INVALID); }
    void set_free_id(int pid, uint32_t v) const { set_data_u(has_particle_id, particle_data_index_free_ids, v); }

    //neighbor grid
    // Head of the particle list of `cell`. Returns VFX_PARTICLE_INVALID when the grid is disabled
    // or `cell` is not below the grid cell count. Negative `cell` values are not rejected here.
    uint32_t get_neighbor_grid_list_head(int cell) const {
        return (counters()->has_neighbor_grid && cell < int(counters()->get_grid_cell_count())) ? get_uint32(particle_data_index_neighbor_grid_heads)[cell] : VFX_PARTICLE_INVALID;
    }
    // Plain store of a list head; does nothing when the grid is disabled. `cell` is not range-checked.
    void set_neighbor_grid_list_head(int cell, uint32_t v) const {
        //not atomic, used for initialization
        if (counters()->has_neighbor_grid) {
            get_uint32(particle_data_index_neighbor_grid_heads)[cell] = v;
        }
    }
    // Atomically swaps the head of `cell` with `v` (relaxed) and returns the previous head.
    // Unlike the two methods above, has_neighbor_grid is not checked here.
    uint32_t atomic_exchange_neighbor_grid_list_head(int cell, uint32_t v) const {
        return atomic_exchange_explicit(&get_atomic_uint(particle_data_index_neighbor_grid_heads)[cell], v, memory_order_relaxed);
    }
    // Next particle after `pid` in its cell list, or VFX_PARTICLE_INVALID when the grid is disabled.
    uint32_t get_neighbor_grid_list_next(int pid) const { return counters()->has_neighbor_grid ?
get_uint32(particle_data_index_neighbor_grid_nexts)[pid] : VFX_PARTICLE_INVALID; }

    // Stores the "next" link of `pid`; does nothing when the grid is disabled.
    void set_neighbor_grid_list_next(int pid, uint32_t v) const {
        if (counters()->has_neighbor_grid) {
            get_uint32(particle_data_index_neighbor_grid_nexts)[pid] = v;
        }
    }

    // 3D cell coordinate of `pos`, wrapped into [0, grid_dimensions) on each axis.
    uint3 pos_to_cell_3d(simd_float3 pos) const {
        int3 grid_dim = int3(counters()->grid_dimensions);
        int3 unwrapped = int3(floor((pos - counters()->grid_origin) / counters()->grid_cell_size));
        // double modulo keeps the result non-negative for negative coordinates
        return uint3((unwrapped % grid_dim + grid_dim) % grid_dim);
    }

    // Linear cell index: sum of the per-axis coordinates scaled by grid_cell_stride.
    uint32_t cell_3d_to_cell_index(uint3 cell_3d) const {
        uint3 strided = cell_3d * counters()->grid_cell_stride;
        return strided.x + strided.y + strided.z;
    }

    uint32_t pos_to_cell_index(simd_float3 pos) const {
        return cell_3d_to_cell_index(pos_to_cell_3d(pos));
    }

    bool particle_index_is_valid(uint32_t index) const {
        return index < get_active_count();
    }

    // Fills cell_lists[0..26] with the list heads of the cell containing `pos` and its 26 neighbors
    // (with wrap-around). `cell_lists` must have room for 27 entries.
    void get_27_neighboring_cells_lists(simd_float3 pos, thread uint32_t* cell_lists) const {
        uint3 center = pos_to_cell_3d(pos);
        int3 dims = int3(counters()->grid_dimensions);
        //try to order the offsets to avoid biasing too much in one direction
        constexpr int3 cell_offsets[13] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, {-1, 1, 0 }, {-1, 0, 1 }, { 1, 1, 0 }, { 1, 0, 1 }, { 0,-1, 1 }, { 0, 1, 1 }, {-1,-1, 1 }, { 1,-1, 1 }, {-1, 1, 1 }, { 1, 1, 1 } };
        //get all 27 cells around the position
        //start with the middle cell
        cell_lists[0] = get_neighbor_grid_list_head(cell_3d_to_cell_index(center));
        // each of the 13 offsets is used once as +offset and once as -offset
        for (int i = 0; i < 13; i++) {
            int slot = 1 + 2 * i;
            uint3 ahead = uint3(((int3(center) + cell_offsets[i]) % dims + dims) % dims);
            cell_lists[slot] = get_neighbor_grid_list_head(cell_3d_to_cell_index(ahead));
            uint3 behind = uint3(((int3(center) - cell_offsets[i]) % dims + dims) % dims);
            cell_lists[slot + 1] = get_neighbor_grid_list_head(cell_3d_to_cell_index(behind));
        }
    }

    uint32_t get_neighbors_in_radius(simd_float3 pos, float radius, uint32_t max_neighbor_count, thread uint32_t* neighbors) const {
        //init neighbors with invalid
// particle index
        for (uint i = 0; i < max_neighbor_count; i++) {
            neighbors[i] = VFX_PARTICLE_INVALID;
        }
        if (!counters()->has_neighbor_grid) return 0; // check needed for graph and scripts
        // A zero-capacity output can never receive an entry; without this guard the first hit
        // would be written to neighbors[0], past the end of the buffer.
        if (max_neighbor_count == 0) return 0;

        uint32_t cell_lists[27];
        get_27_neighboring_cells_lists(pos, cell_lists);

        //collect neighbors particle indices
        uint neighbor_count = 0;
        // the search radius is clamped to one grid cell, the reach of the 27-cell neighborhood
        float squared_radius = vfx_pow2(min(radius, counters()->grid_cell_size));
        for (int cell = 0; cell < 27; cell++) {
            uint32_t particle_index = cell_lists[cell];
            // walk the cell's list, visiting at most max_neighbors_per_cell entries
            for (uint i = 0; i < counters()->max_neighbors_per_cell; i++) {
                if (particle_index == VFX_PARTICLE_INVALID) {
                    break; // end of this cell's list
                }
                float3 their_pos = get_position(particle_index);
                float3 diff = their_pos - pos;
                float squared_dist = dot(diff, diff);
                if (squared_dist < squared_radius) {
                    neighbors[neighbor_count] = particle_index;
                    neighbor_count++;
                    if (neighbor_count >= max_neighbor_count) {
                        // Output is full: stop the whole search. Breaking only out of the inner
                        // loop would let the next cell write past the end of `neighbors`.
                        return neighbor_count;
                    }
                }
                particle_index = get_neighbor_grid_list_next(particle_index);
            }
        }
        //return how many neighbors were found
        return neighbor_count;
    }

    // Index of the closest particle strictly within min(radius, grid_cell_size) of `pos`, searching the
    // 27 surrounding cells; VFX_PARTICLE_INVALID when none is found or the grid is disabled.
    uint32_t get_nearest_neighbor_in_radius(simd_float3 pos, float radius) const {
        if(!counters()->has_neighbor_grid) return VFX_PARTICLE_INVALID; // check needed for graph and scripts
        uint32_t cell_lists[27];
        get_27_neighboring_cells_lists(pos, cell_lists);
        uint32_t closest = VFX_PARTICLE_INVALID;
        float min_squared_dist = MAXFLOAT;
        float squared_radius = vfx_pow2(min(radius, counters()->grid_cell_size));
        for(int cell = 0; cell < 27; cell++){
            uint32_t particle_index = cell_lists[cell];
            for(uint i = 0; i < counters()->max_neighbors_per_cell; i++){
                if(particle_index != VFX_PARTICLE_INVALID){
                    float3 their_pos = get_position(particle_index);
                    float3 diff = their_pos - pos;
                    float squared_dist = dot(diff, diff);
                    if(squared_dist < min_squared_dist && squared_dist < squared_radius){
                        closest = particle_index;
                        min_squared_dist = squared_dist;
                    }
                } else {
                    break;
                }
                particle_index = get_neighbor_grid_list_next(particle_index);
            }
        }
        return closest;
    }

    // Computed Data Helpers
    float4x4 get_transform(int pid)
const { float3 pos = get_position(pid); float4 ori = get_orientation(pid); float3 scl = get_size(pid); float4x4 emitter_from_particle = vfx_make_transform(ori, float4(pos, 1), scl); if (has_pivot) { float3 pvt = get_signed_pivot(pid); emitter_from_particle = emitter_from_particle * vfx_make_translation(float4(-pvt, 1)); } return emitter_from_particle; } float4x4 get_world_transform(int pid) { return counters()->world_from_emitter * get_transform(pid); } half3 get_rme(int pid) const { return half3(get_roughness(pid), get_metalness(pid), get_emission(pid)); } }; template struct particle_data_attachment { constant particle_data_header& particle_header [[ buffer(B) ]]; device const uint8_t* data [[ buffer(B+1) ]]; particle_data unwrap() { return particle_data(particle_header, data); } }; #endif typedef struct { int resolution; float edgeAtt; simd_float4 worldPosSize; simd_float4 scaleBiasNrm; // [0..1] simd_float4 scaleBiasTex; // [0..resolution] float worldCellSize; float invWorldCellSize; simd_float2 opacityScaleBias; simd_float2 colorScaleBias; int frameCount; } VoxelDataUniforms; NS_ASSUME_NONNULL_END // MARK: - vfx_re_shaders.h #ifndef __VFX_RE_SHADERS_H__ #define __VFX_RE_SHADERS_H__ #if IMPORT_IN_CFX_COMMON_PROFILE using simd_float3x3 = metal::float3x3; using simd_float3 = metal::float3; #endif #if VFX_USES_RE_SHADERS # define VFX_RE_SHADERS_AVAILABLE 1 #else # define VFX_RE_SHADERS_AVAILABLE 0 #endif // VFX_USES_RE_SHADERS #ifndef IMPORT_IN_CFX_COMMON_PROFILE # define VFX_IMPORT_RE_SHADERS_ENGINE_CONSTANTS ( VFX_RE_SHADERS_AVAILABLE && __METAL_VERSION__ ) # define VFX_IMPORT_RE_SHADERS_SHARED_LIGHTING ( VFX_RE_SHADERS_AVAILABLE && __cplusplus ) # define VFX_IMPORT_RE_SHADERS_CLIPPING ( VFX_RE_SHADERS_AVAILABLE && __METAL_VERSION__ ) # define VFX_IMPORT_RE_SHADERS_SHARED_PROBES ( VFX_RE_SHADERS_AVAILABLE && __METAL_VERSION__ ) # define VFX_IMPORT_RE_SHADERS_SHARED_VIRTUAL_ENV_PROBES ( VFX_RE_SHADERS_AVAILABLE && __METAL_VERSION__ ) # define 
VFX_IMPORT_RE_SHADERS_SHARED_BREAKTHROUGH ( VFX_RE_SHADERS_AVAILABLE && __cplusplus ) #endif // !IMPORT_IN_CFX_COMMON_PROFILE #ifdef __METAL_VERSION__ # define vfx_half half #else # define vfx_half uint16_t #endif typedef struct { uint16_t offset; uint16_t count; } VFX_RE_C_ClippingIndexSlice; typedef struct VFX_RE_C_ClippingConstants_s VFX_RE_C_ClippingConstants; typedef struct VFX_RE_C_EntityConstants_s VFX_RE_C_EntityConstants; typedef struct VFX_RE_C_ViewConstants_s VFX_RE_C_ViewConstants; typedef struct VFX_RE_C_GlobalConstants_s VFX_RE_C_GlobalConstants; #if VFX_IMPORT_RE_SHADERS_ENGINE_CONSTANTS namespace re { struct ClippingConstants; struct ClippingIndexSlice; struct EntityConstants; struct ViewConstants; struct GlobalConstants; } typedef re::ClippingConstants ClippingConstants; typedef re::ClippingIndexSlice ClippingIndexSlice; typedef re::EntityConstants EntityConstants; typedef re::ViewConstants ViewConstants; typedef re::GlobalConstants GlobalConstants; #else typedef VFX_RE_C_ClippingConstants ClippingConstants; typedef VFX_RE_C_ClippingIndexSlice ClippingIndexSlice; typedef VFX_RE_C_EntityConstants EntityConstants; typedef VFX_RE_C_ViewConstants ViewConstants; typedef VFX_RE_C_GlobalConstants GlobalConstants; #endif // VFX_IMPORT_RE_SHADERS_ENGINE_CONSTANTS typedef struct VFX_RE_C_LightConstantBuffer_s VFX_RE_C_LightConstantBuffer; typedef struct VFX_RE_C_IBLConstants_s { simd_float3x3 rotation; float intensityScale; float mixFactor; simd_float3 mixColor; simd_float3x3 whitePointCorrection; float wpcStrength; //rotation/blend constants - these could be conditional on function constants simd_float3x3 rotationBlend; float blendFactor; float postBlendSumFactor; } VFX_RE_C_IBLConstants; #if VFX_IMPORT_RE_SHADERS_SHARED_LIGHTING namespace re { struct LightConstantBuffer; struct IBLConstants; } typedef re::LightConstantBuffer LightConstantBuffer; typedef re::IBLConstants IBLConstants; #else typedef VFX_RE_C_LightConstantBuffer LightConstantBuffer; typedef 
VFX_RE_C_IBLConstants IBLConstants; #endif // VFX_IMPORT_RE_SHADERS_SHARED_LIGHTING typedef struct VFX_RE_C_ProbeConstantBuffer_s VFX_RE_C_ProbeConstantBuffer; #if VFX_IMPORT_RE_SHADERS_SHARED_PROBES namespace re { struct ProbeConstantBuffer; } typedef re::ProbeConstantBuffer ProbeConstantBuffer; #else typedef VFX_RE_C_ProbeConstantBuffer ProbeConstantBuffer; #endif // VFX_IMPORT_RE_SHADERS_SHARED_PROBES # ifdef __cplusplus namespace VirtualEnvironmentProbeLighting { typedef struct VFX_RE_C_TextureArgumentBuffer_s VFX_RE_C_TextureArgumentBuffer; typedef struct VFX_RE_C_ProbeConstantBuffer_s VFX_RE_C_ProbeConstantBuffer; } # endif #if VFX_IMPORT_RE_SHADERS_SHARED_VIRTUAL_ENV_PROBES namespace re { namespace VirtualEnvironmentProbeLighting { struct TextureArgumentBuffer; struct ProbeConstantBuffer; }} namespace VirtualEnvironmentProbeLighting { typedef re::VirtualEnvironmentProbeLighting::TextureArgumentBuffer TextureArgumentBuffer; typedef re::VirtualEnvironmentProbeLighting::ProbeConstantBuffer ProbeConstantBuffer; } #else # ifdef __cplusplus namespace VirtualEnvironmentProbeLighting { typedef VFX_RE_C_TextureArgumentBuffer TextureArgumentBuffer; typedef VFX_RE_C_ProbeConstantBuffer ProbeConstantBuffer; } # endif #endif // VFX_IMPORT_RE_SHADERS_SHARED_VIRTUAL_ENV_PROBES #if VFX_IMPORT_RE_SHADERS_SHARED_BREAKTHROUGH namespace re { struct BreakthroughGPUData; } typedef re::BreakthroughGPUData BreakthroughGPUData; #elif defined(__cplusplus) struct BreakthroughGPUData; #endif // VFX_IMPORT_RE_SHADERS_SHARED_BREAKTHROUGH #ifdef __cplusplus struct re_entity_argument_buffer; struct re_scene_argument_buffer; struct re_vfx_object_constants; #endif #endif // !__VFX_RE_SHADERS_H__ // MARK: - vfx_render.h typedef VFX_ENUM(int32_t) { argument_buffer_index_zero = 0, // buffers argument_buffer_index_ibl = 10, // we need to skip enough indices to allow inlined properties (dt, time for now) argument_buffer_index_lights = 11, argument_buffer_index_vrr_map = 12, // light related 
textures argument_buffer_index_irradiance = 20, argument_buffer_index_radiance = 21, argument_buffer_index_brdfLUT = 22, argument_buffer_index_shadowMap = 23, argument_buffer_index_ssao = 24, // deferred related textures argument_buffer_index_linear_depth = 30, argument_buffer_index_opaque_zbuffer= 31, argument_buffer_index_normal_buffer = 32, argument_buffer_index_voxel_data = 33, argument_buffer_index_gi_tex = 34, // PBR argument_buffer_index_albedo_map = 100, argument_buffer_index_roughness_map = 101, argument_buffer_index_metalness_map = 102, argument_buffer_index_occlusion_map = 103, argument_buffer_index_normal_map = 104, argument_buffer_index_emissive_map = 105, // particle base argument_buffer_index_sampler = 200, argument_buffer_index_texture = 201, // quad specific argument_buffer_index_prebaked_lighting_posXYZ = 230, argument_buffer_index_prebaked_lighting_negXYZ = 231, // ribbons specific argument_buffer_index_ribbon_cap_begin_texture = 240, argument_buffer_index_ribbon_cap_end_texture = 241, // externals argument_buffer_index_env_probe_constants = 500, argument_buffer_index_env_probe_cube_array = 501, argument_buffer_index_env_probe_diffuse_array = 502, argument_buffer_index_env_probe_texture_array = 503, argument_buffer_index_spec_max_ess_map = 504, argument_buffer_index_spec_max_ess_avg_table = 505, argument_buffer_index_clipping_constants = 506, argument_buffer_index_blue_noise_texture = 507, argument_buffer_index_breakthrough_texture_array_deprecated = 508, argument_buffer_index_env_probe_cube = 509, argument_buffer_index_env_probe_diffuse = 510, argument_buffer_index_blend_irradiance = 511, argument_buffer_index_blend_radiance = 512, argument_buffer_index_isp_tonemap_lut = 513, argument_buffer_index_texture_filter = 514, argument_buffer_index_virtual_env_probe_constants = 525, argument_buffer_index_virtual_env_probe_textures = 526, argument_buffer_index_portal_opacity = 530, argument_buffer_index_vignetting_fadeout_distance_normalization = 531, 
argument_buffer_index_vignetting_total_fadeout_distance = 532, argument_buffer_index_vignetting_pivot_crws_position = 533, argument_buffer_index_crws_reference_position = 534, argument_buffer_index_render_options = 550, argument_buffer_index_breakthrough_read_index = 551, // argument_buffer_index_padding = 552, argument_buffer_index_fade_opacity = 553, argument_buffer_index_clipping_index_slice = 554, argument_buffer_index_portal_plane = 556, argument_buffer_index_env_lighting_weight = 557, argument_buffer_index_breakthrough_ui_breakthrough_enabled = 558, argument_buffer_index_breakthrough_ui_breakthrough_influence = 559, argument_buffer_index_breakthrough_texture_array = 570, argument_buffer_index_breakthrough_gpu_data = 571 } argument_buffer_index; struct global_constants { float dt; float time; #ifdef __METAL_VERSION__ constant IBLConstants& ibl [[ id(argument_buffer_index_ibl) ]]; constant LightConstantBuffer& lights [[ id(argument_buffer_index_lights) ]]; constant rasterization_rate_map_data& vrr_map [[ id(argument_buffer_index_vrr_map)]]; #if 1 // !USE_METAL_3 texturecube irradiance [[ id(argument_buffer_index_irradiance) ]]; texturecube radiance [[ id(argument_buffer_index_radiance) ]]; texture2d brdfLUT [[ id(argument_buffer_index_brdfLUT) ]]; depth2d shadowMap [[ id(argument_buffer_index_shadowMap) ]]; texture2d ssaoTex [[ id(argument_buffer_index_ssao) ]]; constant VoxelDataUniforms& voxelUni [[ id(argument_buffer_index_voxel_data) ]]; texture3d scattTex [[ id(argument_buffer_index_gi_tex) ]]; #else uint64_t irradiance; uint64_t radiance; uint64_t brdfLUT; uint64_t shadowMap; uint64_t ssaoTex; uint64_t voxelUni; uint64_t scattTex; #endif #endif }; #define VFX_CORE_UNIFORMS_CAMERA_COUNT 2 struct view_constants { matrix_float4x4 view_from_crworld[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // crWorldToViewArray in RE, viewTransform in SCN matrix_float4x4 proj_from_crworld[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // crWorldToProjArray in RE, viewProjectionTransform in SCN 
matrix_float4x4 proj_from_view[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // viewToProjArray in RE, projectionTransform in SCN matrix_float4x4 view_from_proj[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // projToViewArray in RE, inverseProjectionTransform in SCN // Position of the camera-view in camera-relative space, usually (0, 0, 0). simd_float3 crws_camera_pos[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // crwsCameraPositionArray in RE // should be == transpose(float3x3(view_from_crworld)) // matrix_float4x4 inverseViewMatrix; // world_from_view matrix_float4x4 inverseViewProjectionMatrix; // crWorld_from_world -> translation -camPos // view = camera_from_world // viewProj = clip_from_camera * camera_from_world // crProj = clip_from_cameraCenterZero * cameraCenterZero_from_world // cameraCenterZero_from_world = inverse(world_from_cameraCenterZero) = inverse(camera.transformNoTranslation) // crInverseViewProjectionMatrix = projection * inverse(camera.transformNoTranslation) matrix_float4x4 crInverseViewProjectionMatrix; // Used by VFX1 skybox to avoid flickering when the camera is far from the center of the world // This is the result of crworld_from_proj * proj_from_viewport // where proj_from_viewport = { // 2/width, 0, 0, -1 // 0,-2/height, 0, 1 // 0, 0, 1, 0 // 0, 0, 0, 1 // } matrix_float4x4 crws_from_viewport[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // This is the reference position of camera-relative world space in world-space. // It can be the position of the main camera, or other appropriate position based on current scene. // crws -> ws : x + worldReferencePosition // ws -> crws : x - worldReferencePosition simd_float3 worldReferencePosition; // crwsReferencePosition in RE // .xy: size .zw: 1 / size simd_float4 renderTargetSize; // used in multiViewport rendering or layered rendering. Most frequently 1 (regular) or 2 (stereo). uint32_t cameraCount; // used in multiViewport rendering. 
With amplification on you don't have to output the // viewportID or the renderTarget layer in the vertex shader (thanks to setVertexAmplificationCount:viewMappings) uint32_t useVertexAmplification; // contains xy: -drawableSize*2 zw: 1 simd_float4 ndcFromScreen; // madd -> pt * xy + zw simd_float2 nearFar; // Optim From SebAaltonen: // lin(z) = 1 / (lF.x + z * lF.y) // rcp + madd // with lF = float2(n / f, (f - n)/(n * f)) simd_float2 linearizationFactors; uint32_t padding; //was once REVFXClippingIndexSlice clippingIndexSlice; float fadeOpacity; simd_int2 renderTargetDepthStencilFormatSampleCount; simd_float4 vrrMapPhysicalSizeArray[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // percentage of the full viewport to consider for the current camera_index // xy: position zw: size simd_float4 viewport_percents[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // .xy: size .zw: 1 / size simd_float4 vrrMapScreenSize; simd_float4 portal_clip_plane_deprecated; matrix_float4x4 lastframe_proj_from_crworld[VFX_CORE_UNIFORMS_CAMERA_COUNT]; // crWorldToProjArray in RE, viewProjectionTransform in SCN but from last frame // store fog in view_constants because parameters are viewScale dependent simd_float3 fogParameters; // x:-1/(end-start) y:1-start*x z:exp #ifdef __METAL_VERSION__ simd_half4 fogColor; #else simd_short4 fogColor; #endif }; // Should this be merged with // device const float4x4* world_from_locals [[ buffer(4) ]], // for instances? struct object_constants { matrix_float4x4 world_from_local; // incorrect if the scale is non uniform // Do we want to keep it? 
float world_uniform_scale; }; #ifdef __METAL_VERSION__ #define VFX_ABID(a) [[ id(a) ]] #else #define VFX_ABID(a) #endif struct external_scene_constants { float portal_opacity VFX_ABID(argument_buffer_index_portal_opacity); vfx_half vignetting_fadeout_distance_normalization VFX_ABID(argument_buffer_index_vignetting_fadeout_distance_normalization); vfx_half vignetting_total_fadeout_distance VFX_ABID(argument_buffer_index_vignetting_total_fadeout_distance); simd_float3 vignetting_pivot_crws_position VFX_ABID(argument_buffer_index_vignetting_pivot_crws_position); simd_float3 crws_reference_position VFX_ABID(argument_buffer_index_crws_reference_position); }; struct external_entity_constants { uint16_t render_options VFX_ABID(argument_buffer_index_render_options); uint8_t breakthrough_read_index VFX_ABID(argument_buffer_index_breakthrough_read_index); uint8_t padding; float fade_opacity VFX_ABID(argument_buffer_index_fade_opacity); VFX_RE_C_ClippingIndexSlice clipping_index_slice VFX_ABID(argument_buffer_index_clipping_index_slice); simd_float4 portal_plane VFX_ABID(argument_buffer_index_portal_plane); float environment_lighting_weight VFX_ABID(argument_buffer_index_env_lighting_weight); bool bt_ui_breakthrough_enabled VFX_ABID(argument_buffer_index_breakthrough_ui_breakthrough_enabled); vfx_half bt_ui_breakthrough_influence VFX_ABID(argument_buffer_index_breakthrough_ui_breakthrough_influence); #ifdef __METAL_VERSION__ texture2d_array breakthroughTextureArray [[ id(argument_buffer_index_breakthrough_texture_array) ]]; constant BreakthroughGPUData* breakthroughData [[ id(argument_buffer_index_breakthrough_gpu_data) ]]; #endif }; #ifdef __METAL_VERSION__ constant int kMaxProbeSources = 2; #endif struct external_scene_buffers { #ifdef __METAL_VERSION__ constant ProbeConstantBuffer& probes [[ id(argument_buffer_index_env_probe_constants) ]]; texturecube_array envProbeCubeArray [[ id(argument_buffer_index_env_probe_cube_array) ]]; texturecube_array envProbeDiffuseArray [[ 
id(argument_buffer_index_env_probe_diffuse_array) ]]; texture2d_array envProbeTextureArray [[ id(argument_buffer_index_env_probe_texture_array) ]]; texture2d textureSpecMaxEss [[ id(argument_buffer_index_spec_max_ess_map) ]]; constant half* specMaxEssAvgTable [[ id(argument_buffer_index_spec_max_ess_avg_table) ]]; constant ClippingConstants& clippingConstants [[ id(argument_buffer_index_clipping_constants) ]]; texture2d blueNoiseTexture [[ id(argument_buffer_index_blue_noise_texture) ]]; texture2d_array breakthroughTextureArray_deprecated [[ id(argument_buffer_index_breakthrough_texture_array_deprecated) ]]; // To be cleaned up metal::texturecube envProbeCube [[ id(argument_buffer_index_env_probe_cube) ]]; metal::texturecube envProbeDiffuse [[ id(argument_buffer_index_env_probe_diffuse) ]]; texturecube blendIrradiance [[ id(argument_buffer_index_blend_irradiance) ]]; texturecube blendRadiance [[ id(argument_buffer_index_blend_radiance) ]]; texture1d ispTonemapLUT [[ id(argument_buffer_index_isp_tonemap_lut) ]]; texture2d textureFilter [[ id(argument_buffer_index_texture_filter) ]]; constant VirtualEnvironmentProbeLighting::ProbeConstantBuffer& virtualEnvProbeConstants [[ id(argument_buffer_index_virtual_env_probe_constants) ]]; #if !TARGET_OS_SIMULATOR constant VirtualEnvironmentProbeLighting::TextureArgumentBuffer& virtualEnvProbeTextures [[ id(argument_buffer_index_virtual_env_probe_textures) ]]; #endif // TARGET_OS_SIMULATOR #endif // __METAL_VERSION__ }; // Used to keep semantic with corresponding value between swift and shader // size_t for swift Int compat typedef VFX_ENUM(size_t) { blend_mode_constants_opaque = 0, blend_mode_constants_alpha = 1, blend_mode_constants_additive = 2, blend_mode_constants_mask = 3 // deprecated } blend_mode_constants; // Mesh/Vertex // Keep these function constants index separated from particle_constants since // they can be used in "generic" rendering context typedef VFX_ENUM(int32_t) { mesh_constants_has_vertex_position = 400, 
mesh_constants_has_vertex_normal = 401, mesh_constants_has_vertex_uv0 = 402, mesh_constants_has_vertex_uv1 = 403, mesh_constants_has_vertex_uv2 = 404, mesh_constants_has_vertex_uv3 = 405, mesh_constants_has_vertex_color = 406, mesh_constants_has_vertex_tangent = 407 } mesh_constants; typedef VFX_BINDING_ENUM { vfx_vertex_attribute_position = 0, vfx_vertex_attribute_normal = 1, vfx_vertex_attribute_uv0 = 2, vfx_vertex_attribute_uv1 = 3, vfx_vertex_attribute_color = 4, vfx_vertex_attribute_tangent = 5, vfx_vertex_attribute_uv2 = 6, vfx_vertex_attribute_uv3 = 7, vfx_vertex_attribute_weights = 8, vfx_vertex_attribute_joints = 9 } vfx_vertex_attribute; // This need to be in sync with vfx_metal VFXGBufferAttachment typedef VFX_BINDING_ENUM { deferred_attachments_emission = 0, deferred_attachments_normal = 1, deferred_attachments_roughness_metalness_ao = 2, deferred_attachments_albedo = 3, deferred_attachments_velocity = 4 } deferred_attachments; #ifdef __METAL_VERSION__ constant bool has_vertex_position [[ function_constant(mesh_constants_has_vertex_position) ]]; constant bool has_vertex_normal [[ function_constant(mesh_constants_has_vertex_normal) ]]; constant bool has_vertex_uv0 [[ function_constant(mesh_constants_has_vertex_uv0) ]]; constant bool has_vertex_uv1 [[ function_constant(mesh_constants_has_vertex_uv1) ]]; constant bool has_vertex_uv2 [[ function_constant(mesh_constants_has_vertex_uv2) ]]; constant bool has_vertex_uv3 [[ function_constant(mesh_constants_has_vertex_uv3) ]]; constant bool has_vertex_color [[ function_constant(mesh_constants_has_vertex_color) ]]; constant bool has_vertex_tangent [[ function_constant(mesh_constants_has_vertex_tangent) ]]; typedef struct __Vertex // error: anonymous non-C-compatible type given name for linkage purposes by typedef declaration { float3 position [[ attribute(vfx_vertex_attribute_position), function_constant(has_vertex_position) ]]; float3 normal [[ attribute(vfx_vertex_attribute_normal), 
function_constant(has_vertex_normal) ]]; float2 uv0 [[ attribute(vfx_vertex_attribute_uv0), function_constant(has_vertex_uv0) ]]; float2 uv1 [[ attribute(vfx_vertex_attribute_uv1), function_constant(has_vertex_uv1) ]]; float2 uv2 [[ attribute(vfx_vertex_attribute_uv2), function_constant(has_vertex_uv2) ]]; float2 uv3 [[ attribute(vfx_vertex_attribute_uv3), function_constant(has_vertex_uv3) ]]; float4 color [[ attribute(vfx_vertex_attribute_color), function_constant(has_vertex_color) ]]; float4 tangent [[ attribute(vfx_vertex_attribute_tangent), function_constant(has_vertex_tangent) ]]; float3 get_position() { return has_vertex_position ? position : float3(0); } float3 get_normal() { return has_vertex_normal ? normal : float3(0, 0, 1); } float2 get_uv0() { return has_vertex_uv0 ? uv0 : float2(0); } float2 get_uv1() { return has_vertex_uv1 ? uv1 : float2(0); } float2 get_uv2() { return has_vertex_uv2 ? uv2 : float2(0); } float2 get_uv3() { return has_vertex_uv3 ? uv3 : float2(0); } float4 get_color() { return has_vertex_color ? color : float4(1); } float4 get_tangent() { return has_vertex_tangent ? 
tangent : float4(0, 0, 0, 1); } } Vertex; constant int blending_mode [[ function_constant(particle_constants_render_blending_mode) ]]; constant bool enable_texture_2d [[ function_constant(particle_constants_render_enable_texture_2d) ]]; constant bool enable_texture_3d [[ function_constant(particle_constants_render_enable_texture_3d) ]]; constant bool enable_texture_2d_array [[ function_constant(particle_constants_render_enable_texture_2d_array) ]]; constant bool enable_texture_cube [[ function_constant(particle_constants_render_enable_texture_cube) ]]; constant bool enable_texture_prelighted [[ function_constant(particle_constants_render_enable_texture_prelighted) ]]; constant bool has_particle_transform [[ function_constant(particle_constants_render_has_particle_transform) ]]; constant bool texture_is_not_alphapremultiplied [[ function_constant(particle_constants_render_texture_is_not_alphapremultiplied) ]]; constant bool has_pbr_lighting [[ function_constant(particle_constants_render_has_pbr_lighting) ]]; constant bool disable_specular [[ function_constant(particle_constants_render_disable_specular) ]]; constant int animation_mode [[ function_constant(particle_constants_render_animation_mode) ]]; constant bool inter_frame_interpolation [[ function_constant(particle_constants_render_inter_frame_interpolation) ]]; constant bool enable_cutoff [[ function_constant(particle_constants_render_enable_cutoff) ]]; constant bool enable_sorting [[ function_constant(particle_constants_render_enable_sorting) ]]; constant bool enable_thick_lines [[ function_constant(particle_constants_render_enable_thick_lines) ]]; constant bool enable_soft_particles [[ function_constant(particle_constants_render_enable_soft_particles) ]]; constant bool need_opaque_zbuffer [[ function_constant(particle_constants_render_need_opaque_zbuffer) ]]; constant bool need_normal_buffer [[ function_constant(particle_constants_render_need_normal_buffer) ]]; //constant bool hasVoxelBox [[ 
function_constant(42) ]]; constant bool enable_scattering [[ function_constant(particle_constants_render_enable_scattering) ]]; //constant bool enablePrecomputeGI [[ function_constant(47) ]]; //constant bool enableAO [[ function_constant(43) ]]; //constant bool enableGI [[ function_constant(44) ]]; //constant bool enableIBL [[ function_constant(45) ]]; //constant bool voxelOpacityOnly [[ function_constant(46) ]]; constant bool is_layered_rendering [[ function_constant(renderer_constants_enable_layered_rendering) ]]; constant bool is_multi_viewport_rendering [[ function_constant(renderer_constants_enable_multi_viewport_rendering) ]]; constant bool _enable_vertex_amplification [[ function_constant(renderer_constants_enable_vertex_amplification) ]]; constant bool enable_vertex_amplification = is_function_constant_defined(_enable_vertex_amplification) ? _enable_vertex_amplification : false; constant bool _use_api_v2 [[ function_constant(renderer_constants_use_api_v2) ]]; constant bool use_api_v2 = is_function_constant_defined(_use_api_v2) ? 
_use_api_v2 : false; constant bool use_api_v1 = !use_api_v2; constant bool enable_clipping [[ function_constant(renderer_constants_enable_clipping) ]]; constant bool enable_alpha_fade [[ function_constant(renderer_constants_enable_alpha_fade) ]]; constant bool enable_dither_fade [[ function_constant(renderer_constants_enable_dither_fade) ]]; constant bool enable_depth_mitigation [[ function_constant(renderer_constants_enable_depth_mitigation) ]]; constant bool enable_sample_mask_read = enable_dither_fade; constant bool enable_sample_mask_write = enable_dither_fade; constant bool enable_auto_alpha [[ function_constant(renderer_constants_enable_auto_alpha) ]]; constant bool outputs_normal_roughness [[ function_constant(renderer_constants_outputs_normal_roughness) ]]; constant bool outputs_albedo_metalness [[ function_constant(renderer_constants_outputs_albedo_metalness) ]]; constant bool outputs_radiance_ao [[ function_constant(renderer_constants_outputs_radiance_ao) ]]; constant int rendering_mode [[ function_constant(renderer_constants_rendering_mode) ]]; constant bool enable_ssao [[ function_constant(renderer_constants_enable_ssao) ]]; constant bool need_inter_frame_interpolation = (animation_mode != -1) && inter_frame_interpolation; constant bool is_opaque = (blending_mode == blend_mode_constants_opaque) || (blending_mode == blend_mode_constants_mask); constant bool need_worldPos = enable_scattering || has_pbr_lighting || enable_clipping; constant bool need_worldNrm = has_pbr_lighting || enable_texture_cube || is_opaque; constant bool need_point_coord = enable_texture_2d || enable_texture_3d || has_pbr_lighting; constant bool need_box_normal = enable_texture_2d || enable_texture_3d; // re_uniforms is true when rendering from RE constant bool gamma_blending [[ function_constant(renderer_constants_gamma_blending) ]]; constant bool re_uniforms [[ function_constant(renderer_constants_inside_re) ]]; constant bool vfx_uniforms = !re_uniforms; constant bool 
use_api_v1_in_re = use_api_v1 && re_uniforms; #if VFX_RE_SHADERS_AVAILABLE constant bool enable_re_pbr_lighting = re_uniforms; #else constant bool enable_re_pbr_lighting = false; #endif constant bool enable_env_probes = re_uniforms; constant bool enable_multiscatter_brdf = re_uniforms; constant bool enable_breakthrough = re_uniforms; constant bool enable_nearfield_vignetting = re_uniforms; constant bool enable_fog = !re_uniforms; constant bool need_screen_uv = enable_breakthrough; constant bool use_interpolated_crworld [[ function_constant(renderer_constants_use_interpolated_crworld) ]]; // we don't consider using vrr map usage since it's not available on shader side when rendering in RE constant bool might_use_vrr_map = false; constant bool enable_texture_prelighted_array = enable_texture_prelighted && enable_texture_2d_array; constant bool enable_texture_prelighted_2d = enable_texture_prelighted && !enable_texture_2d_array; constant bool uniform_sphere = has_size1D && !has_particle_transform; constant bool non_uniform_sphere = !uniform_sphere; #if TARGET_OS_VISION constant bool enable_depth_as_color = re_uniforms; #else constant bool enable_depth_as_color = false; #endif #endif // MARK: - vfx_render_options.h #ifndef __VFX_RENDER_OPTIONS_H__ #define __VFX_RENDER_OPTIONS_H__ #ifndef VFX_ENUM // avoids importing vfx_base.h here #define VFX_ENUM(type) enum __attribute__((enum_extensibility(closed))) : type #endif typedef VFX_ENUM(uint16_t) { vfx_render_option_enable_clipping = (0x1 << 0), vfx_render_option_enable_breakthrough = (0x1 << 1), vfx_render_option_enable_dither_fade = (0x1 << 2), vfx_render_option_enable_nearfield_vignetting = (0x1 << 3), vfx_render_option_enable_portal_clipping = (0x1 << 4), vfx_render_option_enable_depth_mitigation = (0x1 << 5), vfx_render_option_render_for_blur = (0x1 << 6), vfx_render_option_enable_spatial_focus = (0x1 << 7), vfx_render_option_enable_visual_depth_static_occlusion = (0x1 << 8), 
vfx_render_option_enable_scene_understanding_static_occlusion = (0x1 << 9),
vfx_render_option_enable_visual_depth_static_occlusion_texture_available = (0x1 << 10),
} vfx_render_options;

// Setters: each returns `flags` with the corresponding vfx_render_option_* bit set.
// The input is not modified; callers must use the returned value.
static inline uint16_t renderOptionsEnableClipping(uint16_t flags) { return flags | vfx_render_option_enable_clipping; }
static inline uint16_t renderOptionsEnableBreakthrough(uint16_t flags) { return flags | vfx_render_option_enable_breakthrough; }
static inline uint16_t renderOptionsEnableDitherFade(uint16_t flags) { return flags | vfx_render_option_enable_dither_fade; }
static inline uint16_t renderOptionsEnableNearfieldVignetting(uint16_t flags) { return flags | vfx_render_option_enable_nearfield_vignetting; }
static inline uint16_t renderOptionsEnablePortalClipping(uint16_t flags) { return flags | vfx_render_option_enable_portal_clipping; }
static inline uint16_t renderOptionsEnableDepthMitigation(uint16_t flags) { return flags | vfx_render_option_enable_depth_mitigation; }
static inline uint16_t renderOptionsRenderForBlur(uint16_t flags) { return flags | vfx_render_option_render_for_blur; }
static inline uint16_t renderOptionsEnableSpatialFocus(uint16_t flags) { return flags | vfx_render_option_enable_spatial_focus; }
static inline uint16_t renderOptionsEnableVisualDepthStaticOcclusion(uint16_t flags) { return flags | vfx_render_option_enable_visual_depth_static_occlusion; }
static inline uint16_t renderOptionsEnableSceneUnderstandingStaticOcclusion(uint16_t flags) { return flags | vfx_render_option_enable_scene_understanding_static_occlusion; }
static inline uint16_t renderOptionsEnableVisualDepthStaticOcclusionTextureAvailable(uint16_t flags) { return flags | vfx_render_option_enable_visual_depth_static_occlusion_texture_available; }

// Queries: each returns true when the corresponding vfx_render_option_* bit is set in `flags`.
static inline bool renderOptionsIsClippingEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_clipping) != 0; }
static inline bool renderOptionsIsBreakthroughEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_breakthrough) != 0; }
// NOTE(review): the leading `return false;` makes the flag test after it unreachable,
// so this always reports "disabled" whatever `flags` holds. Presumably a deliberate
// kill switch for near-field vignetting — confirm before removing either statement.
static inline bool renderOptionsIsNearfieldVignettingEnabled(uint16_t flags) { return false; return (flags & vfx_render_option_enable_nearfield_vignetting) != 0; }
static inline bool renderOptionsIsDitherFadeEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_dither_fade) != 0; }
static inline bool renderOptionsPortalClipPlaneEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_portal_clipping) != 0; }
static inline bool renderOptionsIsDepthMitigationEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_depth_mitigation) != 0; }
static inline bool renderOptionsIsSpatialFocusEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_spatial_focus) != 0; }
static inline bool renderOptionsIsRenderingForBlur(uint16_t flags) { return (flags & vfx_render_option_render_for_blur) != 0; }
static inline bool renderOptionsIsVisualDepthStaticOcclusionEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_visual_depth_static_occlusion) != 0; }
static inline bool renderOptionsIsSceneUnderstandingStaticOcclusionEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_scene_understanding_static_occlusion) != 0; }
static inline bool renderOptionsIsVisualDepthStaticOcclusionTextureAvailableEnabled(uint16_t flags) { return (flags & vfx_render_option_enable_visual_depth_static_occlusion_texture_available) != 0; }

#ifdef __cplusplus
// Records forced-on / forced-off render options as two bit masks:
//   overrideBits   - which option bits are overridden
//   overrideValues - the value forced for each overridden bit
struct render_options_override {
    uint16_t overrideBits = 0;
    uint16_t overrideValues = 0;

    // Convenience wrappers around enable() / disable() for individual options.
    void disableClipping() { disable(vfx_render_option_enable_clipping); }
    void disableDitherFade() { disable(vfx_render_option_enable_dither_fade); }
    void enableDitherFade() { enable(vfx_render_option_enable_dither_fade); }
    void disableBreakthrough() { disable(vfx_render_option_enable_breakthrough); }
    void enableNearFieldVignetting() { enable(vfx_render_option_enable_nearfield_vignetting); }

    // Returns `optionFlags` with every overridden bit replaced by its forced value.
    uint16_t applyOverrideOn(uint16_t optionFlags) const { return
(optionFlags | (overrideValues & overrideBits)) & (overrideValues | ~overrideBits); }
    // ^ For a bit set in overrideBits the result is the overrideValues bit
    //   ((F | V) & V == V); for any other bit it is the optionFlags bit unchanged.

    // Marks `flag` as overridden and forces it on.
    void enable(uint16_t flag) { overrideBits |= flag; overrideValues |= flag; }
    // Marks `flag` as overridden and forces it off.
    void disable(uint16_t flag) { overrideBits |= flag; overrideValues &= ~flag; }
};
#endif

#endif // !__VFX_RENDER_OPTIONS_H__

// MARK: - vfx_shader_api.h
#ifndef __METAL_VERSION__

// Outside Metal only empty placeholder types are declared under these names.
typedef struct {} pbr_surface_parameters;
typedef struct {} pbr_lighting_parameters;

#else

struct global_constants;

// Externally supplied constant buffers; each member only exists when its function constant is true.
struct external_constants {
    constant external_scene_constants& sceneConstants [[ buffer(vfx_buffer_binding_index_external_scene_constants), function_constant(use_api_v1_in_re) ]];
    constant external_scene_buffers& sceneBuffers [[ buffer(vfx_buffer_binding_index_external_scene_buffers), function_constant(use_api_v1_in_re) ]];
    constant external_entity_constants& entityConstants [[ buffer(vfx_buffer_binding_index_external_entity_constants), function_constant(use_api_v1) ]];
    // /!\ these constants are also used in VFX2 to pass opacity
};

// Inputs of vfx::api::applySystemTreatments.
struct system_treatment_parameters {
    float3 crWorldPosition;
    uint16_t cameraIndex;
    render_options_override renderOptionsOverride;
    external_constants externals;
    float2 screenUV [[ function_constant(enable_breakthrough) ]];
    float4 screenPosition [[ function_constant(enable_dither_fade) ]];
};

// Surface description for PBR lighting; the initializers below are the defaults.
struct pbr_surface_parameters {
    half3 baseColor = half3(0.827h, 0.827h, 0.827h);
    half roughness = .5h;
    half metallic = 0.h;
    half3 emissive = 0.h;
    float3 normal; // in world space
    float3 tangent; // in world space
    half alpha = 1.h;
    half ambientOcclusion = 1.h;
    half thickness = 1.h;
};

// Inputs of vfx::api::computePbrLighting.
struct pbr_lighting_parameters {
    float3 crWorldPosition;
    float3 worldViewDir;
    pbr_surface_parameters surface;
    bool useAnalyticalLights = true;
    bool useIBL = true;
    bool useEnvProbes = true;
    constant global_constants& globals;
    external_constants externals;
};

// this is the API exposed to custom shaders
// this will be visible functions linked through fragmentLinkedFunctions.privateFunctions
// linking is not supported on < apple6 devices
#if USE_LINKED_FUNCTIONS
namespace vfx { namespace api {
[[visible]] half3 computePbrLighting(thread const pbr_lighting_parameters& params);
[[visible]] half4 applySystemTreatments(half4 color, thread const system_treatment_parameters& params, thread uint& sampleMask);
}} // namespace vfx::api
#endif // USE_LINKED_FUNCTIONS

#endif // __METAL_VERSION__

// MARK: - vfx_particle_io.h
#ifdef __METAL_VERSION__

#ifdef VFX_IS_UBER_HEADER
constant bool enable_per_vertex_color = true;
constant bool enable_portal_clip_plane = false;
#endif

// Exactly one of per-vertex / per-primitive color is active.
constant bool enable_per_primitive_color = !enable_per_vertex_color;

// Per-vertex data.
struct particle_vertex_io {
#if TARGET_OS_VISION
    float4 position [[ position, invariant ]];
#else
    float4 position [[ position ]];
#endif
    float3 crworld_position;
    float2 uv0;
    float2 uv1;
    float2 uv2;
    float2 uv3;
    float2 screen_uv [[ center_no_perspective ]];
    half4 color [[ function_constant(enable_per_vertex_color) ]];
    half4 tangent;
    half3 normal;
    float portal_clip_distance [[ clip_distance, function_constant(enable_portal_clip_plane) ]];
    half linear_z;

    // store ro/rd in uv to avoid extra size in mesh shaders
    // layout: ro = (uv0.x, uv0.y, uv2.x), rd = (uv1.x, uv1.y, uv2.y);
    // these accessors overwrite / reinterpret uv0, uv1 and uv2.
    void set_ro(float3 ro) { uv0.xy = ro.xy; uv2.x = ro.z; }
    void set_rd(float3 rd) { uv1.xy = rd.xy; uv2.y = rd.z; }
    float3 get_ro() const { return float3(uv0.xy, uv2.x); }
    float3 get_rd() const { return float3(uv1.xy, uv2.y); }

#if DEBUG_RIBBON
    int prof_gen;
    int prof_index;
    int p0;
    int p1;
    int p2;
    int p3;
    float3 deriv;
#endif
};

// Per-primitive data; every member is [[ flat ]] or a primitive-level attribute.
struct particle_primitive_io {
    float3 world_axis_x [[ flat ]];
    float3 world_axis_y [[ flat ]];
    float3 world_axis_z [[ flat ]];
    float3 center_crworld [[ flat ]];
    float3 radii [[ flat ]];
    float2 e0 [[ flat ]];
    float2 e1 [[ flat ]];
    half4 primitive_color [[ flat, function_constant(enable_per_primitive_color) ]];
    int32_t particle_index [[ flat ]];
    ushort2 array_frames [[ flat ]];
    half frame [[ flat ]];
    half thickness [[ flat ]];
    uint16_t camera_index [[ flat ]];
    uint8_t layer_id [[ render_target_array_index, function_constant(is_layered_rendering) ]];
    uint8_t viewport_id [[ viewport_array_index, function_constant(is_multi_viewport_rendering) ]];
};

// Combines the per-vertex and per-primitive parts and hides which one carries
// the color and the camera/layer/viewport indices.
struct particle_io {
    particle_vertex_io vertice; // why this name: it holds a single vertex, but the member can't be called `vertex` because of Metal
    particle_primitive_io primitive;

    // Reads the color from whichever of vertex / primitive storage is enabled.
    half4 get_color() const { return enable_per_vertex_color ? vertice.color : primitive.primitive_color; }
    // Writes the color to whichever of vertex / primitive storage is enabled.
    void set_color(half4 color) { enable_per_vertex_color ? vertice.color = color : primitive.primitive_color = color; }

    // Each index falls back to 0 when the feature that provides it is off.
    uint16_t camera_index() const { return (re_uniforms || is_layered_rendering || is_multi_viewport_rendering) ? primitive.camera_index : 0; }
    uint16_t layer_index() const { return is_layered_rendering ? primitive.layer_id : 0; }
    uint16_t viewport_index() const { return is_multi_viewport_rendering ? primitive.viewport_id : 0; }
};

// particle_io plus the [[ point_size ]] output.
struct particle_point_io {
    particle_io io;
    float fragment_size [[ point_size ]];
};

#endif // __METAL_VERSION__

// MARK: - vfx_shader_builtin.h
#ifdef __METAL_VERSION__

#define SUPPORTS_MESH_SHADING (__METAL_VERSION__ >= 300)

// Fragment Output
// NOTE(review): albedoMetalness and depth_as_color both target color(4);
// presumably their function constants are never true at the same time — confirm.
struct forward_frag_out {
    half4 color [[ color(0) ]];
    half4 normalRoughness [[ color(2), function_constant(outputs_normal_roughness)]];
    half4 albedoMetalness [[ color(4), function_constant(outputs_albedo_metalness)]];
    half4 radianceAO [[ color(5), function_constant(outputs_radiance_ao)]];
    float depth_as_color [[ color(4), function_constant(enable_depth_as_color) ]];
    uint sample_mask [[ sample_mask, function_constant(enable_sample_mask_write) ]];
};

// G-buffer outputs; attachment slots come from the deferred_attachments enum.
struct deferred_frag_out {
    half4 albedo [[ color(deferred_attachments_albedo) ]];
    half4 normal [[ color(deferred_attachments_normal) ]]; // normal.w is thickness in VFX1
    half4 roughness_metalness_ao [[ color(deferred_attachments_roughness_metalness_ao) ]];
    half4 emission [[ color(deferred_attachments_emission) ]];
    half2 velocity [[ color(deferred_attachments_velocity) ]];
};

// versions where the depth can be modified by the
fragment shader struct raytraced_forward_frag_out { forward_frag_out base; float depth [[ depth(less) ]]; }; struct raytraced_deferred_frag_out { // fragile deferred_frag_out out; float depth [[ depth(less) ]]; }; static inline void setDeferredAlbedoAndAlpha(thread deferred_frag_out& out, half4 albedo) { out.albedo = albedo; } static inline void setDeferredAlbedo(thread deferred_frag_out& out, half3 albedo) { // If we don't write alpha, write 1 because deferred is multiplying the result by albedo.a... out.albedo = half4(albedo, 1.h); } // REMOVE alpha has no value in GBuffer... static inline void setDeferredAlpha(thread deferred_frag_out& out, half alpha) { out.albedo.a = alpha; } static inline void setDeferredNormalLinearZ(thread deferred_frag_out& out, half3 normal_ws, half linearZ) { out.normal.xyz = normal_ws; out.normal.w = linearZ; } static inline void setDeferredRoughness(thread deferred_frag_out& out, half roughness) { out.roughness_metalness_ao.x = roughness; } static inline void setDeferredMetalness(thread deferred_frag_out& out, half metalness) { out.roughness_metalness_ao.y = metalness; } static inline void setDeferredAmbientOcclusion(thread deferred_frag_out& out, half ao) { out.roughness_metalness_ao.z = ao; } static inline void setDeferredRMA(thread deferred_frag_out& out, half4 rma) { out.roughness_metalness_ao = rma; } static inline void setDeferredEmission(thread deferred_frag_out& out, half3 e) { out.emission = half4(e, 0.h); } static inline void setDeferredVelocity(thread deferred_frag_out& out, half2 v) { out.velocity = v; } static inline half3 unpremultiply(half4 c) { return c.rgb / max(c.a, 1.h / MAXHALF); } static inline half4 applyOpacity(half4 color, half opacity){ if (enable_alpha_fade && is_function_constant_defined(blending_mode)) { switch (blending_mode) { case blend_mode_constants_additive: color.rgb *= opacity; break; case blend_mode_constants_alpha: // Apply opacity to all channels as we always blend with premultiplied alpha. 
color *= opacity; break; } } return color; } // clip space is before normalization by w // ndc is after. static inline float2 ndc_to_uv(float2 ndc, float4 viewport_percents) { float2 vc = float2(ndc * float2(0.5f, -0.5f) + 0.5f); // [-1..1] -> [0..1] + flipY return vc * viewport_percents.zw + viewport_percents.xy; // sub viewport taken into account } static inline float2 ndc_to_viewport(float2 ndc, float4 viewport_percents, float2 rendertarget_size) { return ndc_to_uv(ndc, viewport_percents) * rendertarget_size; } using builtin_surface = pbr_surface_parameters; static inline builtin_surface make_surface(half4 albedo_alpha, half3 rma, half emissive) { return (builtin_surface){ .baseColor = albedo_alpha.rgb * saturate(1 - emissive), .roughness = rma.x, .metallic = rma.y, .emissive = albedo_alpha.rgb * max(0.h, emissive), .alpha = albedo_alpha.w, .ambientOcclusion = rma.z, .thickness = 1.h }; } #if VFX_RE_SHADERS_AVAILABLE struct re_buffers { constant EntityConstants& entityConstants [[ buffer(vfx_buffer_binding_index_re_entity_constants), function_constant(use_api_v2) ]]; constant ViewConstants& viewConstants [[ buffer(vfx_buffer_binding_index_re_view_constants), function_constant(use_api_v2) ]]; constant GlobalConstants& globalConstants [[ buffer(vfx_buffer_binding_index_re_global_constants), function_constant(use_api_v2) ]]; constant re_vfx_object_constants& objectConstants [[ buffer(vfx_buffer_binding_index_re_vfx_object_constants), function_constant(use_api_v2) ]]; constant re_entity_argument_buffer& entityArgumentBuffer [[ buffer(vfx_buffer_binding_index_re_entity_argument_buffer), function_constant(use_api_v2) ]]; constant re_scene_argument_buffer& sceneArgumentBuffer [[ buffer(vfx_buffer_binding_index_re_scene_argument_buffer), function_constant(use_api_v2) ]]; #if TARGET_OS_SIMULATOR constant VirtualEnvironmentProbeLighting::TextureArgumentBuffer& virtualEnvProbeTextures [[ buffer(vfx_buffer_binding_index_virtual_env_probe_textures_sim), 
function_constant(use_api_v2) ]]; #endif // TARGET_OS_SIMULATOR }; #endif struct builtin_args { constant global_constants& globals [[ buffer(vfx_buffer_binding_index_global_constants) ]]; constant view_constants& viewUniforms [[ buffer(vfx_buffer_binding_index_view_constants) ]]; external_constants externals; #if TARGET_OS_SIMULATOR constant VirtualEnvironmentProbeLighting::TextureArgumentBuffer& virtualEnvProbeTextures [[ buffer(vfx_buffer_binding_index_virtual_env_probe_textures), function_constant(use_api_v1) ]]; #endif // TARGET_OS_SIMULATOR #if VFX_RE_SHADERS_AVAILABLE re_buffers reData; #endif uint16_t camera_index(thread uint32_t& inst_id, uint16_t amp_id) { if (re_uniforms) { if (viewUniforms.useVertexAmplification) { return amp_id; } else { // this deals with viewportCount of 1 or 2 inst_id = inst_id >> (viewUniforms.cameraCount - 1); uint16_t cam = inst_id & (viewUniforms.cameraCount - 1); return cam; } } else { if (viewUniforms.useVertexAmplification) { return amp_id; } else { uint16_t cam = inst_id % viewUniforms.cameraCount; inst_id = inst_id / viewUniforms.cameraCount; return cam; } } } float3 crws_from_world(float3 ws_pos) { return ws_pos - viewUniforms.worldReferencePosition; } float4 crws_from_world(float4 ws_pos) { // keep w intact ws_pos.xyz -= viewUniforms.worldReferencePosition; return ws_pos; } float4x4 crws_from_world(float4x4 ws_transform) { // keep w intact ws_transform[3].xyz -= viewUniforms.worldReferencePosition; return ws_transform; } float3 world_from_crws(float3 crws_pos) { return crws_pos + viewUniforms.worldReferencePosition; } float3 crws() { return viewUniforms.worldReferencePosition; } float3 world_camera_position(uint16_t camera_index) { return world_from_crws(viewUniforms.crws_camera_pos[camera_index]); } float3 crworld_camera_position(uint16_t camera_index) { return viewUniforms.crws_camera_pos[camera_index]; } float3 transform_position_view_from_world(float3 ws_pos, uint16_t camera_index) { return 
vfx_transform_position(view_from_crws(camera_index), crws_from_world(ws_pos.xyz)); } float4 transform_position_proj_from_world(float3 ws_pos, uint16_t camera_index) { return proj_from_crws(camera_index) * float4(crws_from_world(ws_pos), 1); } float4x4 proj_from_crws(uint16_t camera_index) { return viewUniforms.proj_from_crworld[camera_index]; } float4x4 view_from_crws(uint16_t camera_index) { return viewUniforms.view_from_crworld[camera_index]; } float4x4 proj_from_view(uint16_t camera_index) { return viewUniforms.proj_from_view[camera_index]; } float4x4 view_from_proj(uint16_t camera_index) { return viewUniforms.view_from_proj[camera_index]; } float4x4 lastframe_proj_from_crws(uint16_t camera_index) { return viewUniforms.lastframe_proj_from_crworld[camera_index]; } float4x4 view_from_world(uint16_t camera_index) { // view_from_crworld * crws_from_world return vfx_post_translate(viewUniforms.view_from_crworld[camera_index], -viewUniforms.worldReferencePosition); } float3x3 world_from_view(uint16_t camera_index) { // Orthonormal 3x3 matrices inverse = transpose (Warning: if the camera has a scale, this is wrong) return transpose(vfx_float3x3(viewUniforms.view_from_crworld[camera_index])); } float3 viewDir(uint16_t camera_index) { // in world space return -world_from_view(camera_index)[2]; } float dt() { return globals.dt; } float2 drawableSize() { return viewUniforms.renderTargetSize.xy; } float2 invDrawableSize() { return viewUniforms.renderTargetSize.zw; } float2 nearFar() { return viewUniforms.nearFar; } int sampleCount() const { return vfx_uniforms ? 
1 : viewUniforms.renderTargetDepthStencilFormatSampleCount.y; } inline system_treatment_parameters makeSystemTreatmentParameters(float3 crWorldPosition, float4 position, uint16_t camera_index, float2 screenUV) { auto params = system_treatment_parameters { .crWorldPosition = crWorldPosition, .cameraIndex = camera_index, .externals = externals, .screenUV = screenUV, .screenPosition = position }; return params; } inline pbr_lighting_parameters makeLightingParameters(float3 crWorldPosition, pbr_surface_parameters surface = pbr_surface_parameters(), bool useAnalyticalLights = true) { return pbr_lighting_parameters { .crWorldPosition = crWorldPosition, .worldViewDir = is_proj_orthographic(0) ? viewDir(0) : -normalize(crWorldPosition), .surface = surface, .useAnalyticalLights = useAnalyticalLights, .useIBL = true, .useEnvProbes = true, .globals = globals, .externals = externals }; } inline pbr_lighting_parameters makeLightingParameters(float3 crWorldPosition, float3 worldNormal, float3 worldtangent, pbr_surface_parameters surface = pbr_surface_parameters(), bool useAnalyticalLights = true) { surface.normal = worldNormal; surface.tangent = worldtangent; return makeLightingParameters(crWorldPosition, surface, useAnalyticalLights); } float3 crworld_from_viewport(float3 vc, uint16_t camera_index) { float4 p; if (might_use_vrr_map && viewUniforms.vrrMapScreenSize.x > 0.f) { rasterization_rate_map_decoder map(globals.vrr_map); float2 pos_screen = map.map_physical_to_screen_coordinates(vc.xy, camera_index); p = viewUniforms.crws_from_viewport[camera_index] * float4(pos_screen, vc.z, 1.f); } else { p = viewUniforms.crws_from_viewport[camera_index] * float4(vc.xyz, 1.f); } return p.xyz / p.w; } float3 crworld_position(thread const particle_io& io) { return use_interpolated_crworld ? 
io.vertice.crworld_position : crworld_from_viewport(io.vertice.position.xyz, io.camera_index()); } inline float2 computeScreenUV(float4 screenPosition, uint16_t cameraIndex) { // Convert from clip-space to screen-space UV texture-coordinates. // Since we use a left-handed coordinate system, we flip Y, and since we want to map the result from 0.0 to 1.0. return ndc_to_uv(screenPosition.xy / screenPosition.w, viewUniforms.viewport_percents[cameraIndex]); } float linearZFromDepth(float depth) { return 1 / (viewUniforms.linearizationFactors.x + depth * viewUniforms.linearizationFactors.y); } bool is_proj_orthographic(unsigned short camera_index) { return viewUniforms.proj_from_view[camera_index][3][3] != 0.; } #ifndef VFX_IS_UBER_HEADER particle_vertex_io vertexIO(uint16_t camera_index, float3 crworld_pos) { return vertexIO(camera_index, float4(crworld_pos, 1.f)); } particle_vertex_io vertexIO(uint16_t camera_index, float4 crworld_pos) { particle_vertex_io result; result.position = proj_from_crws(camera_index) * crworld_pos; if (use_interpolated_crworld) { result.crworld_position = float3(crworld_pos); } #if VFX_RE_SHADERS_AVAILABLE if (enable_portal_clip_plane) { if (renderOptionsPortalClipPlaneEnabled(use_api_v2 ? reData.objectConstants.render_options : externals.entityConstants.render_options)) { result.portal_clip_distance = dot(float4(result.crworld_position, 1), use_api_v2 ? 
reData.entityConstants.portalPlane : externals.entityConstants.portal_plane); } else { result.portal_clip_distance = 0; } } #endif return result; } particle_vertex_io vertexIO(uint16_t camera_index, float3 local_position, thread const float4x4& crworld_from_local, Vertex v) { float3 crworld_pos = vfx_transform_position(crworld_from_local, local_position); particle_vertex_io result = vertexIO(camera_index, crworld_pos); result.uv0 = v.get_uv0(); result.uv1 = v.get_uv1(); result.uv2 = v.get_uv2(); result.uv3 = v.get_uv3(); if (has_vertex_normal) { result.normal = half3(vfx_transform_normal(crworld_from_local, v.get_normal())); } if (has_vertex_tangent) { result.tangent = half4(half3(vfx_transform_normal(crworld_from_local, v.get_tangent().xyz)), half(v.get_tangent().w)); } return result; } void fillCameraIO(uint16_t camera_index, thread particle_io& io) { io.primitive.camera_index = camera_index; io.vertice.screen_uv = computeScreenUV(io.vertice.position, camera_index); io.primitive.layer_id = viewUniforms.useVertexAmplification ? 0 : camera_index; } #endif // VFX_IS_UBER_HEADER }; #endif // __METAL_VERSION__ // MARK: - vfx_script_helpers.h struct vfx_script_buffer { vfx_ref void *_Nonnull pointer; uint64_t count; }; #ifdef __cplusplus template struct vfx_script_array { constexpr uint32_t size() const vfx_out { return C; } T array[C]; }; #endif #ifndef __METAL_VERSION__ struct vfx_script_graph_value_t { uint64_t type; void *_Nullable pointer; }; struct vfx_script_uuid_t { uint64_t a, b; }; struct vfx_script_keypath_ref_t { const char *_Nonnull root_type; const char *_Nonnull identifier; const char *_Nullable value_type_if_dynamic; }; #endif static inline float VFX_SIMD vfx_bounceEase(float t, float duration, float frequency, float decay) { float frequencyTwoPi = frequency * VFX_2PI; float tMinusDuration = t - duration; float e = exp(tMinusDuration * decay); float s = sin(frequencyTwoPi * tMinusDuration); float seDivide = vfx_is_nan(e) || vfx_is_almost_zero(e) ? 
0.0f : e / s; float div = vfx_is_nan(frequencyTwoPi) || vfx_is_almost_zero(frequencyTwoPi) ? 0.0f : seDivide / frequencyTwoPi; float durationInverse = vfx_is_nan(duration) || vfx_is_almost_zero(duration) ? 0.0f : 1.f / duration; float a = t / duration; float b = 1.f + div * durationInverse; return t < duration ? a : b; } static inline float VFX_SIMD vfx_perlin_noise_hash(simd_float3 _p) { simd_float3 p = vfx_make_float3(17.0f * vfx_fract(_p.x * 0.3183099f + 0.1f), 17.0f * vfx_fract(_p.y * 0.3183099f + 0.1f), 17.0f * vfx_fract(_p.z * 0.3183099f + 0.1f)); return vfx_fract(p.x * p.y * p.z * (p.x + p.y + p.z)); } static inline float VFX_SIMD vfx_perlin_noise(simd_float3 x) { simd_float3 i = vfx_floor(x); simd_float3 f = vfx_fract(x); f = f * f * (3.0f - 2.0f * f); return vfx_mix( vfx_mix(vfx_mix( vfx_perlin_noise_hash(i + vfx_make_float3(0,0,0)), vfx_perlin_noise_hash(i + vfx_make_float3(1,0,0)), f.x), vfx_mix( vfx_perlin_noise_hash(i + vfx_make_float3(0,1,0)), vfx_perlin_noise_hash(i + vfx_make_float3(1,1,0)), f.x), f.y), vfx_mix(vfx_mix( vfx_perlin_noise_hash(i + vfx_make_float3(0,0,1)), vfx_perlin_noise_hash(i + vfx_make_float3(1,0,1)), f.x), vfx_mix( vfx_perlin_noise_hash(i + vfx_make_float3(0,1,1)), vfx_perlin_noise_hash(i + vfx_make_float3(1,1,1)), f.x), f.y), f.z); } static inline simd_float2 VFX_SIMD vfx_perlin_noise(simd_float3 x, simd_float3 y) { return vfx_make_float2(vfx_perlin_noise(x), vfx_perlin_noise(y)); } static inline simd_float3 VFX_SIMD vfx_perlin_noise(simd_float3 x, simd_float3 y, simd_float3 z) { return vfx_make_float3(vfx_perlin_noise(x), vfx_perlin_noise(y), vfx_perlin_noise(z)); } static inline simd_float4 VFX_SIMD vfx_perlin_noise(simd_float3 x, simd_float3 y, simd_float3 z, simd_float3 w) { return vfx_make_float4(vfx_perlin_noise(x), vfx_perlin_noise(y), vfx_perlin_noise(z), vfx_perlin_noise(w)); } #ifdef __METAL_VERSION__ static inline float vfx_script_clock_time(thread frame_constants& constants) { return constants.time; } static inline 
float vfx_script_clock_delta_time(thread frame_constants& constants) { return constants.dt; } static inline bool vfx_script_clock_is_first_frame(thread frame_constants& constants) { return constants.isFirstFrame; } static inline bool vfx_script_clock_simulation_index(thread frame_constants& constants) { return constants.simulationIndex; } #endif // MARK: - vfx_script_objc.h #ifndef __METAL_VERSION__ #define _CLASS_VAR(cls) _vfx_objc_cls_##cls #define _SEL_VAR(sel) _vfx_objc_sel_##sel #define VFX_OBJC_CLASS_DECL(cls) static Class _CLASS_VAR(cls) #define VFX_OBJC_CLASS_DEF(cls) _CLASS_VAR(cls) = objc_lookUpClass(#cls) #define VFX_OBJC_SEL_DECL(sel) static SEL _SEL_VAR(sel) #define VFX_OBJC_SEL_DEF(sel, name) _SEL_VAR(sel) = sel_registerName(name) #define VFX_OBJC_SYMBOL_DECL(sym) extern "C" void *sym #define _class(cls) _CLASS_VAR(cls) #define _sel(sel) _SEL_VAR(sel) template inline Ret vfx_msgSend(const void * _Nullable obj, SEL _Nonnull selector, Args... args) { using sendMsg_t = Ret (*)(const void *, SEL, Args...); const sendMsg_t sendMsg = reinterpret_cast(&objc_msgSend); return (*sendMsg)(obj, selector, args...); } NS_ASSUME_NONNULL_BEGIN // NSObject VFX_OBJC_SEL_DECL(setValue_forKeyPath_); VFX_OBJC_SEL_DECL(valueForKeyPath_); // Foundation VFX_OBJC_CLASS_DECL(NSString); VFX_OBJC_CLASS_DECL(NSDictionary); VFX_OBJC_CLASS_DECL(NSMutableDictionary); VFX_OBJC_CLASS_DECL(NSNumber); VFX_OBJC_CLASS_DECL(VFXTransaction); VFX_OBJC_SEL_DECL(stringWithUTF8String_); VFX_OBJC_SEL_DECL(begin); VFX_OBJC_SEL_DECL(commit); VFX_OBJC_SEL_DECL(setAnimationDurationAsFloat_); // VFXNode VFX_OBJC_CLASS_DECL(VFXNode); VFX_OBJC_SEL_DECL(addAnimationAsset_forKey_); VFX_OBJC_SEL_DECL(removeAllAnimations); VFX_OBJC_SEL_DECL(removeAnimationForKey_); VFX_OBJC_SEL_DECL(clone); VFX_OBJC_SEL_DECL(parentNode); VFX_OBJC_SEL_DECL(addChildNode_); VFX_OBJC_SEL_DECL(childNodes); VFX_OBJC_SEL_DECL(deepClone); VFX_OBJC_SEL_DECL(script_rootNode); VFX_OBJC_SEL_DECL(insertChildNode_atIndex_); 
VFX_OBJC_SEL_DECL(removeFromParentNode); VFX_OBJC_SEL_DECL(childNodeWithName_); VFX_OBJC_SEL_DECL(convertPosition_fromNode_); VFX_OBJC_SEL_DECL(convertVector_fromNode_); VFX_OBJC_SEL_DECL(convertTransform_fromNode_); VFX_OBJC_SEL_DECL(convertPosition_toNode_); VFX_OBJC_SEL_DECL(convertVector_toNode_); VFX_OBJC_SEL_DECL(convertTransform_toNode_); VFX_OBJC_SEL_DECL(localTranslateBy_); VFX_OBJC_SEL_DECL(localRotateBy_); VFX_OBJC_SEL_DECL(rotateBy_aroundTarget_); VFX_OBJC_SEL_DECL(projectPoint_); VFX_OBJC_SEL_DECL(unprojectPoint_); VFX_OBJC_SEL_DECL(script_hitTest_options_); VFX_OBJC_SEL_DECL(hitTestWithSegmentFromPoint_toPoint_options_); VFX_OBJC_SEL_DECL(node); VFX_OBJC_SEL_DECL(setState_); VFX_OBJC_SEL_DECL(setStateNamed_); VFX_OBJC_SEL_DECL(physicsBody); // VFXPhysicsBody VFX_OBJC_SEL_DECL(applyForce_impulse_); VFX_OBJC_SEL_DECL(applyForce_atPosition_impulse_); VFX_OBJC_SEL_DECL(applyTorque_impulse_); // VFXAssetNode VFX_OBJC_SEL_DECL(script_instantiate_); // NSDictionary VFX_OBJC_SEL_DECL(dictionary); VFX_OBJC_SEL_DECL(setValue_forKey_); VFX_OBJC_SEL_DECL(setObject_forKey_); // NSValue VFX_OBJC_CLASS_DECL(NSValue); // NSNumber VFX_OBJC_SEL_DECL(numberWithBool_); VFX_OBJC_SEL_DECL(numberWithUnsignedInteger_); VFX_OBJC_SEL_DECL(numberWithFloat_); VFX_OBJC_SEL_DECL(count); // NSArray VFX_OBJC_SEL_DECL(firstObject); // VFXHitTestResult VFX_OBJC_SEL_DECL(geometryIndex); VFX_OBJC_SEL_DECL(faceIndex); VFX_OBJC_SEL_DECL(localCoordinates); VFX_OBJC_SEL_DECL(worldCoordinates); VFX_OBJC_SEL_DECL(localNormal); VFX_OBJC_SEL_DECL(worldNormal); VFX_OBJC_SEL_DECL(modelTransform); VFX_OBJC_SEL_DECL(boneNode); VFX_OBJC_SEL_DECL(uv0); VFX_OBJC_SEL_DECL(presentationBoneNode); VFX_OBJC_SEL_DECL(presentationNode); // VFXRenderer VFX_OBJC_SEL_DECL(_contentsScaleFactor); VFX_OBJC_SEL_DECL(_screenSize); VFX_OBJC_SYMBOL_DECL(VFXHitTestRootNodeKey); VFX_OBJC_SYMBOL_DECL(VFXHitTestBoundingBoxOnlyKey); VFX_OBJC_SYMBOL_DECL(VFXHitTestIgnoreHiddenNodesKey); 
VFX_OBJC_SYMBOL_DECL(VFXHitTestOptionCategoryBitMask); VFX_OBJC_SEL_DECL(presentationObject); NS_ASSUME_NONNULL_END static inline void vfx_script_initialize_objc_helpers(void) { static dispatch_once_t once; dispatch_once(&once, ^{ // NSObject VFX_OBJC_SEL_DEF(setValue_forKeyPath_, "setValue:forKeyPath:"); VFX_OBJC_SEL_DEF(valueForKeyPath_, "valueForKeyPath:"); VFX_OBJC_CLASS_DEF(NSString); VFX_OBJC_SEL_DEF(stringWithUTF8String_, "stringWithUTF8String:"); VFX_OBJC_CLASS_DEF(VFXTransaction); VFX_OBJC_SEL_DEF(begin, "begin"); VFX_OBJC_SEL_DEF(setAnimationDurationAsFloat_, "setAnimationDurationAsFloat:"); VFX_OBJC_SEL_DEF(commit, "commit"); VFX_OBJC_CLASS_DEF(NSDictionary); VFX_OBJC_CLASS_DEF(NSMutableDictionary); VFX_OBJC_SEL_DEF(dictionary, "dictionary"); VFX_OBJC_SEL_DEF(setValue_forKey_, "setValue:forKey:"); VFX_OBJC_SEL_DEF(setObject_forKey_, "setObject:forKey:"); VFX_OBJC_CLASS_DEF(NSValue); VFX_OBJC_CLASS_DEF(NSNumber); VFX_OBJC_SEL_DEF(numberWithBool_, "numberWithBool:"); VFX_OBJC_SEL_DEF(numberWithFloat_, "numberWithFloat:"); VFX_OBJC_SEL_DEF(numberWithUnsignedInteger_, "numberWithUnsignedInteger:"); // NSArray VFX_OBJC_SEL_DEF(firstObject, "firstObject"); VFX_OBJC_SEL_DEF(count, "count"); VFX_OBJC_CLASS_DEF(VFXNode); VFX_OBJC_SEL_DEF(addAnimationAsset_forKey_, "addAnimationAsset:forKey:"); VFX_OBJC_SEL_DEF(removeAllAnimations, "removeAllAnimations"); VFX_OBJC_SEL_DEF(removeAnimationForKey_, "removeAnimationForKey:"); VFX_OBJC_SEL_DEF(script_rootNode, "script_rootNode"); VFX_OBJC_SEL_DEF(clone, "clone"); VFX_OBJC_SEL_DEF(parentNode, "parentNode"); VFX_OBJC_SEL_DEF(addChildNode_, "addChildNode:"); VFX_OBJC_SEL_DEF(childNodes, "childNodes"); VFX_OBJC_SEL_DEF(deepClone, "deepClone"); VFX_OBJC_SEL_DEF(insertChildNode_atIndex_, "insertChildNode:atIndex:"); VFX_OBJC_SEL_DEF(removeFromParentNode, "removeFromParentNode"); VFX_OBJC_SEL_DEF(childNodeWithName_, "childNodeWithName:"); VFX_OBJC_SEL_DEF(convertPosition_fromNode_, "convertPosition:fromNode:"); 
VFX_OBJC_SEL_DEF(convertVector_fromNode_, "convertVector:fromNode:"); VFX_OBJC_SEL_DEF(convertTransform_fromNode_, "convertTransform:fromNode:"); VFX_OBJC_SEL_DEF(convertPosition_toNode_, "convertPosition:toNode:"); VFX_OBJC_SEL_DEF(convertVector_toNode_, "convertVector:toNode:"); VFX_OBJC_SEL_DEF(convertTransform_toNode_, "convertTransform:toNode:"); VFX_OBJC_SEL_DEF(localTranslateBy_, "localTranslateBy:"); VFX_OBJC_SEL_DEF(localRotateBy_, "localRotateBy:"); VFX_OBJC_SEL_DEF(rotateBy_aroundTarget_, "rotateBy:aroundTarget:"); VFX_OBJC_SEL_DEF(projectPoint_, "projectPoint:"); VFX_OBJC_SEL_DEF(unprojectPoint_, "unprojectPoint:"); VFX_OBJC_SEL_DEF(script_hitTest_options_, "script_hitTest:options:"); VFX_OBJC_SEL_DEF(hitTestWithSegmentFromPoint_toPoint_options_, "hitTestWithSegmentFromPoint:toPoint:options:"); VFX_OBJC_SEL_DEF(node, "node"); VFX_OBJC_SEL_DEF(setState_, "setState:"); VFX_OBJC_SEL_DEF(setStateNamed_, "setStateNamed:"); VFX_OBJC_SEL_DEF(_contentsScaleFactor, "_contentsScaleFactor"); VFX_OBJC_SEL_DEF(_screenSize, "_screenSize"); VFX_OBJC_SEL_DEF(physicsBody, "physicsBody"); // VFXAssetNode VFX_OBJC_SEL_DEF(script_instantiate_, "script_instantiate:"); // VFXHitTestResults VFX_OBJC_SEL_DEF(geometryIndex, "geometryIndex"); VFX_OBJC_SEL_DEF(faceIndex, "faceIndex"); VFX_OBJC_SEL_DEF(localCoordinates, "localCoordinates"); VFX_OBJC_SEL_DEF(worldCoordinates, "worldCoordinates"); VFX_OBJC_SEL_DEF(localNormal, "localNormal"); VFX_OBJC_SEL_DEF(worldNormal, "worldNormal"); VFX_OBJC_SEL_DEF(modelTransform, "modelTransform"); VFX_OBJC_SEL_DEF(boneNode, "boneNode"); VFX_OBJC_SEL_DEF(uv0, "uv0"); VFX_OBJC_SEL_DEF(presentationBoneNode, "presentationBoneNode"); VFX_OBJC_SEL_DEF(presentationNode, "presentationNode"); // VFXPhysicsBody VFX_OBJC_SEL_DEF(applyForce_impulse_, "applyForce:impulse:"); VFX_OBJC_SEL_DEF(applyForce_atPosition_impulse_, "applyForce:atPosition:impulse:"); VFX_OBJC_SEL_DEF(applyTorque_impulse_, "applyTorque:impulse:"); 
VFX_OBJC_SEL_DEF(presentationObject, "presentationObject"); }); } #endif