fsiddi-darthpro:blender fsiddi (compositor-2016) $ /Users/fsiddi/Developer/blender-build/build_darwin/bin/blender.app/Contents/MacOS/blender found bundled python: /Users/fsiddi/Developer/blender-build/build_darwin/bin/blender.app/Contents/Resources/2.80/python OPENCL error: [CL_DEVICE_NOT_AVAILABLE] : OpenCL Error : Error: Build Program driver returned (-2) OPENCL error: OpenCL Warning : clBuildProgram failed: could not build program for 0x1021c00 (AMD Radeon HD - FirePro D700 Compute Engine) (err:-2) OPENCL error: [CL_BUILD_ERROR] : OpenCL Build Error : Compiler build log: :307:9: warning: 'make_int2' macro redefined #define make_int2(x, y) ((ivec2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3199:9: note: previous definition is here #define make_int2(A,B) (int2)((A),(B)) ^ :373:9: warning: 'make_int3' macro redefined #define make_int3(x, y, z) ((int3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3200:9: note: previous definition is here #define make_int3(A,B,C) (int3)((A),(B),(C)) ^ :442:9: warning: 'make_int4' macro redefined #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3201:9: note: previous definition is here #define make_int4(A,B,C,D) (int4)((A),(B),(C),(D)) ^ :491:9: warning: 'make_uint2' macro redefined #define make_uint2(x, y) ((uint2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3205:9: note: previous definition is here #define make_uint2(A,B) (uint2)((A),(B)) ^ :539:9: warning: 'make_uint3' macro redefined #define make_uint3(x, y, z) ((uint3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3206:9: note: previous definition is here #define make_uint3(A,B,C) (uint3)((A),(B),(C)) ^ :588:9: warning: 'make_uint4' macro redefined #define make_uint4(x, 
y, z, w) ((uint4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3207:9: note: previous definition is here #define make_uint4(A,B,C,D) (uint4)((A),(B),(C),(D)) ^ :639:9: warning: 'make_float2' macro redefined #define make_float2(x, y) ((float2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3223:9: note: previous definition is here #define make_float2(A,B) (float2)((A),(B)) ^ :705:9: warning: 'make_float3' macro redefined #define make_float3(x, y, z) ((float3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3224:9: note: previous definition is here #define make_float3(A,B,C) (float3)((A),(B),(C)) ^ :775:9: warning: 'make_float4' macro redefined #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3226:9: note: previous definition is here #define make_float4(A,B,C,D) (float4)((A),(B),(C),(D)) ^ :3305:9: warning: 'abs' macro redefined #define abs(x) fabs(x) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:4770:13: note: previous definition is here #define abs(__x) __cl_abs(__x) ^ :3309:26: warning: no previous prototype for function 'min4' comp_device_inline float min4(float a, float b, float c, float d) ^ :3314:26: warning: no previous prototype for function 'max4' comp_device_inline float max4(float a, float b, float c, float d) ^ :3376:25: warning: no previous prototype for function 'isnan_safe' comp_device_inline bool isnan_safe(float f) ^ :3411:26: warning: no previous prototype for function 'saturate' comp_device_inline float saturate(float a) ^ :3424:24: warning: no previous prototype for function 'float_to_int' comp_device_inline int float_to_int(float f) ^ :3430:24: warning: no previous prototype for function 'floor_to_int' comp_device_inline int floor_to_int(float f) 
^ :3435:24: warning: no previous prototype for function 'ceil_to_int' comp_device_inline int ceil_to_int(float f) ^ :3440:26: warning: no previous prototype for function 'signf' comp_device_inline float signf(float f) ^ :3445:26: warning: no previous prototype for function 'nonzerof' comp_device_inline float nonzerof(float f, float eps) ^ :3453:26: warning: no previous prototype for function 'smoothstepf' comp_device_inline float smoothstepf(float f) ^ :3459:24: warning: no previous prototype for function 'modulo' comp_device_inline int modulo(int x, int m) ^ :3464:27: warning: no previous prototype for function 'float2_to_float3' comp_device_inline float3 float2_to_float3(const float2 a) ^ :3469:27: warning: no previous prototype for function 'float4_to_float3' comp_device_inline float3 float4_to_float3(const float4 a) ^ :3474:27: warning: no previous prototype for function 'float3_to_float4' comp_device_inline float4 float3_to_float4(const float3 a) ^ :4842:25: warning: no previous prototype for function 'make_orthonormals' comp_device_inline void make_orthonormals(const float3 N, comp_inout(float3, a), comp_inout(float3, b)) ^ :4855:27: warning: no previous prototype for function 'safe_invert_color' comp_device_inline float3 safe_invert_color(float3 a) ^ :4866:27: warning: no previous prototype for function 'safe_divide_color' comp_device_inline float3 safe_divide_color(float3 a, float3 b) ^ :4877:27: warning: no previous prototype for function 'safe_divide_even_color' comp_device_inline float3 safe_divide_even_color(float3 a, float3 b) ^ :4915:27: warning: no previous prototype for function 'rotate_around_axis' comp_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) ^ :4938:26: warning: no previous prototype for function 'safe_sqrt' comp_device_inline float safe_sqrt(float f) ^ :4943:19: warning: no previous prototype for function 'safe_asin' comp_device float safe_asin(float a) ^ :4948:19: warning: no previous prototype for function 
'safe_acos' comp_device float safe_acos(float a) ^ :4953:19: warning: no previous prototype for function 'compatible_pow' comp_device float compatible_pow(float x, float y) ^ :4972:19: warning: no previous prototype for function 'safe_pow' comp_device float safe_pow(float a, float b) ^ :4980:19: warning: no previous prototype for function 'safe_divide' comp_device float safe_divide(float a, float b) ^ :4985:19: warning: no previous prototype for function 'safe_log' comp_device float safe_log(float a, float b) ^ :4993:19: warning: no previous prototype for function 'safe_modulo' comp_device float safe_modulo(float a, float b) ^ :4998:26: warning: no previous prototype for function 'xor_signmask' comp_device_inline float xor_signmask(float x, int y) ^ :5016:25: warning: no previous prototype for function 'ray_plane_intersect' comp_device_inline bool ray_plane_intersect(comp_device_struct Ray* ray, float3 planePoint, float3 planeNormal, float* length) { ^ :9918:26: warning: no previous prototype for function '_srgb_to_linear' comp_device_inline float _srgb_to_linear(float c) ^ :9926:26: warning: no previous prototype for function '_linear_to_srgb' comp_device_inline float _linear_to_srgb(float c) ^ :9934:27: warning: no previous prototype for function 'linear_to_hsv' comp_device_inline float4 linear_to_hsv(float4 rgb) ^ :9973:27: warning: no previous prototype for function 'hsv_to_linear' comp_device_inline float4 hsv_to_linear(float4 hsv) ^ :10085:27: warning: no previous prototype for function 'srgb_to_linear' comp_device_inline float4 srgb_to_linear(float4 c) ^ :10093:27: warning: no previous prototype for function 'linear_to_srgb' comp_device_inline float4 linear_to_srgb(float4 c) ^ :10101:27: warning: no previous prototype for function 'linear_to_ycc_itu_bt601' comp_device_inline float4 linear_to_ycc_itu_bt601(float4 c) ^ :10111:27: warning: no previous prototype for function 'linear_to_yuv' comp_device_inline float4 linear_to_yuv(float4 c) ^ :10138:26: warning: 
no previous prototype for function 'linear_rgb_to_gray' comp_device_inline float linear_rgb_to_gray(float4 c) ^ :10143:26: warning: no previous prototype for function 'component' comp_device_inline float component(float4 c, uint channel) { ^ :10169:26: warning: no previous prototype for function 'get_random_float' comp_device_inline float get_random_float(KernelGlobals * kg) //uniform between 0-1 ^ :10184:71: warning: no previous prototype for function 'kernel_tex_info' comp_device_inline comp_device_global comp_device_struct TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) { ^ :10188:24: warning: no previous prototype for function 'wrap_periodic' comp_device_inline int wrap_periodic(int x, int width) ^ :10196:24: warning: no previous prototype for function 'wrap_clamp' comp_device_inline int wrap_clamp(int x, int width) ^ :10201:26: warning: no previous prototype for function 'frac' comp_device_inline float frac(float x, int *ix) ^ :10209:26: warning: no previous prototype for function 'kernel_tex_image_interp' comp_device_inline float kernel_tex_image_interp(comp_device_struct KernelGlobals *kg, int id, float x, float y){ ^ :10354:27: warning: no previous prototype for function 'kernel_tex_image_interp_4' comp_device_inline float4 kernel_tex_image_interp_4(comp_device_struct KernelGlobals *kg, int id, float x, float y){ ^ :10510:68: warning: no previous prototype for function 'kernel_viewport' comp_device_inline comp_device_global comp_device_struct Viewport* kernel_viewport(KernelGlobals *kg, uint id) { ^ :10517:25: warning: no previous prototype for function 'viewport_to_ray' comp_device_inline void viewport_to_ray(comp_in(comp_device_struct KernelGlobals, kg), int viewport_id, comp_in(float2, coordinates), comp_device_struct Ray* ray) { ^ :10560:27: warning: no previous prototype for function 'ray_to_viewport' comp_device_inline float2 ray_to_viewport(comp_in(comp_device_struct KernelGlobals, kg), comp_device_struct Ray* ray, int viewport_id) { ^ 
:10762:25: warning: no previous prototype for function 'eval_program' comp_device_inline void eval_program( ^ :12009:17: error: parameter may not be qualified with an address space __global uint4 program[], ^ :12010:35: error: parameter may not be qualified with an address space uint program_size, __local uint4 local_program[], __global char *buffer1 ^ CLERROR[-11]: CL_BUILD_PROGRAM_FAILURE // -------------------------------------------------------- // START #include "kernel/kernels/opencl/kernel_opencl_defines.h" // -------------------------------------------------------- #define _KERNEL_OPENCL_ #define COMP_NAMESPACE_BEGIN #define COMP_NAMESPACE_END #define comp_device_global __global #define comp_device_inline #define comp_device_noinline #define comp_device #define comp_device_struct struct #define comp_get_ref(var) *var #define comp_in(type, var) type var #define comp_out(type, var) type *var #define comp_inout(type, var) type *var #define comp_inout_array(type, var) type var #define kernel_assert(cond) #define UINT(value) ((uint)(value)) #define comp_attribute_packed __attribute__ ((packed)) #define ATTR_FALLTHROUGH ((void)0) //#define COMP_OPENCL_COPY_TO_LOCAL //#undef COMP_OPENCL_COPY_TO_LOCAL // END #include "kernel/kernels/opencl/kernel_opencl_defines.h" // -------------------------------------------------------- // START #include "kernel/kernel_compat_opencl.h" // -------------------------------------------------------- #ifndef _KERNEL_COMPAT_OPENCL_H_ #define _KERNEL_COMPAT_OPENCL_H_ #endif // END #include "kernel/kernel_compat_opencl.h" // -------------------------------------------------------- // START #include "kernel/kernel.h" // -------------------------------------------------------- // -------------------------------------------------------- // START #include "kernel/kernel_globals.h" // -------------------------------------------------------- #ifndef _KERNEL_GLOBALS_H_ #define _KERNEL_GLOBALS_H_ #ifdef _KERNEL_CPU_ # include 
"util/util_vector.h" #endif // -------------------------------------------------------- // START #include "util/util_viewport.h" // -------------------------------------------------------- #ifndef _UTIL_VIEWPORT_H_ #define _UTIL_VIEWPORT_H_ // -------------------------------------------------------- // START #include "util/util_types.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_H_ #define _UTIL_TYPES_H_ #ifdef _KERNEL_CPU_ #include #endif /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) # define __KERNEL_64_BIT__ #endif /* Qualifiers for kernel code shared by CPU and GPU */ #ifdef _KERNEL_CPU_ # define comp_device_inline static inline # define comp_device_noinline static # define comp_global # define comp_constant # define comp_local # define comp_local_param # define comp_private # define comp_restrict __restrict # define __KERNEL_WITH_SSE_ALIGN__ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define comp_device_inline static __forceinline # define comp_device_forceinline static __forceinline # define comp_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define comp_try_align(...) __declspec(align(__VA_ARGS__)) # else /* __KERNEL_64_BIT__ */ # undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). 
*/ # define comp_try_align(...) # endif /* __KERNEL_64_BIT__ */ # define comp_may_alias # define comp_always_inline __forceinline # define comp_never_inline __declspec(noinline) # define comp_maybe_unused # else /* _WIN32 && !FREE_WINDOWS */ //# define comp_device_inline static inline __attribute__((always_inline)) # define comp_device_forceinline static inline __attribute__((always_inline)) # define comp_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) # endif # define comp_try_align(...) __attribute__((aligned(__VA_ARGS__))) # define comp_may_alias __attribute__((__may_alias__)) # define comp_always_inline __attribute__((always_inline)) # define comp_never_inline __attribute__((noinline)) # define comp_maybe_unused __attribute__((used)) # endif /* _WIN32 && !FREE_WINDOWS */ #endif /* _KERNEL_GPU_ */ /* Standard Integer Types */ #ifdef _KERNEL_CPU_ /* int8_t, uint16_t, and friends */ # ifndef _WIN32 # include # endif /* SIMD Types */ // # include "util/util_optimization.h" #endif /* _KERNEL_GPU_ */ COMP_NAMESPACE_BEGIN /* Types * * Define simpler unsigned type names, and integer with defined number of bits. * Also vector types, named to be compatible with OpenCL builtin types, while * working for CUDA and C++ too. 
*/ /* Shorter Unsigned Names */ #ifdef _KERNEL_CPU_ typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned short ushort; #endif // #ifndef __KERNEL_OPENCL__ // typedef unsigned char uchar; // typedef unsigned int uint; // typedef unsigned short ushort; // #endif /* Fixed Bits Types */ // #ifdef __KERNEL_OPENCL__ // typedef ulong uint64_t; // #endif #ifdef _KERNEL_CPU_ # ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; # ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif # endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; #endif /* _KERNEL_GPU_ */ // comp_device_inline size_t align_up(size_t offset, size_t alignment) // { // return (offset + alignment - 1) & ~(alignment - 1); // } // // comp_device_inline size_t divide_up(size_t x, size_t y) // { // return (x + y - 1) / y; // } // // comp_device_inline size_t round_up(size_t x, size_t multiple) // { // return ((x + multiple - 1) / multiple) * multiple; // } // // comp_device_inline size_t round_down(size_t x, size_t multiple) // { // return (x / multiple) * multiple; // } /* Interpolation types for textures * cuda also use texture space to store other objects */ #if defined(_KERNEL_CPU_) || defined(_KERNEL_OPENCL_) enum InterpolationType { INTERPOLATION_LINEAR = 0, INTERPOLATION_CLOSEST = 1, INTERPOLATION_CUBIC = 2, INTERPOLATION_SMART = 3 }; enum ExtensionType { /* Cause the image to repeat horizontally and vertically. */ EXTENSION_REPEAT = 0, /* Extend by repeating edge pixels of the image. */ EXTENSION_EXTEND = 1, /* Clip to image size and set exterior pixels as transparent. */ EXTENSION_CLIP = 2, }; #endif /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. 
*/ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ #if defined(__GNUC__) && defined(_KERNEL_CPU_) # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) (x) # define UNLIKELY(x) (x) #endif // #if defined(__GNUC__) || defined(__clang__) && defined(_KERNEL_CPU_) // /* Some magic to be sure we don't have reference in the type. */ // template static inline T decltype_helper(T x) { return x; } // # define TYPEOF(x) decltype(decltype_helper(x)) // #endif COMP_NAMESPACE_END #ifdef _KERNEL_CPU_ # include # define util_assert(statement) assert(statement) #else # define util_assert(statement) #endif /* Vectorized types declaration. */ // -------------------------------------------------------- // START #include "util/util_types_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT2_H_ #define _UTIL_TYPES_INT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int2 { int x, y; __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int2 make_int2(int x, int y); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int2(x, y) ((ivec2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_H_ */ // END #include "util/util_types_int2.h" // -------------------------------------------------------- // START #include "util/util_types_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_H_ #define _UTIL_TYPES_INT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) int3 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int3(); __forceinline int3(const int3& a); __forceinline explicit int3(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int3& operator =(const int3& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int3 make_int3(int i); comp_device_inline int3 make_int3(int x, int y, int z); comp_device_inline void print_int3(const char *label, const int3& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int3(x, y, z) ((int3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_H_ */ // END #include "util/util_types_int3.h" // -------------------------------------------------------- // START #include "util/util_types_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_H_ #define _UTIL_TYPES_INT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float3; struct comp_try_align(16) int4 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int4(); __forceinline int4(const int4& a); __forceinline explicit int4(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int4& operator=(const int4& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int4 make_int4(int i); comp_device_inline int4 make_int4(int x, int y, int z, int w); comp_device_inline int4 make_int4(const float3& f); comp_device_inline void print_int4(const char *label, const int4& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_H_ */ // END #include "util/util_types_int4.h" // -------------------------------------------------------- // START #include "util/util_types_uint2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_H_ #define _UTIL_TYPES_UINT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint2 { uint x, y; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint2 make_uint2(uint x, uint y); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint2(x, y) ((uint2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_H_ */ // END #include "util/util_types_uint2.h" // -------------------------------------------------------- // START #include "util/util_types_uint3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT3_H_ #define _UTIL_TYPES_UINT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint3 { uint x, y, z; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint3 make_uint3(uint x, uint y, uint z); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint3(x, y, z) ((uint3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_H_ */ // END #include "util/util_types_uint3.h" // -------------------------------------------------------- // START #include "util/util_types_uint4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_H_ #define _UTIL_TYPES_UINT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint4 { uint x, y, z, w; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_H_ */ // END #include "util/util_types_uint4.h" // -------------------------------------------------------- // START #include "util/util_types_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_H_ #define _UTIL_TYPES_FLOAT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float2 { float x, y; __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float2 make_float2(float x, float y); comp_device_inline void print_float2(const char *label, const float2& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float2(x, y) ((float2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_H_ */ // END #include "util/util_types_float2.h" // -------------------------------------------------------- // START #include "util/util_types_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_H_ #define _UTIL_TYPES_FLOAT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) float3 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float3(); __forceinline float3(const float3& a); __forceinline explicit float3(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float3& operator =(const float3& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float3 make_float3(float f); comp_device_inline float3 make_float3(float x, float y, float z); comp_device_inline void print_float3(const char *label, const float3& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float3(x, y, z) ((float3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_H_ */ // END #include "util/util_types_float3.h" // -------------------------------------------------------- // START #include "util/util_types_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_H_ #define _UTIL_TYPES_FLOAT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int4; struct comp_try_align(16) float4 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float4(); __forceinline float4(const float4& a); __forceinline explicit float4(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float4& operator =(const float4& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float4 make_float4(float f); comp_device_inline float4 make_float4(float x, float y, float z, float w); comp_device_inline float4 make_float4(const int4& i); comp_device_inline void print_float4(const char *label, const float4& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_H_ */ // END #include "util/util_types_float4.h" /* Vectorized types implementation. */ // -------------------------------------------------------- // START #include "util/util_types_int2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_INT2_IMPL_H_ #define _UTIL_TYPES_INT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ int int2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } int& int2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_IMPL_H_ */ // END #include "util/util_types_int2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_IMPL_H_ #define _UTIL_TYPES_INT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int3::int3() { } __forceinline int3::int3(const __m128i& a) : m128(a) { } __forceinline int3::int3(const int3& a) : m128(a.m128) { } __forceinline int3::operator const __m128i&(void) const { return m128; } __forceinline int3::operator __m128i&(void) { return m128; } __forceinline int3& int3::operator =(const int3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline int& int3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline int3 make_int3(int i) { #ifdef __KERNEL_SSE__ int3 a(_mm_set1_epi32(i)); #else int3 a = {i, i, i, i}; #endif return a; } comp_device_inline int3 make_int3(int x, int y, int z) { #ifdef __KERNEL_SSE__ int3 a(_mm_set_epi32(0, z, y, x)); #else int3 a = {x, y, z, 0}; #endif return a; } comp_device_inline void print_int3(const char *label, const int3& a) { printf("%s: %d %d %d\n", label, a.x, a.y, a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_IMPL_H_ */ // END #include "util/util_types_int3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_IMPL_H_ #define _UTIL_TYPES_INT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int4::int4() { } __forceinline int4::int4(const int4& a) : m128(a.m128) { } __forceinline int4::int4(const __m128i& a) : m128(a) { } __forceinline int4::operator const __m128i&(void) const { return m128; } __forceinline int4::operator __m128i&(void) { return m128; } __forceinline int4& int4::operator=(const int4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline int& int4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline int4 make_int4(int i) { #ifdef __KERNEL_SSE__ int4 a(_mm_set1_epi32(i)); #else int4 a = {i, i, i, i}; #endif return a; } comp_device_inline int4 make_int4(int x, int y, int z, int w) { #ifdef __KERNEL_SSE__ int4 a(_mm_set_epi32(w, z, y, x)); #else int4 a = {x, y, z, w}; #endif return a; } comp_device_inline int4 make_int4(const float3& f) { #ifdef __KERNEL_SSE__ int4 a(_mm_cvtps_epi32(f.m128)); #else int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; #endif return a; } comp_device_inline void print_int4(const char *label, const int4& a) { printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_IMPL_H_ */ // END #include "util/util_types_int4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_IMPL_H_ #define _UTIL_TYPES_UINT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint2::operator[](uint i) const { util_assert(i < 2); return *(&x + i); } __forceinline uint& uint2::operator[](uint i) { util_assert(i < 2); return *(&x + i); } comp_device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_IMPL_H_ */ // END #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_UINT3_IMPL_H_ #define _UTIL_TYPES_UINT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint3::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint3::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_IMPL_H_ */ // END #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_IMPL_H_ #define _UTIL_TYPES_UINT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint4::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint4::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_IMPL_H_ */ // END #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_IMPL_H_ #define _UTIL_TYPES_FLOAT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline float float2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } __forceinline float& float2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } comp_device_inline void print_float2(const char *label, const float2& a) { printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_IMPL_H_ */ // END #include "util/util_types_float2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_IMPL_H_ #define _UTIL_TYPES_FLOAT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float3::float3() { } __forceinline float3::float3(const float3& a) : m128(a.m128) { } __forceinline float3::float3(const __m128& a) : m128(a) { } __forceinline float3::operator const __m128&(void) const { return m128; } __forceinline float3::operator __m128&(void) { return m128; } __forceinline float3& float3::operator =(const float3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline float& float3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline float3 make_float3(float f) { #ifdef __KERNEL_SSE__ float3 a(_mm_set1_ps(f)); #else float3 a = {f, f, f, f}; #endif return a; } comp_device_inline float3 make_float3(float x, float y, float z) { #ifdef __KERNEL_SSE__ float3 a(_mm_set_ps(0.0f, z, y, x)); #else float3 a = {x, y, z, 0.0f}; #endif return a; } comp_device_inline void print_float3(const char *label, const float3& a) { printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_IMPL_H_ */ // END #include "util/util_types_float3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_IMPL_H_ #define _UTIL_TYPES_FLOAT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float4::float4() { } __forceinline float4::float4(const float4& a) : m128(a.m128) { } __forceinline float4::float4(const __m128& a) : m128(a) { } __forceinline float4::operator const __m128&(void) const { return m128; } __forceinline float4::operator __m128&(void) { return m128; } __forceinline float4& float4::operator =(const float4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline float& float4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline float4 make_float4(float f) { #ifdef __KERNEL_SSE__ float4 a(_mm_set1_ps(f)); #else float4 a = {f, f, f, f}; #endif return a; } comp_device_inline float4 make_float4(float x, float y, float z, float w) { #ifdef __KERNEL_SSE__ float4 a(_mm_set_ps(w, z, y, x)); #else float4 a = {x, y, z, w}; #endif return a; } comp_device_inline float4 make_float4(const int4& i) { #ifdef __KERNEL_SSE__ float4 a(_mm_cvtepi32_ps(i.m128)); #else float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; #endif return a; } comp_device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, 
(double)a.z, (double)a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_IMPL_H_ */ // END #include "util/util_types_float4_impl.h" #endif /* _UTIL_TYPES_H_ */ // END #include "util/util_types.h" COMP_NAMESPACE_BEGIN // SYNC with DNA_camera_types enum { // Blender Camera Emulation CAM_PERSP = 0, CAM_ORTHO = 1, CAM_PANO = 2, // Viewport settings. CAM_PLANE = 3, }; struct Viewport { float3 position; float3 direction; float3 up_vector; float3 side_vector; float3 plane_position; int type; union{ struct { float scale; }; struct { float scale_x; float scale_y; }; }; union { // CAM_PERSP struct { float lens; float sensor_size; float field_of_view; }; // CAM_ORTHO struct { }; }; float padding[3]; }; COMP_NAMESPACE_END #endif // END #include "util/util_viewport.h" COMP_NAMESPACE_BEGIN // Start section - the next structs are shared with the Host typedef struct comp_attribute_packed MemoryPTR { uint index; uint padding; ulong offset; } MemoryPTR; typedef struct comp_attribute_packed TextureInfo { MemoryPTR data_ptr; uint components; uint width; uint height; uint interpolation; uint extension; uint padding; } TextureInfo; typedef struct HostKernelGlobals { MemoryPTR texture_ptr; MemoryPTR viewport_ptr; uint num_samples; uint frame_number; uint2 output_dimension; float aspect_ratio; uint seed; } HostKernelGlobals; // End section #ifdef _KERNEL_CPU_ typedef struct KernelGlobals { uint4 *_program; char* buffers[1]; MemoryPTR texture_ptr; MemoryPTR viewport_ptr; int num_samples; int frame_number; float aspect_ratio; uint seed; float2 pixel_size; } KernelGlobals; #endif #ifdef _KERNEL_OPENCL_ typedef struct KernelGlobals { __local uint4 *_program; __global char* buffers[1]; MemoryPTR texture_ptr; MemoryPTR viewport_ptr; int num_samples; int frame_number; float aspect_ratio; uint seed; float2 pixel_size; } KernelGlobals; #endif COMP_NAMESPACE_END #endif // END #include "kernel/kernel_globals.h" // -------------------------------------------------------- 
// START #include "kernel/kernel_types.h" // -------------------------------------------------------- COMP_NAMESPACE_BEGIN struct CompositorData { float2 coord; int sample_number; }; struct Ray { float3 P; /* origin */ float3 D; /* direction */ // float3 up; /* up vector */ float3 side; /* side vector */ float store_float1; /* store to transfer data from ray manipulator to operation */ }; COMP_NAMESPACE_END // END #include "kernel/kernel_types.h" // -------------------------------------------------------- // START #include "util/util_math.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_H_ #define _UTIL_MATH_H_ /* Math * * Basic math functions on scalar and vector types. This header is used by * both the kernel code when compiled as C++, and other C++ non-kernel code. */ #ifdef _KERNEL_CPU_ # include # include # include # include #endif /* _KERNEL_CPU_ */ // -------------------------------------------------------- // START #include "util/util_types.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_H_ #define _UTIL_TYPES_H_ #ifdef _KERNEL_CPU_ #include #endif /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) # define __KERNEL_64_BIT__ #endif /* Qualifiers for kernel code shared by CPU and GPU */ #ifdef _KERNEL_CPU_ # define comp_device_inline static inline # define comp_device_noinline static # define comp_global # define comp_constant # define comp_local # define comp_local_param # define comp_private # define comp_restrict __restrict # define __KERNEL_WITH_SSE_ALIGN__ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define comp_device_inline static __forceinline # define comp_device_forceinline static __forceinline # define comp_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define comp_try_align(...) __declspec(align(__VA_ARGS__)) # else /* __KERNEL_64_BIT__ */ # undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ # define comp_try_align(...) # endif /* __KERNEL_64_BIT__ */ # define comp_may_alias # define comp_always_inline __forceinline # define comp_never_inline __declspec(noinline) # define comp_maybe_unused # else /* _WIN32 && !FREE_WINDOWS */ //# define comp_device_inline static inline __attribute__((always_inline)) # define comp_device_forceinline static inline __attribute__((always_inline)) # define comp_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) # endif # define comp_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) # define comp_may_alias __attribute__((__may_alias__)) # define comp_always_inline __attribute__((always_inline)) # define comp_never_inline __attribute__((noinline)) # define comp_maybe_unused __attribute__((used)) # endif /* _WIN32 && !FREE_WINDOWS */ #endif /* _KERNEL_GPU_ */ /* Standard Integer Types */ #ifdef _KERNEL_CPU_ /* int8_t, uint16_t, and friends */ # ifndef _WIN32 # include # endif /* SIMD Types */ // # include "util/util_optimization.h" #endif /* _KERNEL_GPU_ */ COMP_NAMESPACE_BEGIN /* Types * * Define simpler unsigned type names, and integer with defined number of bits. * Also vector types, named to be compatible with OpenCL builtin types, while * working for CUDA and C++ too. */ /* Shorter Unsigned Names */ #ifdef _KERNEL_CPU_ typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned short ushort; #endif // #ifndef __KERNEL_OPENCL__ // typedef unsigned char uchar; // typedef unsigned int uint; // typedef unsigned short ushort; // #endif /* Fixed Bits Types */ // #ifdef __KERNEL_OPENCL__ // typedef ulong uint64_t; // #endif #ifdef _KERNEL_CPU_ # ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; # ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif # endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; #endif /* _KERNEL_GPU_ */ // comp_device_inline size_t align_up(size_t offset, size_t alignment) // { // return (offset + alignment - 1) & ~(alignment - 1); // } // // comp_device_inline size_t divide_up(size_t x, size_t y) // { // return (x + y - 1) / y; // } // // comp_device_inline size_t round_up(size_t x, size_t multiple) // { // return ((x + multiple - 1) / multiple) * multiple; // } // // comp_device_inline size_t 
round_down(size_t x, size_t multiple) // { // return (x / multiple) * multiple; // } /* Interpolation types for textures * cuda also use texture space to store other objects */ #if defined(_KERNEL_CPU_) || defined(_KERNEL_OPENCL_) enum InterpolationType { INTERPOLATION_LINEAR = 0, INTERPOLATION_CLOSEST = 1, INTERPOLATION_CUBIC = 2, INTERPOLATION_SMART = 3 }; enum ExtensionType { /* Cause the image to repeat horizontally and vertically. */ EXTENSION_REPEAT = 0, /* Extend by repeating edge pixels of the image. */ EXTENSION_EXTEND = 1, /* Clip to image size and set exterior pixels as transparent. */ EXTENSION_CLIP = 2, }; #endif /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. */ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ #if defined(__GNUC__) && defined(_KERNEL_CPU_) # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) (x) # define UNLIKELY(x) (x) #endif // #if defined(__GNUC__) || defined(__clang__) && defined(_KERNEL_CPU_) // /* Some magic to be sure we don't have reference in the type. */ // template static inline T decltype_helper(T x) { return x; } // # define TYPEOF(x) decltype(decltype_helper(x)) // #endif COMP_NAMESPACE_END #ifdef _KERNEL_CPU_ # include # define util_assert(statement) assert(statement) #else # define util_assert(statement) #endif /* Vectorized types declaration. */ // -------------------------------------------------------- // START #include "util/util_types_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT2_H_ #define _UTIL_TYPES_INT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int2 { int x, y; __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int2 make_int2(int x, int y); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int2(x, y) ((ivec2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_H_ */ // END #include "util/util_types_int2.h" // -------------------------------------------------------- // START #include "util/util_types_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_H_ #define _UTIL_TYPES_INT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) int3 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int3(); __forceinline int3(const int3& a); __forceinline explicit int3(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int3& operator =(const int3& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int3 make_int3(int i); comp_device_inline int3 make_int3(int x, int y, int z); comp_device_inline void print_int3(const char *label, const int3& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int3(x, y, z) ((int3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_H_ */ // END #include "util/util_types_int3.h" // -------------------------------------------------------- // START #include "util/util_types_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_H_ #define _UTIL_TYPES_INT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float3; struct comp_try_align(16) int4 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int4(); __forceinline int4(const int4& a); __forceinline explicit int4(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int4& operator=(const int4& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int4 make_int4(int i); comp_device_inline int4 make_int4(int x, int y, int z, int w); comp_device_inline int4 make_int4(const float3& f); comp_device_inline void print_int4(const char *label, const int4& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_H_ */ // END #include "util/util_types_int4.h" // -------------------------------------------------------- // START #include "util/util_types_uint2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_H_ #define _UTIL_TYPES_UINT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint2 { uint x, y; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint2 make_uint2(uint x, uint y); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint2(x, y) ((uint2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_H_ */ // END #include "util/util_types_uint2.h" // -------------------------------------------------------- // START #include "util/util_types_uint3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT3_H_ #define _UTIL_TYPES_UINT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint3 { uint x, y, z; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint3 make_uint3(uint x, uint y, uint z); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint3(x, y, z) ((uint3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_H_ */ // END #include "util/util_types_uint3.h" // -------------------------------------------------------- // START #include "util/util_types_uint4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_H_ #define _UTIL_TYPES_UINT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint4 { uint x, y, z, w; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_H_ */ // END #include "util/util_types_uint4.h" // -------------------------------------------------------- // START #include "util/util_types_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_H_ #define _UTIL_TYPES_FLOAT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float2 { float x, y; __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float2 make_float2(float x, float y); comp_device_inline void print_float2(const char *label, const float2& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float2(x, y) ((float2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_H_ */ // END #include "util/util_types_float2.h" // -------------------------------------------------------- // START #include "util/util_types_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_H_ #define _UTIL_TYPES_FLOAT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) float3 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float3(); __forceinline float3(const float3& a); __forceinline explicit float3(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float3& operator =(const float3& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float3 make_float3(float f); comp_device_inline float3 make_float3(float x, float y, float z); comp_device_inline void print_float3(const char *label, const float3& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float3(x, y, z) ((float3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_H_ */ // END #include "util/util_types_float3.h" // -------------------------------------------------------- // START #include "util/util_types_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_H_ #define _UTIL_TYPES_FLOAT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int4; struct comp_try_align(16) float4 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float4(); __forceinline float4(const float4& a); __forceinline explicit float4(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float4& operator =(const float4& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float4 make_float4(float f); comp_device_inline float4 make_float4(float x, float y, float z, float w); comp_device_inline float4 make_float4(const int4& i); comp_device_inline void print_float4(const char *label, const float4& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_H_ */ // END #include "util/util_types_float4.h" /* Vectorized types implementation. */ // -------------------------------------------------------- // START #include "util/util_types_int2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_INT2_IMPL_H_ #define _UTIL_TYPES_INT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ int int2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } int& int2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_IMPL_H_ */ // END #include "util/util_types_int2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_IMPL_H_ #define _UTIL_TYPES_INT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int3::int3() { } __forceinline int3::int3(const __m128i& a) : m128(a) { } __forceinline int3::int3(const int3& a) : m128(a.m128) { } __forceinline int3::operator const __m128i&(void) const { return m128; } __forceinline int3::operator __m128i&(void) { return m128; } __forceinline int3& int3::operator =(const int3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline int& int3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline int3 make_int3(int i) { #ifdef __KERNEL_SSE__ int3 a(_mm_set1_epi32(i)); #else int3 a = {i, i, i, i}; #endif return a; } comp_device_inline int3 make_int3(int x, int y, int z) { #ifdef __KERNEL_SSE__ int3 a(_mm_set_epi32(0, z, y, x)); #else int3 a = {x, y, z, 0}; #endif return a; } comp_device_inline void print_int3(const char *label, const int3& a) { printf("%s: %d %d %d\n", label, a.x, a.y, a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_IMPL_H_ */ // END #include "util/util_types_int3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_IMPL_H_ #define _UTIL_TYPES_INT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int4::int4() { } __forceinline int4::int4(const int4& a) : m128(a.m128) { } __forceinline int4::int4(const __m128i& a) : m128(a) { } __forceinline int4::operator const __m128i&(void) const { return m128; } __forceinline int4::operator __m128i&(void) { return m128; } __forceinline int4& int4::operator=(const int4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline int& int4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline int4 make_int4(int i) { #ifdef __KERNEL_SSE__ int4 a(_mm_set1_epi32(i)); #else int4 a = {i, i, i, i}; #endif return a; } comp_device_inline int4 make_int4(int x, int y, int z, int w) { #ifdef __KERNEL_SSE__ int4 a(_mm_set_epi32(w, z, y, x)); #else int4 a = {x, y, z, w}; #endif return a; } comp_device_inline int4 make_int4(const float3& f) { #ifdef __KERNEL_SSE__ int4 a(_mm_cvtps_epi32(f.m128)); #else int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; #endif return a; } comp_device_inline void print_int4(const char *label, const int4& a) { printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_IMPL_H_ */ // END #include "util/util_types_int4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_IMPL_H_ #define _UTIL_TYPES_UINT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint2::operator[](uint i) const { util_assert(i < 2); return *(&x + i); } __forceinline uint& uint2::operator[](uint i) { util_assert(i < 2); return *(&x + i); } comp_device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_IMPL_H_ */ // END #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_UINT3_IMPL_H_ #define _UTIL_TYPES_UINT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint3::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint3::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_IMPL_H_ */ // END #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_IMPL_H_ #define _UTIL_TYPES_UINT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint4::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint4::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_IMPL_H_ */ // END #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_IMPL_H_ #define _UTIL_TYPES_FLOAT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline float float2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } __forceinline float& float2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } comp_device_inline void print_float2(const char *label, const float2& a) { printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_IMPL_H_ */ // END #include "util/util_types_float2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_IMPL_H_ #define _UTIL_TYPES_FLOAT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float3::float3() { } __forceinline float3::float3(const float3& a) : m128(a.m128) { } __forceinline float3::float3(const __m128& a) : m128(a) { } __forceinline float3::operator const __m128&(void) const { return m128; } __forceinline float3::operator __m128&(void) { return m128; } __forceinline float3& float3::operator =(const float3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline float& float3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline float3 make_float3(float f) { #ifdef __KERNEL_SSE__ float3 a(_mm_set1_ps(f)); #else float3 a = {f, f, f, f}; #endif return a; } comp_device_inline float3 make_float3(float x, float y, float z) { #ifdef __KERNEL_SSE__ float3 a(_mm_set_ps(0.0f, z, y, x)); #else float3 a = {x, y, z, 0.0f}; #endif return a; } comp_device_inline void print_float3(const char *label, const float3& a) { printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_IMPL_H_ */ // END #include "util/util_types_float3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_IMPL_H_ #define _UTIL_TYPES_FLOAT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float4::float4() { } __forceinline float4::float4(const float4& a) : m128(a.m128) { } __forceinline float4::float4(const __m128& a) : m128(a) { } __forceinline float4::operator const __m128&(void) const { return m128; } __forceinline float4::operator __m128&(void) { return m128; } __forceinline float4& float4::operator =(const float4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline float& float4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline float4 make_float4(float f) { #ifdef __KERNEL_SSE__ float4 a(_mm_set1_ps(f)); #else float4 a = {f, f, f, f}; #endif return a; } comp_device_inline float4 make_float4(float x, float y, float z, float w) { #ifdef __KERNEL_SSE__ float4 a(_mm_set_ps(w, z, y, x)); #else float4 a = {x, y, z, w}; #endif return a; } comp_device_inline float4 make_float4(const int4& i) { #ifdef __KERNEL_SSE__ float4 a(_mm_cvtepi32_ps(i.m128)); #else float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; #endif return a; } comp_device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, 
(double)a.z, (double)a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_IMPL_H_ */ // END #include "util/util_types_float4_impl.h" #endif /* _UTIL_TYPES_H_ */ // END #include "util/util_types.h" COMP_NAMESPACE_BEGIN /* Float Pi variations */ /* Division */ #ifndef M_PI_F # define M_PI_F (3.1415926535897932f) /* pi */ #endif #ifndef M_PI_2_F # define M_PI_2_F (1.5707963267948966f) /* pi/2 */ #endif #ifndef M_PI_4_F # define M_PI_4_F (0.7853981633974830f) /* pi/4 */ #endif #ifndef M_1_PI_F # define M_1_PI_F (0.3183098861837067f) /* 1/pi */ #endif #ifndef M_2_PI_F # define M_2_PI_F (0.6366197723675813f) /* 2/pi */ #endif /* Multiplication */ #ifndef M_2PI_F # define M_2PI_F (6.2831853071795864f) /* 2*pi */ #endif #ifndef M_4PI_F # define M_4PI_F (12.566370614359172f) /* 4*pi */ #endif /* Float sqrt variations */ #ifndef M_SQRT2_F # define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ #endif #ifndef M_LN2_F # define M_LN2_F (0.6931471805599453f) /* ln(2) */ #endif #ifndef M_LN10_F # define M_LN10_F (2.3025850929940457f) /* ln(10) */ #endif /* Scalar */ #ifdef _WIN32 # ifndef __KERNEL_OPENCL__ comp_device_inline float fmaxf(float a, float b) { return (a > b)? a: b; } comp_device_inline float fminf(float a, float b) { return (a < b)? a: b; } # endif /* !__KERNEL_OPENCL__ */ #endif /* _WIN32 */ #ifdef _KERNEL_CPU_ using std::isfinite; using std::isnan; using std::fabs; #define floor(var) floorf(var) #define ceil(var) ceilf(var) #define cos(var) cosf(var) #define sin(var) sinf(var) #define sqrt(var) sqrtf(var) #define asin(var) asinf(var) #define acos(var) acos(var) #define mod(var1, var2) fmodf(var1, var2) #define pow(var1, var2) powf(var1, var2) #define log(var) logf(var) comp_device_inline int max(int a, int b) { return (a > b)? a: b; } comp_device_inline int min(int a, int b) { return (a < b)? a: b; } comp_device_inline float max(float a, float b) { return (a > b)? a: b; } comp_device_inline float min(float a, float b) { return (a < b)? 
a: b; } comp_device_inline double max(double a, double b) { return (a > b)? a: b; } comp_device_inline double min(double a, double b) { return (a < b)? a: b; } /* These 2 guys are templated for usage with registers data. * * NOTE: Since this is CPU-only functions it is ok to use references here. * But for other devices we'll need to be careful about this. */ template comp_device_inline T min4(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); } template comp_device_inline T max4(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); } #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define abs(x) fabs(x) #define mod(x, y) fmod(x, y) #endif /* _KERNEL_OPENCL_ */ comp_device_inline float min4(float a, float b, float c, float d) { return min(min(a, b), min(c, d)); } comp_device_inline float max4(float a, float b, float c, float d) { return max(max(a, b), max(c, d)); } #ifdef _KERNEL_CPU_ /* Int/Float conversion */ comp_device_inline int uint_as_int(uint i) { union { uint ui; int i; } u; u.ui = i; return u.i; } comp_device_inline uint int_as_uint(int i) { union { uint ui; int i; } u; u.i = i; return u.ui; } comp_device_inline int float_as_int(float f) { union { int i; float f; } u; u.f = f; return u.i; } comp_device_inline float int_as_float(int i) { union { int i; float f; } u; u.i = i; return u.f; } comp_device_inline uint float_as_uint(float f) { union { uint i; float f; } u; u.f = f; return u.i; } comp_device_inline float uint_as_float(uint i) { union { uint i; float f; } u; u.i = i; return u.f; } #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define uint_as_int(i) as_int(i) #define int_as_uint(i) as_uint(i) #define float_as_int(f) as_int(f) #define int_as_float(i) as_float(i) #define float_as_uint(f) as_uint(f) #define uint_as_float(i) as_float(i) #endif /* Versions of functions which are safe for fast math. 
*/ comp_device_inline bool isnan_safe(float f) { uint x = float_as_uint(f); return (x << 1) > 0xff000000u; } // comp_device_inline bool isfinite_safe(float f) // { // /* By IEEE 754 rule, 2*Inf equals Inf */ // uint x = float_as_uint(f); // return (f == f) && (x == 0 || (f != 2.0f*f)) && !((x << 1) > 0xff000000u); // } // comp_device_inline float ensure_finite(float v) // { // return isfinite_safe(v)? v : 0.0f; // } #ifdef _KERNEL_CPU_ comp_device_inline int clamp(int a, int mn, int mx) { return min(max(a, mn), mx); } comp_device_inline float clamp(float a, float mn, float mx) { return min(max(a, mn), mx); } comp_device_inline float mix(float a, float b, float t) { return a + t*(b - a); } #endif /* __KERNEL_OPENCL__ */ comp_device_inline float saturate(float a) { return clamp(a, 0.0f, 1.0f); } #ifdef _KERNEL_CPU_ comp_device_inline int float_to_int(float f) { return (int)f; } #endif #ifdef _KERNEL_OPENCL_ comp_device_inline int float_to_int(float f) { return convert_int(f); } #endif /* _KERNEL_OPENCL_ */ comp_device_inline int floor_to_int(float f) { return float_to_int(floor(f)); } comp_device_inline int ceil_to_int(float f) { return float_to_int(ceil(f)); } comp_device_inline float signf(float f) { return (f < 0.0f)? 
-1.0f: 1.0f; } comp_device_inline float nonzerof(float f, float eps) { if(abs(f) < eps) return signf(f)*eps; else return f; } comp_device_inline float smoothstepf(float f) { float ff = f*f; return (3.0f*ff - 2.0f*ff*f); } comp_device_inline int modulo(int x, int m) { return (x % m + m) % m; } comp_device_inline float3 float2_to_float3(const float2 a) { return make_float3(a.x, a.y, 0.0f); } comp_device_inline float3 float4_to_float3(const float4 a) { return make_float3(a.x, a.y, a.z); } comp_device_inline float4 float3_to_float4(const float3 a) { return make_float4(a.x, a.y, a.z, 1.0f); } COMP_NAMESPACE_END // -------------------------------------------------------- // START #include "util/util_math_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT2_H_ #define _UTIL_MATH_INT2_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline bool operator==(const int2 a, const int2 b); comp_device_inline int2 operator+(const int2 &a, const int2 &b); comp_device_inline int2 operator+=(int2 &a, const int2 &b); comp_device_inline int2 operator-(const int2 &a, const int2 &b); comp_device_inline int2 operator*(const int2 &a, const int2 &b); comp_device_inline int2 operator/(const int2 &a, const int2 &b); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline bool operator==(const int2 a, const int2 b) { return (a.x == b.x && a.y == b.y); } comp_device_inline int2 operator+(const int2 &a, const int2 &b) { return make_int2(a.x + b.x, a.y + b.y); } comp_device_inline int2 operator+=(int2 &a, const int2 &b) { return a = a + b; } comp_device_inline int2 operator-(const int2 &a, const int2 &b) { return make_int2(a.x - b.x, a.y - b.y); } comp_device_inline int2 operator*(const int2 &a, const int2 &b) { return make_int2(a.x * b.x, a.y * b.y); } comp_device_inline int2 operator/(const int2 &a, const int2 &b) { return make_int2(a.x / b.x, a.y / b.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT2_H_ */ // END #include "util/util_math_int2.h" // -------------------------------------------------------- // START #include "util/util_math_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT3_H_ #define _UTIL_MATH_INT3_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline int3 min(int3 a, int3 b); comp_device_inline int3 max(int3 a, int3 b); comp_device_inline int3 clamp(const int3& a, int mn, int mx); comp_device_inline int3 clamp(const int3& a, int3& mn, int mx); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline int3 min(int3 a, int3 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int3(_mm_min_epi32(a.m128, b.m128)); #else return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); #endif } comp_device_inline int3 max(int3 a, int3 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int3(_mm_max_epi32(a.m128, b.m128)); #else return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); #endif } comp_device_inline int3 clamp(const int3& a, int mn, int mx) { #ifdef __KERNEL_SSE__ return min(max(a, make_int3(mn)), make_int3(mx)); #else return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); #endif } comp_device_inline int3 clamp(const int3& a, int3& mn, int mx) { #ifdef __KERNEL_SSE__ return min(max(a, mn), make_int3(mx)); #else return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); #endif } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT3_H_ */ // END #include "util/util_math_int3.h" // -------------------------------------------------------- // START #include "util/util_math_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * 
Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT4_H_ #define _UTIL_MATH_INT4_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline int4 operator+(const int4& a, const int4& b); comp_device_inline int4 operator+=(int4& a, const int4& b); comp_device_inline int4 operator>>(const int4& a, int i); comp_device_inline int4 min(int4 a, int4 b); comp_device_inline int4 max(int4 a, int4 b); comp_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx); comp_device_inline int4 select(const int4& mask, const int4& a, const int4& b); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline int4 operator+(const int4& a, const int4& b) { #ifdef __KERNEL_SSE__ return int4(_mm_add_epi32(a.m128, b.m128)); #else return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); #endif } comp_device_inline int4 operator+=(int4& a, const int4& b) { return a = a + b; } comp_device_inline int4 operator>>(const int4& a, int i) { #ifdef __KERNEL_SSE__ return int4(_mm_srai_epi32(a.m128, i)); #else return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); #endif } comp_device_inline int4 min(int4 a, int4 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int4(_mm_min_epi32(a.m128, b.m128)); #else return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); #endif } comp_device_inline int4 max(int4 a, int4 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int4(_mm_max_epi32(a.m128, b.m128)); #else return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); #endif } comp_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx) { return min(max(a, mn), mx); } comp_device_inline int4 select(const int4& mask, const int4& a, const int4& b) { #ifdef __KERNEL_SSE__ const __m128 m = _mm_cvtepi32_ps(mask); /* TODO(sergey): avoid cvt. */ return int4(_mm_castps_si128( _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); #else return make_int4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); #endif } comp_device_inline int4 load_int4(const int *v) { #ifdef __KERNEL_SSE__ return int4(_mm_loadu_si128((__m128i*)v)); #else return make_int4(v[0], v[1], v[2], v[3]); #endif } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT4_H_ */ // END #include "util/util_math_int4.h" // -------------------------------------------------------- // START #include "util/util_math_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT2_H_ #define _UTIL_MATH_FLOAT2_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline float2 operator-(const float2& a); comp_device_inline float2 operator*(const float2& a, const float2& b); comp_device_inline float2 operator*(const float2& a, float f); comp_device_inline float2 operator*(float f, const float2& a); comp_device_inline float2 operator/(float f, const float2& a); comp_device_inline float2 operator/(const float2& a, float f); comp_device_inline float2 operator/(const float2& a, const float2& b); comp_device_inline float2 operator+(const float2& a, const float2& b); comp_device_inline float2 operator+(const float2& a, const float b); comp_device_inline float2 operator-(const float2& a, const float2& b); comp_device_inline float2 operator+=(float2& a, const float2& b); comp_device_inline float2 operator*=(float2& a, const float2& b); comp_device_inline float2 operator*=(float2& a, float f); comp_device_inline float2 operator/=(float2& a, const float2& b); comp_device_inline float2 operator/=(float2& a, float f); comp_device_inline bool operator==(const float2& a, const float2& b); comp_device_inline bool operator!=(const float2& a, const float2& b); comp_device_inline bool is_zero(const float2& a); comp_device_inline float average(const float2& a); comp_device_inline float dot(const float2& a, const float2& b); comp_device_inline float cross(const float2& a, const float2& b); comp_device_inline float len(const float2& a); comp_device_inline float2 normalize(const float2& a); comp_device_inline float2 normalize_len(const float2& a, float *t); comp_device_inline float2 safe_normalize(const float2& a); comp_device_inline float2 min(const float2& a, const float2& b); comp_device_inline float2 max(const float2& a, const float2& b); comp_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx); comp_device_inline float2 fabs(const float2& a); comp_device_inline float2 as_float2(const float4& a); comp_device_inline float2 mix(const float2& a, const float2& b, float t); #endif /* 
_KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #define length_v2(a) sqrt(dot(a, a)) #ifdef _KERNEL_CPU_ comp_device_inline float2 operator-(const float2& a) { return make_float2(-a.x, -a.y); } comp_device_inline float2 operator*(const float2& a, const float2& b) { return make_float2(a.x*b.x, a.y*b.y); } comp_device_inline float2 operator*(const float2& a, float f) { return make_float2(a.x*f, a.y*f); } comp_device_inline float2 operator*(float f, const float2& a) { return make_float2(a.x*f, a.y*f); } comp_device_inline float2 operator/(float f, const float2& a) { return make_float2(f/a.x, f/a.y); } comp_device_inline float2 operator/(const float2& a, float f) { float invf = 1.0f/f; return make_float2(a.x*invf, a.y*invf); } comp_device_inline float2 operator/(const float2& a, const float2& b) { return make_float2(a.x/b.x, a.y/b.y); } comp_device_inline float2 operator+(const float2& a, const float2& b) { return make_float2(a.x+b.x, a.y+b.y); } comp_device_inline float2 operator+(const float2& a, float b) { return make_float2(a.x+b, a.y+b); } comp_device_inline float2 operator-(const float2& a, const float2& b) { return make_float2(a.x-b.x, a.y-b.y); } comp_device_inline float2 operator+=(float2& a, const float2& b) { return a = a + b; } comp_device_inline float2 operator*=(float2& a, const float2& b) { return a = a * b; } comp_device_inline float2 operator*=(float2& a, float f) { return a = a * f; } comp_device_inline float2 operator/=(float2& a, const float2& b) { return a = a / b; } comp_device_inline float2 operator/=(float2& a, float f) { float invf = 1.0f/f; return a = a * invf; } comp_device_inline bool operator==(const float2& a, const float2& b) { return (a.x == b.x && a.y == b.y); } comp_device_inline bool operator!=(const float2& a, const float2& b) { return !(a == b); } comp_device_inline bool is_zero(const float2& a) { return (a.x == 0.0f && a.y == 0.0f); } comp_device_inline float 
average(const float2& a) { return (a.x + a.y)*(1.0f/2.0f); } comp_device_inline float dot(const float2& a, const float2& b) { return a.x*b.x + a.y*b.y; } comp_device_inline float cross(const float2& a, const float2& b) { return (a.x*b.y - a.y*b.x); } comp_device_inline float len(const float2& a) { return sqrt(dot(a, a)); } comp_device_inline float2 normalize(const float2& a) { return a/len(a); } comp_device_inline float2 normalize_len(const float2& a, float *t) { *t = len(a); return a/(*t); } comp_device_inline float2 safe_normalize(const float2& a) { float t = len(a); return (t != 0.0f)? a/t: a; } comp_device_inline float2 min(const float2& a, const float2& b) { return make_float2(min(a.x, b.x), min(a.y, b.y)); } comp_device_inline float2 max(const float2& a, const float2& b) { return make_float2(max(a.x, b.x), max(a.y, b.y)); } comp_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx) { return min(max(a, mn), mx); } comp_device_inline float2 fabs(const float2& a) { return make_float2(abs(a.x), abs(a.y)); } comp_device_inline float2 as_float2(const float4& a) { return make_float2(a.x, a.y); } comp_device_inline float2 mix(const float2& a, const float2& b, float t) { return a + t*(b - a); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* __UTIL_MATH_FLOAT2_H__ */ // END #include "util/util_math_float2.h" // -------------------------------------------------------- // START #include "util/util_math_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT3_H_ #define _UTIL_MATH_FLOAT3_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline float3 operator-(const float3& a); comp_device_inline float3 operator*(const float3& a, const float3& b); comp_device_inline float3 operator*(const float3& a, const float f); comp_device_inline float3 operator*(const float f, const float3& a); comp_device_inline float3 operator/(const float f, const float3& a); comp_device_inline float3 operator/(const float3& a, const float f); comp_device_inline float3 operator/(const float3& a, const float3& b); comp_device_inline float3 operator+(const float3& a, const float3& b); comp_device_inline float3 operator-(const float3& a, const float3& b); comp_device_inline float3 operator+=(float3& a, const float3& b); comp_device_inline float3 operator-=(float3& a, const float3& b); comp_device_inline float3 operator*=(float3& a, const float3& b); comp_device_inline float3 operator*=(float3& a, float f); comp_device_inline float3 operator/=(float3& a, const float3& b); comp_device_inline float3 operator/=(float3& a, float f); comp_device_inline bool operator==(const float3& a, const float3& b); comp_device_inline bool operator!=(const float3& a, const float3& b); comp_device_inline float dot(const float3& a, const float3& b); comp_device_inline float dot_xy(const float3& a, const float3& b); comp_device_inline 
float3 cross(const float3& a, const float3& b); comp_device_inline float3 normalize(const float3& a); comp_device_inline float3 min(const float3& a, const float3& b); comp_device_inline float3 max(const float3& a, const float3& b); comp_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx); comp_device_inline float3 clamp(const float3& a, float mn, float mx); comp_device_inline float3 fabs(const float3& a); comp_device_inline float3 mix(const float3& a, const float3& b, float t); comp_device_inline float3 rcp(const float3& a); #endif /* _KERNEL_CPU_ */ comp_device_inline float max3(float3 a); comp_device_inline float len(const float3 a); comp_device_inline float len_squared(const float3 a); #define length_v3(a) sqrt(dot(a, a)) comp_device_inline float3 saturate3(float3 a); comp_device_inline float3 safe_normalize(const float3 a); comp_device_inline float3 normalize_len(const float3 a, comp_inout(float, t)); comp_device_inline float3 safe_normalize_len(const float3 a, comp_inout(float, t)); comp_device_inline bool is_zero(const float3 a); comp_device_inline float reduce_add(const float3 a); comp_device_inline float average(const float3 a); comp_device_inline bool isequal_float3(const float3 a, const float3 b); /******************************************************************************* * Definition. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline float3 operator-(const float3& a) { #ifdef __KERNEL_SSE__ return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); #else return make_float3(-a.x, -a.y, -a.z); #endif } comp_device_inline float3 operator*(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_mul_ps(a.m128,b.m128)); #else return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); #endif } comp_device_inline float3 operator*(const float3& a, const float f) { #ifdef __KERNEL_SSE__ return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f))); #else return make_float3(a.x*f, a.y*f, a.z*f); #endif } comp_device_inline float3 operator*(const float f, const float3& a) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */ #if defined(__KERNEL_SSE__) && 0 return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); #else return make_float3(a.x*f, a.y*f, a.z*f); #endif } comp_device_inline float3 operator/(const float f, const float3& a) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */ #if defined(__KERNEL_SSE__) && 0 __m128 rc = _mm_rcp_ps(a.m128); return float3(_mm_mul_ps(_mm_set1_ps(f),rc)); #else return make_float3(f / a.x, f / a.y, f / a.z); #endif } comp_device_inline float3 operator/(const float3& a, const float f) { float invf = 1.0f/f; return a * invf; } comp_device_inline float3 operator/(const float3& a, const float3& b) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. 
*/ #if defined(__KERNEL_SSE__) && 0 __m128 rc = _mm_rcp_ps(b.m128); return float3(_mm_mul_ps(a, rc)); #else return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); #endif } comp_device_inline float3 operator+(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_add_ps(a.m128, b.m128)); #else return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); #endif } comp_device_inline float3 operator-(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_sub_ps(a.m128, b.m128)); #else return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); #endif } comp_device_inline float3 operator+=(float3& a, const float3& b) { return a = a + b; } comp_device_inline float3 operator-=(float3& a, const float3& b) { return a = a - b; } comp_device_inline float3 operator*=(float3& a, const float3& b) { return a = a * b; } comp_device_inline float3 operator*=(float3& a, float f) { return a = a * f; } comp_device_inline float3 operator/=(float3& a, const float3& b) { return a = a / b; } comp_device_inline float3 operator/=(float3& a, float f) { float invf = 1.0f/f; return a = a * invf; } comp_device_inline bool operator==(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; #else return (a.x == b.x && a.y == b.y && a.z == b.z); #endif } comp_device_inline bool operator!=(const float3& a, const float3& b) { return !(a == b); } comp_device_inline float dot(const float3& a, const float3& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); #else return a.x*b.x + a.y*b.y + a.z*b.z; #endif } comp_device_inline float dot_xy(const float3& a, const float3& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b)); #else return a.x*b.x + a.y*b.y; #endif } comp_device_inline float3 cross(const float3& a, const float3& b) { float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - 
a.y*b.x); return r; } comp_device_inline float3 normalize(const float3& a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); return float3(_mm_div_ps(a.m128, norm)); #else return a/len(a); #endif } comp_device_inline float3 min(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_min_ps(a.m128, b.m128)); #else return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); #endif } comp_device_inline float3 max(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_max_ps(a.m128, b.m128)); #else return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); #endif } comp_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx) { return min(max(a, mn), mx); } comp_device_inline float3 clamp(const float3& a, float mn, float mx) { return clamp(a, make_float3(mn,mn,mn), make_float3(mx,mx,mx)); } comp_device_inline float3 fabs(const float3& a) { #ifdef __KERNEL_SSE__ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); return float3(_mm_and_ps(a.m128, mask)); #else return make_float3(fabs(a.x), fabs(a.y), fabs(a.z)); #endif } comp_device_inline float3 mix(const float3& a, const float3& b, float t) { return a + t*(b - a); } comp_device_inline float3 rcp(const float3& a) { #ifdef __KERNEL_SSE__ const float4 r(_mm_rcp_ps(a.m128)); return float3(_mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a))); #else return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z); #endif } #endif /* _KERNEL_CPU_ */ comp_device_inline float max3(float3 a) { return max(max(a.x, a.y), a.z); } comp_device_inline float len(const float3 a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); #else return sqrt(dot(a, a)); #endif } comp_device_inline float len_squared(const float3 a) { return dot(a, a); } comp_device_inline float3 saturate3(float3 a) { return 
make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); } comp_device_inline float3 normalize_len(const float3 a, comp_inout(float, t)) { comp_get_ref(t) = len(a); float x = 1.0f / comp_get_ref(t); return a*x; } comp_device_inline float3 safe_normalize(const float3 a) { float t = len(a); return (t != 0.0f)? a * (1.0f/t) : a; } comp_device_inline float3 safe_normalize_len(const float3 a, comp_inout(float, t)) { comp_get_ref(t) = len(a); return (comp_get_ref(t) != 0.0f)? a/(comp_get_ref(t)): a; } comp_device_inline bool is_zero(const float3 a) { #ifdef __KERNEL_SSE__ return a == make_float3(0.0f); #else return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); #endif } comp_device_inline float reduce_add(const float3 a) { return (a.x + a.y + a.z); } comp_device_inline float average(const float3 a) { return reduce_add(a)*(1.0f/3.0f); } comp_device_inline bool isequal_float3(const float3 a, const float3 b) { #ifdef _KERNEL_OPENCL_ return all(a == b); #else return a == b; #endif } COMP_NAMESPACE_END #endif /* _UTIL_MATH_FLOAT3_H_ */ // END #include "util/util_math_float3.h" // -------------------------------------------------------- // START #include "util/util_math_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT4_H_ #define _UTIL_MATH_FLOAT4_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline float4 operator-(const float4& a); comp_device_inline float4 operator*(const float4& a, const float4& b); comp_device_inline float4 operator*(const float4& a, float f); comp_device_inline float4 operator*(float f, const float4& a); comp_device_inline float4 operator/(const float4& a, float f); comp_device_inline float4 operator/(const float4& a, const float4& b); comp_device_inline float4 operator+(const float4& a, const float4& b); comp_device_inline float4 operator+(const float4& a, float f); comp_device_inline float4 operator-(const float4& a, const float4& b); comp_device_inline float4 operator-(const float4& a, const float b); comp_device_inline float4 operator+=(float4& a, const float4& b); comp_device_inline float4 operator*=(float4& a, const float4& b); comp_device_inline float4 operator/=(float4& a, float f); comp_device_inline int4 operator<(const float4& a, const float4& b); comp_device_inline int4 operator>=(const float4& a, const float4& b); comp_device_inline int4 operator<=(const float4& a, const float4& b); comp_device_inline bool operator==(const float4& a, const float4& b); comp_device_inline float dot(const float4& a, const float4& b); comp_device_inline float4 fabs(const float4& a); comp_device_inline float len_squared(const float4& a); comp_device_inline float4 rcp(const float4& a); comp_device_inline float4 cross(const float4& a, const float4& b); comp_device_inline bool is_zero(const float4& a); comp_device_inline float reduce_add(const float4& a); comp_device_inline float average(const float4& a); comp_device_inline float len(const float4& a); comp_device_inline float4 normalize(const float4& a); comp_device_inline float4 safe_normalize(const float4& a); comp_device_inline float4 min(const float4& a, const float4& b); comp_device_inline float4 max(const float4& a, const float4& 
b); comp_device_inline float4 mix(const float4& a, const float4& b, float t); #endif /* _KERNEL_CPU_ */ #ifdef __KERNEL_SSE__ template __forceinline const float4 shuffle(const float4& b); template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b); # ifdef __KERNEL_SSE3__ template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b); template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b); # endif #endif /* __KERNEL_SSE__ */ #ifdef _KERNEL_CPU_ comp_device_inline float4 select(const int4& mask, const float4& a, const float4& b); comp_device_inline float4 reduce_min(const float4& a); comp_device_inline float4 reduce_max(const float4& a); # if 0 comp_device_inline float4 reduce_add(const float4& a); # endif #endif /* !_KERNEL_GPU_ */ #define length_v4(a) sqrt(dot(a, a)) /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline float4 operator-(const float4& a) { #ifdef __KERNEL_SSE__ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); return float4(_mm_xor_ps(a.m128, mask)); #else return make_float4(-a.x, -a.y, -a.z, -a.w); #endif } comp_device_inline float4 operator*(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_mul_ps(a.m128, b.m128)); #else return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); #endif } comp_device_inline float4 operator*(const float4& a, float f) { #if defined(__KERNEL_SSE__) return a * make_float4(f); #else return make_float4(a.x*f, a.y*f, a.z*f, a.w*f); #endif } comp_device_inline float4 operator*(float f, const float4& a) { return a * f; } comp_device_inline float4 operator/(const float4& a, float f) { return a * (1.0f/f); } comp_device_inline float4 operator/(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return a * rcp(b); #else return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); #endif } comp_device_inline float4 operator+(const float4& a, const float4& b) { 
#ifdef __KERNEL_SSE__ return float4(_mm_add_ps(a.m128, b.m128)); #else return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); #endif } comp_device_inline float4 operator+(const float4& a, const float b) { return make_float4(a.x+b, a.y+b, a.z+b, a.w+b); } comp_device_inline float4 operator-(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_sub_ps(a.m128, b.m128)); #else return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); #endif } comp_device_inline float4 operator-(const float4& a, const float b) { return make_float4(a.x-b, a.y-b, a.z-b, a.w-b); } comp_device_inline float4 operator+=(float4& a, const float4& b) { return a = a + b; } comp_device_inline float4 operator*=(float4& a, const float4& b) { return a = a * b; } comp_device_inline float4 operator/=(float4& a, float f) { return a = a / f; } comp_device_inline int4 operator<(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return int4(_mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128))); #else return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); #endif } comp_device_inline int4 operator>=(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return int4(_mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128))); #else return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); #endif } comp_device_inline int4 operator<=(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. 
*/ return int4(_mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128))); #else return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); #endif } comp_device_inline bool operator==(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; #else return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); #endif } comp_device_inline float dot(const float4& a, const float4& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); #else return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w); #endif } comp_device_inline float4 fabs(const float4& a) { return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w)); } comp_device_inline float len_squared(const float4& a) { return dot(a, a); } comp_device_inline float4 rcp(const float4& a) { #ifdef __KERNEL_SSE__ float4 r(_mm_rcp_ps(a.m128)); return float4(_mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a))); #else return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w); #endif } comp_device_inline float4 cross(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b)); #else return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f); #endif } comp_device_inline bool is_zero(const float4& a) { #ifdef __KERNEL_SSE__ return a == make_float4(0.0f); #else return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); #endif } comp_device_inline float reduce_add(const float4& a) { #ifdef __KERNEL_SSE__ float4 h(shuffle<1,0,3,2>(a) + a); /* TODO(sergey): Investigate efficiency. 
*/ return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); #else return ((a.x + a.y) + (a.z + a.w)); #endif } comp_device_inline float average(const float4& a) { return reduce_add(a) * 0.25f; } comp_device_inline float len(const float4& a) { return sqrt(dot(a, a)); } comp_device_inline float4 normalize(const float4& a) { return a/len(a); } comp_device_inline float4 safe_normalize(const float4& a) { float t = len(a); return (t != 0.0f)? a/t: a; } comp_device_inline float4 min(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_min_ps(a.m128, b.m128)); #else return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); #endif } comp_device_inline float4 max(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_max_ps(a.m128, b.m128)); #else return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); #endif } #endif /* !__KERNEL_OPENCL__*/ #ifdef __KERNEL_SSE__ template __forceinline const float4 shuffle(const float4& b) { return float4(_mm_castsi128_ps( _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); } template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b) { return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); } # ifdef __KERNEL_SSE3__ template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b) { return float4(_mm_moveldup_ps(b)); } template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b) { return float4(_mm_movehdup_ps(b)); } # endif /* __KERNEL_SSE3__ */ #endif /* __KERNEL_SSE__ */ #ifdef _KERNEL_CPU_ comp_device_inline float4 select(const int4& mask, const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return float4(_mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a), _mm_andnot_ps(_mm_cvtepi32_ps(mask), b))); #else return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); #endif } comp_device_inline float4 reduce_min(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = min(shuffle<1,0,3,2>(a), a); return min(shuffle<2,3,0,1>(h), h); #else return make_float4(min(min(a.x, a.y), min(a.z, a.w))); #endif } comp_device_inline float4 reduce_max(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = max(shuffle<1,0,3,2>(a), a); return max(shuffle<2,3,0,1>(h), h); #else return make_float4(max(max(a.x, a.y), max(a.z, a.w))); #endif } comp_device_inline float4 mix(const float4& a, const float4& b, float t) { return a + t*(b - a); } #if 0 comp_device_inline float4 reduce_add(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = shuffle<1,0,3,2>(a) + a; return shuffle<2,3,0,1>(h) + h; #else return make_float4((a.x + a.y) + (a.z + a.w)); #endif } #endif #endif /* !_KERNEL_GPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_FLOAT4_H_ */ // END #include "util/util_math_float4.h" COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ /* Interpolation */ template A lerp(const A& a, const A& b, const B& t) { return (A)(a * ((B)1 - t) + b * t); } /* Triangle */ comp_device_inline float triangle_area(const float3& v1, const float3& v2, const float3& v3) { return len(cross(v3 - v2, v1 - v2))*0.5f; } #endif /* _KERNEL_CPU_ */ /* Orthonormal vectors */ comp_device_inline void make_orthonormals(const float3 N, comp_inout(float3, a), comp_inout(float3, b)) { if(N.x != N.y || N.x != N.z) comp_get_ref(a) = make_float3(N.z-N.y, N.x-N.z, N.y-N.x); //(1,1,1)x N else comp_get_ref(a) = make_float3(N.z-N.y, N.x+N.z, -N.y-N.x); //(-1,1,1)x N comp_get_ref(a) = normalize(comp_get_ref(a)); comp_get_ref(b) = cross(N, comp_get_ref(a)); } /* Color division */ comp_device_inline float3 safe_invert_color(float3 a) { float x, y, z; x = (a.x != 0.0f)? 1.0f/a.x: 0.0f; y = (a.y != 0.0f)? 1.0f/a.y: 0.0f; z = (a.z != 0.0f)? 1.0f/a.z: 0.0f; return make_float3(x, y, z); } comp_device_inline float3 safe_divide_color(float3 a, float3 b) { float x, y, z; x = (b.x != 0.0f)? 
a.x/b.x: 0.0f; y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; return make_float3(x, y, z); } comp_device_inline float3 safe_divide_even_color(float3 a, float3 b) { float x, y, z; x = (b.x != 0.0f)? a.x/b.x: 0.0f; y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; /* try to get gray even if b is zero */ if(b.x == 0.0f) { if(b.y == 0.0f) { x = z; y = z; } else if(b.z == 0.0f) { x = y; z = y; } else x = 0.5f*(y + z); } else if(b.y == 0.0f) { if(b.z == 0.0f) { y = x; z = x; } else y = 0.5f*(x + z); } else if(b.z == 0.0f) { z = 0.5f*(x + y); } return make_float3(x, y, z); } /* Rotation of point around axis and angle */ comp_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) { float costheta = cos(angle); float sintheta = sin(angle); float3 r; r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); return r; } /* NaN-safe math ops */ comp_device_inline float safe_sqrt(float f) { return sqrt(max(f, 0.0f)); } comp_device float safe_asin(float a) { return asin(clamp(a, -1.0f, 1.0f)); } comp_device float safe_acos(float a) { return acos(clamp(a, -1.0f, 1.0f)); } comp_device float compatible_pow(float x, float y) { #ifdef _KERNEL_GPU_ if(y == 0.0f) /* x^0 -> 1, including 0^0 */ return 1.0f; /* GPU pow doesn't accept negative x, do manual checks here */ if(x < 0.0f) { if(mod(-y, 2.0f) == 0.0f) return pow(-x, y); else return -pow(-x, y); } else if(x == 0.0f) return 0.0f; #endif return pow(x, y); } comp_device 
float safe_pow(float a, float b) { if(UNLIKELY(a < 0.0f && b != float_to_int(b))) return 0.0f; return compatible_pow(a, b); } comp_device float safe_divide(float a, float b) { return (b != 0.0f)? a/b: 0.0f; } comp_device float safe_log(float a, float b) { if(UNLIKELY(a <= 0.0f || b <= 0.0f)) return 0.0f; return safe_divide(log(a),log(b)); } comp_device float safe_modulo(float a, float b) { return (b != 0.0f)? mod(a, b): 0.0f; } comp_device_inline float xor_signmask(float x, int y) { return int_as_float(float_as_int(x) ^ y); } COMP_NAMESPACE_END #endif /* _UTIL_MATH_H_ */ // END #include "util/util_math.h" // -------------------------------------------------------- // START #include "util/util_math_intersect.h" // -------------------------------------------------------- #ifndef _UTIL_MATH_INTERSECT_H_ #define _UTIL_MATH_INTERSECT_H_ COMP_NAMESPACE_BEGIN /* Ray Plane Intersection */ comp_device_inline bool ray_plane_intersect(comp_device_struct Ray* ray, float3 planePoint, float3 planeNormal, float* length) { // assume ray->D and planeNormal are unit vectors float denom = dot(planeNormal, ray->D); if (denom > 1e-6) { *length = dot(planePoint - ray->P, planeNormal) / denom; return true; } return false; } COMP_NAMESPACE_END #endif // END #include "util/util_math_intersect.h" // -------------------------------------------------------- // START #include "util/util_texture.h" // -------------------------------------------------------- /* * Copyright 2011-2016 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TEXTURE_H_ #define _UTIL_TEXTURE_H_ COMP_NAMESPACE_BEGIN COMP_NAMESPACE_END #endif /* _UTIL_TEXTURE_H_ */ // END #include "util/util_texture.h" // -------------------------------------------------------- // START #include "util/util_color.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_COLOR_H_ #define _UTIL_COLOR_H_ // -------------------------------------------------------- // START #include "util/util_math.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_H_ #define _UTIL_MATH_H_ /* Math * * Basic math functions on scalar and vector types. 
This header is used by * both the kernel code when compiled as C++, and other C++ non-kernel code. */ #ifdef _KERNEL_CPU_ # include # include # include # include #endif /* _KERNEL_CPU_ */ // -------------------------------------------------------- // START #include "util/util_types.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_H_ #define _UTIL_TYPES_H_ #ifdef _KERNEL_CPU_ #include #endif /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) # define __KERNEL_64_BIT__ #endif /* Qualifiers for kernel code shared by CPU and GPU */ #ifdef _KERNEL_CPU_ # define comp_device_inline static inline # define comp_device_noinline static # define comp_global # define comp_constant # define comp_local # define comp_local_param # define comp_private # define comp_restrict __restrict # define __KERNEL_WITH_SSE_ALIGN__ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define comp_device_inline static __forceinline # define comp_device_forceinline static __forceinline # define comp_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define comp_try_align(...) __declspec(align(__VA_ARGS__)) # else /* __KERNEL_64_BIT__ */ # undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ # define comp_try_align(...) 
# endif /* __KERNEL_64_BIT__ */ # define comp_may_alias # define comp_always_inline __forceinline # define comp_never_inline __declspec(noinline) # define comp_maybe_unused # else /* _WIN32 && !FREE_WINDOWS */ //# define comp_device_inline static inline __attribute__((always_inline)) # define comp_device_forceinline static inline __attribute__((always_inline)) # define comp_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) # endif # define comp_try_align(...) __attribute__((aligned(__VA_ARGS__))) # define comp_may_alias __attribute__((__may_alias__)) # define comp_always_inline __attribute__((always_inline)) # define comp_never_inline __attribute__((noinline)) # define comp_maybe_unused __attribute__((used)) # endif /* _WIN32 && !FREE_WINDOWS */ #endif /* _KERNEL_GPU_ */ /* Standard Integer Types */ #ifdef _KERNEL_CPU_ /* int8_t, uint16_t, and friends */ # ifndef _WIN32 # include # endif /* SIMD Types */ // # include "util/util_optimization.h" #endif /* _KERNEL_GPU_ */ COMP_NAMESPACE_BEGIN /* Types * * Define simpler unsigned type names, and integer with defined number of bits. * Also vector types, named to be compatible with OpenCL builtin types, while * working for CUDA and C++ too. 
*/ /* Shorter Unsigned Names */ #ifdef _KERNEL_CPU_ typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned short ushort; #endif // #ifndef __KERNEL_OPENCL__ // typedef unsigned char uchar; // typedef unsigned int uint; // typedef unsigned short ushort; // #endif /* Fixed Bits Types */ // #ifdef __KERNEL_OPENCL__ // typedef ulong uint64_t; // #endif #ifdef _KERNEL_CPU_ # ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; # ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif # endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; #endif /* _KERNEL_GPU_ */ // comp_device_inline size_t align_up(size_t offset, size_t alignment) // { // return (offset + alignment - 1) & ~(alignment - 1); // } // // comp_device_inline size_t divide_up(size_t x, size_t y) // { // return (x + y - 1) / y; // } // // comp_device_inline size_t round_up(size_t x, size_t multiple) // { // return ((x + multiple - 1) / multiple) * multiple; // } // // comp_device_inline size_t round_down(size_t x, size_t multiple) // { // return (x / multiple) * multiple; // } /* Interpolation types for textures * cuda also use texture space to store other objects */ #if defined(_KERNEL_CPU_) || defined(_KERNEL_OPENCL_) enum InterpolationType { INTERPOLATION_LINEAR = 0, INTERPOLATION_CLOSEST = 1, INTERPOLATION_CUBIC = 2, INTERPOLATION_SMART = 3 }; enum ExtensionType { /* Cause the image to repeat horizontally and vertically. */ EXTENSION_REPEAT = 0, /* Extend by repeating edge pixels of the image. */ EXTENSION_EXTEND = 1, /* Clip to image size and set exterior pixels as transparent. */ EXTENSION_CLIP = 2, }; #endif /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. 
*/ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ #if defined(__GNUC__) && defined(_KERNEL_CPU_) # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) (x) # define UNLIKELY(x) (x) #endif // #if defined(__GNUC__) || defined(__clang__) && defined(_KERNEL_CPU_) // /* Some magic to be sure we don't have reference in the type. */ // template static inline T decltype_helper(T x) { return x; } // # define TYPEOF(x) decltype(decltype_helper(x)) // #endif COMP_NAMESPACE_END #ifdef _KERNEL_CPU_ # include # define util_assert(statement) assert(statement) #else # define util_assert(statement) #endif /* Vectorized types declaration. */ // -------------------------------------------------------- // START #include "util/util_types_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT2_H_ #define _UTIL_TYPES_INT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int2 { int x, y; __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int2 make_int2(int x, int y); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int2(x, y) ((ivec2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_H_ */ // END #include "util/util_types_int2.h" // -------------------------------------------------------- // START #include "util/util_types_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_H_ #define _UTIL_TYPES_INT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) int3 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int3(); __forceinline int3(const int3& a); __forceinline explicit int3(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int3& operator =(const int3& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int3 make_int3(int i); comp_device_inline int3 make_int3(int x, int y, int z); comp_device_inline void print_int3(const char *label, const int3& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int3(x, y, z) ((int3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_H_ */ // END #include "util/util_types_int3.h" // -------------------------------------------------------- // START #include "util/util_types_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_H_ #define _UTIL_TYPES_INT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float3; struct comp_try_align(16) int4 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int4(); __forceinline int4(const int4& a); __forceinline explicit int4(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int4& operator=(const int4& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int4 make_int4(int i); comp_device_inline int4 make_int4(int x, int y, int z, int w); comp_device_inline int4 make_int4(const float3& f); comp_device_inline void print_int4(const char *label, const int4& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_H_ */ // END #include "util/util_types_int4.h" // -------------------------------------------------------- // START #include "util/util_types_uint2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_H_ #define _UTIL_TYPES_UINT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint2 { uint x, y; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint2 make_uint2(uint x, uint y); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint2(x, y) ((uint2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_H_ */ // END #include "util/util_types_uint2.h" // -------------------------------------------------------- // START #include "util/util_types_uint3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT3_H_ #define _UTIL_TYPES_UINT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint3 { uint x, y, z; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint3 make_uint3(uint x, uint y, uint z); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint3(x, y, z) ((uint3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_H_ */ // END #include "util/util_types_uint3.h" // -------------------------------------------------------- // START #include "util/util_types_uint4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_H_ #define _UTIL_TYPES_UINT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint4 { uint x, y, z, w; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_H_ */ // END #include "util/util_types_uint4.h" // -------------------------------------------------------- // START #include "util/util_types_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_H_ #define _UTIL_TYPES_FLOAT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float2 { float x, y; __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float2 make_float2(float x, float y); comp_device_inline void print_float2(const char *label, const float2& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float2(x, y) ((float2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_H_ */ // END #include "util/util_types_float2.h" // -------------------------------------------------------- // START #include "util/util_types_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_H_ #define _UTIL_TYPES_FLOAT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) float3 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float3(); __forceinline float3(const float3& a); __forceinline explicit float3(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float3& operator =(const float3& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float3 make_float3(float f); comp_device_inline float3 make_float3(float x, float y, float z); comp_device_inline void print_float3(const char *label, const float3& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float3(x, y, z) ((float3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_H_ */ // END #include "util/util_types_float3.h" // -------------------------------------------------------- // START #include "util/util_types_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_H_ #define _UTIL_TYPES_FLOAT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int4; struct comp_try_align(16) float4 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float4(); __forceinline float4(const float4& a); __forceinline explicit float4(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float4& operator =(const float4& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float4 make_float4(float f); comp_device_inline float4 make_float4(float x, float y, float z, float w); comp_device_inline float4 make_float4(const int4& i); comp_device_inline void print_float4(const char *label, const float4& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_H_ */ // END #include "util/util_types_float4.h" /* Vectorized types implementation. */ // -------------------------------------------------------- // START #include "util/util_types_int2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_INT2_IMPL_H_ #define _UTIL_TYPES_INT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ int int2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } int& int2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_IMPL_H_ */ // END #include "util/util_types_int2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_IMPL_H_ #define _UTIL_TYPES_INT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int3::int3() { } __forceinline int3::int3(const __m128i& a) : m128(a) { } __forceinline int3::int3(const int3& a) : m128(a.m128) { } __forceinline int3::operator const __m128i&(void) const { return m128; } __forceinline int3::operator __m128i&(void) { return m128; } __forceinline int3& int3::operator =(const int3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline int& int3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline int3 make_int3(int i) { #ifdef __KERNEL_SSE__ int3 a(_mm_set1_epi32(i)); #else int3 a = {i, i, i, i}; #endif return a; } comp_device_inline int3 make_int3(int x, int y, int z) { #ifdef __KERNEL_SSE__ int3 a(_mm_set_epi32(0, z, y, x)); #else int3 a = {x, y, z, 0}; #endif return a; } comp_device_inline void print_int3(const char *label, const int3& a) { printf("%s: %d %d %d\n", label, a.x, a.y, a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_IMPL_H_ */ // END #include "util/util_types_int3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_IMPL_H_ #define _UTIL_TYPES_INT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int4::int4() { } __forceinline int4::int4(const int4& a) : m128(a.m128) { } __forceinline int4::int4(const __m128i& a) : m128(a) { } __forceinline int4::operator const __m128i&(void) const { return m128; } __forceinline int4::operator __m128i&(void) { return m128; } __forceinline int4& int4::operator=(const int4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline int& int4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline int4 make_int4(int i) { #ifdef __KERNEL_SSE__ int4 a(_mm_set1_epi32(i)); #else int4 a = {i, i, i, i}; #endif return a; } comp_device_inline int4 make_int4(int x, int y, int z, int w) { #ifdef __KERNEL_SSE__ int4 a(_mm_set_epi32(w, z, y, x)); #else int4 a = {x, y, z, w}; #endif return a; } comp_device_inline int4 make_int4(const float3& f) { #ifdef __KERNEL_SSE__ int4 a(_mm_cvtps_epi32(f.m128)); #else int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; #endif return a; } comp_device_inline void print_int4(const char *label, const int4& a) { printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_IMPL_H_ */ // END #include "util/util_types_int4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_IMPL_H_ #define _UTIL_TYPES_UINT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint2::operator[](uint i) const { util_assert(i < 2); return *(&x + i); } __forceinline uint& uint2::operator[](uint i) { util_assert(i < 2); return *(&x + i); } comp_device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_IMPL_H_ */ // END #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_UINT3_IMPL_H_ #define _UTIL_TYPES_UINT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint3::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint3::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_IMPL_H_ */ // END #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_IMPL_H_ #define _UTIL_TYPES_UINT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint4::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint4::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_IMPL_H_ */ // END #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_IMPL_H_ #define _UTIL_TYPES_FLOAT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline float float2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } __forceinline float& float2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } comp_device_inline void print_float2(const char *label, const float2& a) { printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_IMPL_H_ */ // END #include "util/util_types_float2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_IMPL_H_ #define _UTIL_TYPES_FLOAT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float3::float3() { } __forceinline float3::float3(const float3& a) : m128(a.m128) { } __forceinline float3::float3(const __m128& a) : m128(a) { } __forceinline float3::operator const __m128&(void) const { return m128; } __forceinline float3::operator __m128&(void) { return m128; } __forceinline float3& float3::operator =(const float3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline float& float3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline float3 make_float3(float f) { #ifdef __KERNEL_SSE__ float3 a(_mm_set1_ps(f)); #else float3 a = {f, f, f, f}; #endif return a; } comp_device_inline float3 make_float3(float x, float y, float z) { #ifdef __KERNEL_SSE__ float3 a(_mm_set_ps(0.0f, z, y, x)); #else float3 a = {x, y, z, 0.0f}; #endif return a; } comp_device_inline void print_float3(const char *label, const float3& a) { printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_IMPL_H_ */ // END #include "util/util_types_float3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_IMPL_H_ #define _UTIL_TYPES_FLOAT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float4::float4() { } __forceinline float4::float4(const float4& a) : m128(a.m128) { } __forceinline float4::float4(const __m128& a) : m128(a) { } __forceinline float4::operator const __m128&(void) const { return m128; } __forceinline float4::operator __m128&(void) { return m128; } __forceinline float4& float4::operator =(const float4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline float& float4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline float4 make_float4(float f) { #ifdef __KERNEL_SSE__ float4 a(_mm_set1_ps(f)); #else float4 a = {f, f, f, f}; #endif return a; } comp_device_inline float4 make_float4(float x, float y, float z, float w) { #ifdef __KERNEL_SSE__ float4 a(_mm_set_ps(w, z, y, x)); #else float4 a = {x, y, z, w}; #endif return a; } comp_device_inline float4 make_float4(const int4& i) { #ifdef __KERNEL_SSE__ float4 a(_mm_cvtepi32_ps(i.m128)); #else float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; #endif return a; } comp_device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, 
(double)a.z, (double)a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_IMPL_H_ */ // END #include "util/util_types_float4_impl.h" #endif /* _UTIL_TYPES_H_ */ // END #include "util/util_types.h" COMP_NAMESPACE_BEGIN /* Float Pi variations */ /* Division */ #ifndef M_PI_F # define M_PI_F (3.1415926535897932f) /* pi */ #endif #ifndef M_PI_2_F # define M_PI_2_F (1.5707963267948966f) /* pi/2 */ #endif #ifndef M_PI_4_F # define M_PI_4_F (0.7853981633974830f) /* pi/4 */ #endif #ifndef M_1_PI_F # define M_1_PI_F (0.3183098861837067f) /* 1/pi */ #endif #ifndef M_2_PI_F # define M_2_PI_F (0.6366197723675813f) /* 2/pi */ #endif /* Multiplication */ #ifndef M_2PI_F # define M_2PI_F (6.2831853071795864f) /* 2*pi */ #endif #ifndef M_4PI_F # define M_4PI_F (12.566370614359172f) /* 4*pi */ #endif /* Float sqrt variations */ #ifndef M_SQRT2_F # define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ #endif #ifndef M_LN2_F # define M_LN2_F (0.6931471805599453f) /* ln(2) */ #endif #ifndef M_LN10_F # define M_LN10_F (2.3025850929940457f) /* ln(10) */ #endif /* Scalar */ #ifdef _WIN32 # ifndef __KERNEL_OPENCL__ comp_device_inline float fmaxf(float a, float b) { return (a > b)? a: b; } comp_device_inline float fminf(float a, float b) { return (a < b)? a: b; } # endif /* !__KERNEL_OPENCL__ */ #endif /* _WIN32 */ #ifdef _KERNEL_CPU_ using std::isfinite; using std::isnan; using std::fabs; #define floor(var) floorf(var) #define ceil(var) ceilf(var) #define cos(var) cosf(var) #define sin(var) sinf(var) #define sqrt(var) sqrtf(var) #define asin(var) asinf(var) #define acos(var) acos(var) #define mod(var1, var2) fmodf(var1, var2) #define pow(var1, var2) powf(var1, var2) #define log(var) logf(var) comp_device_inline int max(int a, int b) { return (a > b)? a: b; } comp_device_inline int min(int a, int b) { return (a < b)? a: b; } comp_device_inline float max(float a, float b) { return (a > b)? a: b; } comp_device_inline float min(float a, float b) { return (a < b)? 
a: b; } comp_device_inline double max(double a, double b) { return (a > b)? a: b; } comp_device_inline double min(double a, double b) { return (a < b)? a: b; } /* These 2 guys are templated for usage with registers data. * * NOTE: Since this is CPU-only functions it is ok to use references here. * But for other devices we'll need to be careful about this. */ template comp_device_inline T min4(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); } template comp_device_inline T max4(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); } #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define abs(x) fabs(x) #define mod(x, y) fmod(x, y) #endif /* _KERNEL_OPENCL_ */ comp_device_inline float min4(float a, float b, float c, float d) { return min(min(a, b), min(c, d)); } comp_device_inline float max4(float a, float b, float c, float d) { return max(max(a, b), max(c, d)); } #ifdef _KERNEL_CPU_ /* Int/Float conversion */ comp_device_inline int uint_as_int(uint i) { union { uint ui; int i; } u; u.ui = i; return u.i; } comp_device_inline uint int_as_uint(int i) { union { uint ui; int i; } u; u.i = i; return u.ui; } comp_device_inline int float_as_int(float f) { union { int i; float f; } u; u.f = f; return u.i; } comp_device_inline float int_as_float(int i) { union { int i; float f; } u; u.i = i; return u.f; } comp_device_inline uint float_as_uint(float f) { union { uint i; float f; } u; u.f = f; return u.i; } comp_device_inline float uint_as_float(uint i) { union { uint i; float f; } u; u.i = i; return u.f; } #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define uint_as_int(i) as_int(i) #define int_as_uint(i) as_uint(i) #define float_as_int(f) as_int(f) #define int_as_float(i) as_float(i) #define float_as_uint(f) as_uint(f) #define uint_as_float(i) as_float(i) #endif /* Versions of functions which are safe for fast math. 
*/ comp_device_inline bool isnan_safe(float f) { uint x = float_as_uint(f); return (x << 1) > 0xff000000u; } // comp_device_inline bool isfinite_safe(float f) // { // /* By IEEE 754 rule, 2*Inf equals Inf */ // uint x = float_as_uint(f); // return (f == f) && (x == 0 || (f != 2.0f*f)) && !((x << 1) > 0xff000000u); // } // comp_device_inline float ensure_finite(float v) // { // return isfinite_safe(v)? v : 0.0f; // } #ifdef _KERNEL_CPU_ comp_device_inline int clamp(int a, int mn, int mx) { return min(max(a, mn), mx); } comp_device_inline float clamp(float a, float mn, float mx) { return min(max(a, mn), mx); } comp_device_inline float mix(float a, float b, float t) { return a + t*(b - a); } #endif /* __KERNEL_OPENCL__ */ comp_device_inline float saturate(float a) { return clamp(a, 0.0f, 1.0f); } #ifdef _KERNEL_CPU_ comp_device_inline int float_to_int(float f) { return (int)f; } #endif #ifdef _KERNEL_OPENCL_ comp_device_inline int float_to_int(float f) { return convert_int(f); } #endif /* _KERNEL_OPENCL_ */ comp_device_inline int floor_to_int(float f) { return float_to_int(floor(f)); } comp_device_inline int ceil_to_int(float f) { return float_to_int(ceil(f)); } comp_device_inline float signf(float f) { return (f < 0.0f)? 
-1.0f: 1.0f; } comp_device_inline float nonzerof(float f, float eps) { if(abs(f) < eps) return signf(f)*eps; else return f; } comp_device_inline float smoothstepf(float f) { float ff = f*f; return (3.0f*ff - 2.0f*ff*f); } comp_device_inline int modulo(int x, int m) { return (x % m + m) % m; } comp_device_inline float3 float2_to_float3(const float2 a) { return make_float3(a.x, a.y, 0.0f); } comp_device_inline float3 float4_to_float3(const float4 a) { return make_float3(a.x, a.y, a.z); } comp_device_inline float4 float3_to_float4(const float3 a) { return make_float4(a.x, a.y, a.z, 1.0f); } COMP_NAMESPACE_END // -------------------------------------------------------- // START #include "util/util_math_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT2_H_ #define _UTIL_MATH_INT2_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline bool operator==(const int2 a, const int2 b); comp_device_inline int2 operator+(const int2 &a, const int2 &b); comp_device_inline int2 operator+=(int2 &a, const int2 &b); comp_device_inline int2 operator-(const int2 &a, const int2 &b); comp_device_inline int2 operator*(const int2 &a, const int2 &b); comp_device_inline int2 operator/(const int2 &a, const int2 &b); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline bool operator==(const int2 a, const int2 b) { return (a.x == b.x && a.y == b.y); } comp_device_inline int2 operator+(const int2 &a, const int2 &b) { return make_int2(a.x + b.x, a.y + b.y); } comp_device_inline int2 operator+=(int2 &a, const int2 &b) { return a = a + b; } comp_device_inline int2 operator-(const int2 &a, const int2 &b) { return make_int2(a.x - b.x, a.y - b.y); } comp_device_inline int2 operator*(const int2 &a, const int2 &b) { return make_int2(a.x * b.x, a.y * b.y); } comp_device_inline int2 operator/(const int2 &a, const int2 &b) { return make_int2(a.x / b.x, a.y / b.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT2_H_ */ // END #include "util/util_math_int2.h" // -------------------------------------------------------- // START #include "util/util_math_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT3_H_ #define _UTIL_MATH_INT3_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline int3 min(int3 a, int3 b); comp_device_inline int3 max(int3 a, int3 b); comp_device_inline int3 clamp(const int3& a, int mn, int mx); comp_device_inline int3 clamp(const int3& a, int3& mn, int mx); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline int3 min(int3 a, int3 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int3(_mm_min_epi32(a.m128, b.m128)); #else return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); #endif } comp_device_inline int3 max(int3 a, int3 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int3(_mm_max_epi32(a.m128, b.m128)); #else return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); #endif } comp_device_inline int3 clamp(const int3& a, int mn, int mx) { #ifdef __KERNEL_SSE__ return min(max(a, make_int3(mn)), make_int3(mx)); #else return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); #endif } comp_device_inline int3 clamp(const int3& a, int3& mn, int mx) { #ifdef __KERNEL_SSE__ return min(max(a, mn), make_int3(mx)); #else return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); #endif } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT3_H_ */ // END #include "util/util_math_int3.h" // -------------------------------------------------------- // START #include "util/util_math_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * 
Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT4_H_ #define _UTIL_MATH_INT4_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline int4 operator+(const int4& a, const int4& b); comp_device_inline int4 operator+=(int4& a, const int4& b); comp_device_inline int4 operator>>(const int4& a, int i); comp_device_inline int4 min(int4 a, int4 b); comp_device_inline int4 max(int4 a, int4 b); comp_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx); comp_device_inline int4 select(const int4& mask, const int4& a, const int4& b); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline int4 operator+(const int4& a, const int4& b) { #ifdef __KERNEL_SSE__ return int4(_mm_add_epi32(a.m128, b.m128)); #else return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); #endif } comp_device_inline int4 operator+=(int4& a, const int4& b) { return a = a + b; } comp_device_inline int4 operator>>(const int4& a, int i) { #ifdef __KERNEL_SSE__ return int4(_mm_srai_epi32(a.m128, i)); #else return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); #endif } comp_device_inline int4 min(int4 a, int4 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int4(_mm_min_epi32(a.m128, b.m128)); #else return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); #endif } comp_device_inline int4 max(int4 a, int4 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int4(_mm_max_epi32(a.m128, b.m128)); #else return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); #endif } comp_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx) { return min(max(a, mn), mx); } comp_device_inline int4 select(const int4& mask, const int4& a, const int4& b) { #ifdef __KERNEL_SSE__ const __m128 m = _mm_cvtepi32_ps(mask); /* TODO(sergey): avoid cvt. */ return int4(_mm_castps_si128( _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); #else return make_int4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); #endif } comp_device_inline int4 load_int4(const int *v) { #ifdef __KERNEL_SSE__ return int4(_mm_loadu_si128((__m128i*)v)); #else return make_int4(v[0], v[1], v[2], v[3]); #endif } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT4_H_ */ // END #include "util/util_math_int4.h" // -------------------------------------------------------- // START #include "util/util_math_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT2_H_ #define _UTIL_MATH_FLOAT2_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. 
*/

#ifdef _KERNEL_CPU_
/* Unary and binary arithmetic operators. */
comp_device_inline float2 operator-(const float2& a);
comp_device_inline float2 operator*(const float2& a, const float2& b);
comp_device_inline float2 operator*(const float2& a, float f);
comp_device_inline float2 operator*(float f, const float2& a);
comp_device_inline float2 operator/(float f, const float2& a);
comp_device_inline float2 operator/(const float2& a, float f);
comp_device_inline float2 operator/(const float2& a, const float2& b);
comp_device_inline float2 operator+(const float2& a, const float2& b);
comp_device_inline float2 operator+(const float2& a, const float b);
comp_device_inline float2 operator-(const float2& a, const float2& b);

/* Compound assignment operators. */
comp_device_inline float2 operator+=(float2& a, const float2& b);
comp_device_inline float2 operator*=(float2& a, const float2& b);
comp_device_inline float2 operator*=(float2& a, float f);
comp_device_inline float2 operator/=(float2& a, const float2& b);
comp_device_inline float2 operator/=(float2& a, float f);

/* Comparison. */
comp_device_inline bool operator==(const float2& a, const float2& b);
comp_device_inline bool operator!=(const float2& a, const float2& b);

/* Vector helpers. */
comp_device_inline bool is_zero(const float2& a);
comp_device_inline float average(const float2& a);
comp_device_inline float dot(const float2& a, const float2& b);
comp_device_inline float cross(const float2& a, const float2& b);
comp_device_inline float len(const float2& a);
comp_device_inline float2 normalize(const float2& a);
comp_device_inline float2 normalize_len(const float2& a, float *t);
comp_device_inline float2 safe_normalize(const float2& a);
comp_device_inline float2 min(const float2& a, const float2& b);
comp_device_inline float2 max(const float2& a, const float2& b);
comp_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx);
comp_device_inline float2 fabs(const float2& a);
comp_device_inline float2 as_float2(const float4& a);
comp_device_inline float2 mix(const float2& a, const float2& b, float t);
#endif /*
_KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #define length_v2(a) sqrt(dot(a, a)) #ifdef _KERNEL_CPU_ comp_device_inline float2 operator-(const float2& a) { return make_float2(-a.x, -a.y); } comp_device_inline float2 operator*(const float2& a, const float2& b) { return make_float2(a.x*b.x, a.y*b.y); } comp_device_inline float2 operator*(const float2& a, float f) { return make_float2(a.x*f, a.y*f); } comp_device_inline float2 operator*(float f, const float2& a) { return make_float2(a.x*f, a.y*f); } comp_device_inline float2 operator/(float f, const float2& a) { return make_float2(f/a.x, f/a.y); } comp_device_inline float2 operator/(const float2& a, float f) { float invf = 1.0f/f; return make_float2(a.x*invf, a.y*invf); } comp_device_inline float2 operator/(const float2& a, const float2& b) { return make_float2(a.x/b.x, a.y/b.y); } comp_device_inline float2 operator+(const float2& a, const float2& b) { return make_float2(a.x+b.x, a.y+b.y); } comp_device_inline float2 operator+(const float2& a, float b) { return make_float2(a.x+b, a.y+b); } comp_device_inline float2 operator-(const float2& a, const float2& b) { return make_float2(a.x-b.x, a.y-b.y); } comp_device_inline float2 operator+=(float2& a, const float2& b) { return a = a + b; } comp_device_inline float2 operator*=(float2& a, const float2& b) { return a = a * b; } comp_device_inline float2 operator*=(float2& a, float f) { return a = a * f; } comp_device_inline float2 operator/=(float2& a, const float2& b) { return a = a / b; } comp_device_inline float2 operator/=(float2& a, float f) { float invf = 1.0f/f; return a = a * invf; } comp_device_inline bool operator==(const float2& a, const float2& b) { return (a.x == b.x && a.y == b.y); } comp_device_inline bool operator!=(const float2& a, const float2& b) { return !(a == b); } comp_device_inline bool is_zero(const float2& a) { return (a.x == 0.0f && a.y == 0.0f); } comp_device_inline float 
average(const float2& a) { return (a.x + a.y)*(1.0f/2.0f); } comp_device_inline float dot(const float2& a, const float2& b) { return a.x*b.x + a.y*b.y; } comp_device_inline float cross(const float2& a, const float2& b) { return (a.x*b.y - a.y*b.x); } comp_device_inline float len(const float2& a) { return sqrt(dot(a, a)); } comp_device_inline float2 normalize(const float2& a) { return a/len(a); } comp_device_inline float2 normalize_len(const float2& a, float *t) { *t = len(a); return a/(*t); } comp_device_inline float2 safe_normalize(const float2& a) { float t = len(a); return (t != 0.0f)? a/t: a; } comp_device_inline float2 min(const float2& a, const float2& b) { return make_float2(min(a.x, b.x), min(a.y, b.y)); } comp_device_inline float2 max(const float2& a, const float2& b) { return make_float2(max(a.x, b.x), max(a.y, b.y)); } comp_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx) { return min(max(a, mn), mx); } comp_device_inline float2 fabs(const float2& a) { return make_float2(abs(a.x), abs(a.y)); } comp_device_inline float2 as_float2(const float4& a) { return make_float2(a.x, a.y); } comp_device_inline float2 mix(const float2& a, const float2& b, float t) { return a + t*(b - a); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* __UTIL_MATH_FLOAT2_H__ */ // END #include "util/util_math_float2.h" // -------------------------------------------------------- // START #include "util/util_math_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT3_H_ #define _UTIL_MATH_FLOAT3_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline float3 operator-(const float3& a); comp_device_inline float3 operator*(const float3& a, const float3& b); comp_device_inline float3 operator*(const float3& a, const float f); comp_device_inline float3 operator*(const float f, const float3& a); comp_device_inline float3 operator/(const float f, const float3& a); comp_device_inline float3 operator/(const float3& a, const float f); comp_device_inline float3 operator/(const float3& a, const float3& b); comp_device_inline float3 operator+(const float3& a, const float3& b); comp_device_inline float3 operator-(const float3& a, const float3& b); comp_device_inline float3 operator+=(float3& a, const float3& b); comp_device_inline float3 operator-=(float3& a, const float3& b); comp_device_inline float3 operator*=(float3& a, const float3& b); comp_device_inline float3 operator*=(float3& a, float f); comp_device_inline float3 operator/=(float3& a, const float3& b); comp_device_inline float3 operator/=(float3& a, float f); comp_device_inline bool operator==(const float3& a, const float3& b); comp_device_inline bool operator!=(const float3& a, const float3& b); comp_device_inline float dot(const float3& a, const float3& b); comp_device_inline float dot_xy(const float3& a, const float3& b); comp_device_inline 
float3 cross(const float3& a, const float3& b);
comp_device_inline float3 normalize(const float3& a);
comp_device_inline float3 min(const float3& a, const float3& b);
comp_device_inline float3 max(const float3& a, const float3& b);
comp_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx);
comp_device_inline float3 clamp(const float3& a, float mn, float mx);
comp_device_inline float3 fabs(const float3& a);
comp_device_inline float3 mix(const float3& a, const float3& b, float t);
comp_device_inline float3 rcp(const float3& a);
#endif /* _KERNEL_CPU_ */

/* The declarations below are outside the _KERNEL_CPU_ guard, so they apply to
 * every device; they take their arguments by value rather than by reference. */
comp_device_inline float max3(float3 a);
comp_device_inline float len(const float3 a);
comp_device_inline float len_squared(const float3 a);

/* Euclidean length expressed through dot(). */
#define length_v3(a) sqrt(dot(a, a))

comp_device_inline float3 saturate3(float3 a);
comp_device_inline float3 safe_normalize(const float3 a);
/* The length output `t` is declared through comp_inout(), a macro defined
 * elsewhere. */
comp_device_inline float3 normalize_len(const float3 a, comp_inout(float, t));
comp_device_inline float3 safe_normalize_len(const float3 a, comp_inout(float, t));
comp_device_inline bool is_zero(const float3 a);
comp_device_inline float reduce_add(const float3 a);
comp_device_inline float average(const float3 a);
comp_device_inline bool isequal_float3(const float3 a, const float3 b);

/*******************************************************************************
 * Definition.
*/ #ifdef _KERNEL_CPU_ comp_device_inline float3 operator-(const float3& a) { #ifdef __KERNEL_SSE__ return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); #else return make_float3(-a.x, -a.y, -a.z); #endif } comp_device_inline float3 operator*(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_mul_ps(a.m128,b.m128)); #else return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); #endif } comp_device_inline float3 operator*(const float3& a, const float f) { #ifdef __KERNEL_SSE__ return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f))); #else return make_float3(a.x*f, a.y*f, a.z*f); #endif } comp_device_inline float3 operator*(const float f, const float3& a) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */ #if defined(__KERNEL_SSE__) && 0 return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); #else return make_float3(a.x*f, a.y*f, a.z*f); #endif } comp_device_inline float3 operator/(const float f, const float3& a) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */ #if defined(__KERNEL_SSE__) && 0 __m128 rc = _mm_rcp_ps(a.m128); return float3(_mm_mul_ps(_mm_set1_ps(f),rc)); #else return make_float3(f / a.x, f / a.y, f / a.z); #endif } comp_device_inline float3 operator/(const float3& a, const float f) { float invf = 1.0f/f; return a * invf; } comp_device_inline float3 operator/(const float3& a, const float3& b) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. 
*/ #if defined(__KERNEL_SSE__) && 0 __m128 rc = _mm_rcp_ps(b.m128); return float3(_mm_mul_ps(a, rc)); #else return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); #endif } comp_device_inline float3 operator+(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_add_ps(a.m128, b.m128)); #else return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); #endif } comp_device_inline float3 operator-(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_sub_ps(a.m128, b.m128)); #else return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); #endif } comp_device_inline float3 operator+=(float3& a, const float3& b) { return a = a + b; } comp_device_inline float3 operator-=(float3& a, const float3& b) { return a = a - b; } comp_device_inline float3 operator*=(float3& a, const float3& b) { return a = a * b; } comp_device_inline float3 operator*=(float3& a, float f) { return a = a * f; } comp_device_inline float3 operator/=(float3& a, const float3& b) { return a = a / b; } comp_device_inline float3 operator/=(float3& a, float f) { float invf = 1.0f/f; return a = a * invf; } comp_device_inline bool operator==(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; #else return (a.x == b.x && a.y == b.y && a.z == b.z); #endif } comp_device_inline bool operator!=(const float3& a, const float3& b) { return !(a == b); } comp_device_inline float dot(const float3& a, const float3& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); #else return a.x*b.x + a.y*b.y + a.z*b.z; #endif } comp_device_inline float dot_xy(const float3& a, const float3& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b)); #else return a.x*b.x + a.y*b.y; #endif } comp_device_inline float3 cross(const float3& a, const float3& b) { float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - 
a.y*b.x); return r; } comp_device_inline float3 normalize(const float3& a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); return float3(_mm_div_ps(a.m128, norm)); #else return a/len(a); #endif } comp_device_inline float3 min(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_min_ps(a.m128, b.m128)); #else return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); #endif } comp_device_inline float3 max(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_max_ps(a.m128, b.m128)); #else return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); #endif } comp_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx) { return min(max(a, mn), mx); } comp_device_inline float3 clamp(const float3& a, float mn, float mx) { return clamp(a, make_float3(mn,mn,mn), make_float3(mx,mx,mx)); } comp_device_inline float3 fabs(const float3& a) { #ifdef __KERNEL_SSE__ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); return float3(_mm_and_ps(a.m128, mask)); #else return make_float3(fabs(a.x), fabs(a.y), fabs(a.z)); #endif } comp_device_inline float3 mix(const float3& a, const float3& b, float t) { return a + t*(b - a); } comp_device_inline float3 rcp(const float3& a) { #ifdef __KERNEL_SSE__ const float4 r(_mm_rcp_ps(a.m128)); return float3(_mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a))); #else return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z); #endif } #endif /* _KERNEL_CPU_ */ comp_device_inline float max3(float3 a) { return max(max(a.x, a.y), a.z); } comp_device_inline float len(const float3 a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); #else return sqrt(dot(a, a)); #endif } comp_device_inline float len_squared(const float3 a) { return dot(a, a); } comp_device_inline float3 saturate3(float3 a) { return 
make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); } comp_device_inline float3 normalize_len(const float3 a, comp_inout(float, t)) { comp_get_ref(t) = len(a); float x = 1.0f / comp_get_ref(t); return a*x; } comp_device_inline float3 safe_normalize(const float3 a) { float t = len(a); return (t != 0.0f)? a * (1.0f/t) : a; } comp_device_inline float3 safe_normalize_len(const float3 a, comp_inout(float, t)) { comp_get_ref(t) = len(a); return (comp_get_ref(t) != 0.0f)? a/(comp_get_ref(t)): a; } comp_device_inline bool is_zero(const float3 a) { #ifdef __KERNEL_SSE__ return a == make_float3(0.0f); #else return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); #endif } comp_device_inline float reduce_add(const float3 a) { return (a.x + a.y + a.z); } comp_device_inline float average(const float3 a) { return reduce_add(a)*(1.0f/3.0f); } comp_device_inline bool isequal_float3(const float3 a, const float3 b) { #ifdef _KERNEL_OPENCL_ return all(a == b); #else return a == b; #endif } COMP_NAMESPACE_END #endif /* _UTIL_MATH_FLOAT3_H_ */ // END #include "util/util_math_float3.h" // -------------------------------------------------------- // START #include "util/util_math_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT4_H_ #define _UTIL_MATH_FLOAT4_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline float4 operator-(const float4& a); comp_device_inline float4 operator*(const float4& a, const float4& b); comp_device_inline float4 operator*(const float4& a, float f); comp_device_inline float4 operator*(float f, const float4& a); comp_device_inline float4 operator/(const float4& a, float f); comp_device_inline float4 operator/(const float4& a, const float4& b); comp_device_inline float4 operator+(const float4& a, const float4& b); comp_device_inline float4 operator+(const float4& a, float f); comp_device_inline float4 operator-(const float4& a, const float4& b); comp_device_inline float4 operator-(const float4& a, const float b); comp_device_inline float4 operator+=(float4& a, const float4& b); comp_device_inline float4 operator*=(float4& a, const float4& b); comp_device_inline float4 operator/=(float4& a, float f); comp_device_inline int4 operator<(const float4& a, const float4& b); comp_device_inline int4 operator>=(const float4& a, const float4& b); comp_device_inline int4 operator<=(const float4& a, const float4& b); comp_device_inline bool operator==(const float4& a, const float4& b); comp_device_inline float dot(const float4& a, const float4& b); comp_device_inline float4 fabs(const float4& a); comp_device_inline float len_squared(const float4& a); comp_device_inline float4 rcp(const float4& a); comp_device_inline float4 cross(const float4& a, const float4& b); comp_device_inline bool is_zero(const float4& a); comp_device_inline float reduce_add(const float4& a); comp_device_inline float average(const float4& a); comp_device_inline float len(const float4& a); comp_device_inline float4 normalize(const float4& a); comp_device_inline float4 safe_normalize(const float4& a); comp_device_inline float4 min(const float4& a, const float4& b); comp_device_inline float4 max(const float4& a, const float4& 
b); comp_device_inline float4 mix(const float4& a, const float4& b, float t); #endif /* _KERNEL_CPU_ */ #ifdef __KERNEL_SSE__ template __forceinline const float4 shuffle(const float4& b); template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b); # ifdef __KERNEL_SSE3__ template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b); template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b); # endif #endif /* __KERNEL_SSE__ */ #ifdef _KERNEL_CPU_ comp_device_inline float4 select(const int4& mask, const float4& a, const float4& b); comp_device_inline float4 reduce_min(const float4& a); comp_device_inline float4 reduce_max(const float4& a); # if 0 comp_device_inline float4 reduce_add(const float4& a); # endif #endif /* !_KERNEL_GPU_ */ #define length_v4(a) sqrt(dot(a, a)) /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline float4 operator-(const float4& a) { #ifdef __KERNEL_SSE__ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); return float4(_mm_xor_ps(a.m128, mask)); #else return make_float4(-a.x, -a.y, -a.z, -a.w); #endif } comp_device_inline float4 operator*(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_mul_ps(a.m128, b.m128)); #else return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); #endif } comp_device_inline float4 operator*(const float4& a, float f) { #if defined(__KERNEL_SSE__) return a * make_float4(f); #else return make_float4(a.x*f, a.y*f, a.z*f, a.w*f); #endif } comp_device_inline float4 operator*(float f, const float4& a) { return a * f; } comp_device_inline float4 operator/(const float4& a, float f) { return a * (1.0f/f); } comp_device_inline float4 operator/(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return a * rcp(b); #else return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); #endif } comp_device_inline float4 operator+(const float4& a, const float4& b) { 
#ifdef __KERNEL_SSE__ return float4(_mm_add_ps(a.m128, b.m128)); #else return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); #endif } comp_device_inline float4 operator+(const float4& a, const float b) { return make_float4(a.x+b, a.y+b, a.z+b, a.w+b); } comp_device_inline float4 operator-(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_sub_ps(a.m128, b.m128)); #else return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); #endif } comp_device_inline float4 operator-(const float4& a, const float b) { return make_float4(a.x-b, a.y-b, a.z-b, a.w-b); } comp_device_inline float4 operator+=(float4& a, const float4& b) { return a = a + b; } comp_device_inline float4 operator*=(float4& a, const float4& b) { return a = a * b; } comp_device_inline float4 operator/=(float4& a, float f) { return a = a / f; } comp_device_inline int4 operator<(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return int4(_mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128))); #else return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); #endif } comp_device_inline int4 operator>=(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return int4(_mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128))); #else return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); #endif } comp_device_inline int4 operator<=(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. 
*/ return int4(_mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128))); #else return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); #endif } comp_device_inline bool operator==(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; #else return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); #endif } comp_device_inline float dot(const float4& a, const float4& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); #else return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w); #endif } comp_device_inline float4 fabs(const float4& a) { return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w)); } comp_device_inline float len_squared(const float4& a) { return dot(a, a); } comp_device_inline float4 rcp(const float4& a) { #ifdef __KERNEL_SSE__ float4 r(_mm_rcp_ps(a.m128)); return float4(_mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a))); #else return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w); #endif } comp_device_inline float4 cross(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b)); #else return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f); #endif } comp_device_inline bool is_zero(const float4& a) { #ifdef __KERNEL_SSE__ return a == make_float4(0.0f); #else return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); #endif } comp_device_inline float reduce_add(const float4& a) { #ifdef __KERNEL_SSE__ float4 h(shuffle<1,0,3,2>(a) + a); /* TODO(sergey): Investigate efficiency. 
*/ return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); #else return ((a.x + a.y) + (a.z + a.w)); #endif } comp_device_inline float average(const float4& a) { return reduce_add(a) * 0.25f; } comp_device_inline float len(const float4& a) { return sqrt(dot(a, a)); } comp_device_inline float4 normalize(const float4& a) { return a/len(a); } comp_device_inline float4 safe_normalize(const float4& a) { float t = len(a); return (t != 0.0f)? a/t: a; } comp_device_inline float4 min(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_min_ps(a.m128, b.m128)); #else return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); #endif } comp_device_inline float4 max(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_max_ps(a.m128, b.m128)); #else return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); #endif } #endif /* !__KERNEL_OPENCL__*/ #ifdef __KERNEL_SSE__ template __forceinline const float4 shuffle(const float4& b) { return float4(_mm_castsi128_ps( _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); } template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b) { return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); } # ifdef __KERNEL_SSE3__ template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b) { return float4(_mm_moveldup_ps(b)); } template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b) { return float4(_mm_movehdup_ps(b)); } # endif /* __KERNEL_SSE3__ */ #endif /* __KERNEL_SSE__ */ #ifdef _KERNEL_CPU_ comp_device_inline float4 select(const int4& mask, const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return float4(_mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a), _mm_andnot_ps(_mm_cvtepi32_ps(mask), b))); #else return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); #endif } comp_device_inline float4 reduce_min(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = min(shuffle<1,0,3,2>(a), a); return min(shuffle<2,3,0,1>(h), h); #else return make_float4(min(min(a.x, a.y), min(a.z, a.w))); #endif } comp_device_inline float4 reduce_max(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = max(shuffle<1,0,3,2>(a), a); return max(shuffle<2,3,0,1>(h), h); #else return make_float4(max(max(a.x, a.y), max(a.z, a.w))); #endif } comp_device_inline float4 mix(const float4& a, const float4& b, float t) { return a + t*(b - a); } #if 0 comp_device_inline float4 reduce_add(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = shuffle<1,0,3,2>(a) + a; return shuffle<2,3,0,1>(h) + h; #else return make_float4((a.x + a.y) + (a.z + a.w)); #endif } #endif #endif /* !_KERNEL_GPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_FLOAT4_H_ */ // END #include "util/util_math_float4.h" COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ /* Interpolation */ template A lerp(const A& a, const A& b, const B& t) { return (A)(a * ((B)1 - t) + b * t); } /* Triangle */ comp_device_inline float triangle_area(const float3& v1, const float3& v2, const float3& v3) { return len(cross(v3 - v2, v1 - v2))*0.5f; } #endif /* _KERNEL_CPU_ */ /* Orthonormal vectors */ comp_device_inline void make_orthonormals(const float3 N, comp_inout(float3, a), comp_inout(float3, b)) { if(N.x != N.y || N.x != N.z) comp_get_ref(a) = make_float3(N.z-N.y, N.x-N.z, N.y-N.x); //(1,1,1)x N else comp_get_ref(a) = make_float3(N.z-N.y, N.x+N.z, -N.y-N.x); //(-1,1,1)x N comp_get_ref(a) = normalize(comp_get_ref(a)); comp_get_ref(b) = cross(N, comp_get_ref(a)); } /* Color division */ comp_device_inline float3 safe_invert_color(float3 a) { float x, y, z; x = (a.x != 0.0f)? 1.0f/a.x: 0.0f; y = (a.y != 0.0f)? 1.0f/a.y: 0.0f; z = (a.z != 0.0f)? 1.0f/a.z: 0.0f; return make_float3(x, y, z); } comp_device_inline float3 safe_divide_color(float3 a, float3 b) { float x, y, z; x = (b.x != 0.0f)? 
a.x/b.x: 0.0f; y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; return make_float3(x, y, z); } comp_device_inline float3 safe_divide_even_color(float3 a, float3 b) { float x, y, z; x = (b.x != 0.0f)? a.x/b.x: 0.0f; y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; /* try to get gray even if b is zero */ if(b.x == 0.0f) { if(b.y == 0.0f) { x = z; y = z; } else if(b.z == 0.0f) { x = y; z = y; } else x = 0.5f*(y + z); } else if(b.y == 0.0f) { if(b.z == 0.0f) { y = x; z = x; } else y = 0.5f*(x + z); } else if(b.z == 0.0f) { z = 0.5f*(x + y); } return make_float3(x, y, z); } /* Rotation of point around axis and angle */ comp_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) { float costheta = cos(angle); float sintheta = sin(angle); float3 r; r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); return r; } /* NaN-safe math ops */ comp_device_inline float safe_sqrt(float f) { return sqrt(max(f, 0.0f)); } comp_device float safe_asin(float a) { return asin(clamp(a, -1.0f, 1.0f)); } comp_device float safe_acos(float a) { return acos(clamp(a, -1.0f, 1.0f)); } comp_device float compatible_pow(float x, float y) { #ifdef _KERNEL_GPU_ if(y == 0.0f) /* x^0 -> 1, including 0^0 */ return 1.0f; /* GPU pow doesn't accept negative x, do manual checks here */ if(x < 0.0f) { if(mod(-y, 2.0f) == 0.0f) return pow(-x, y); else return -pow(-x, y); } else if(x == 0.0f) return 0.0f; #endif return pow(x, y); } comp_device 
float safe_pow(float a, float b) { if(UNLIKELY(a < 0.0f && b != float_to_int(b))) return 0.0f; return compatible_pow(a, b); } comp_device float safe_divide(float a, float b) { return (b != 0.0f)? a/b: 0.0f; } comp_device float safe_log(float a, float b) { if(UNLIKELY(a <= 0.0f || b <= 0.0f)) return 0.0f; return safe_divide(log(a),log(b)); } comp_device float safe_modulo(float a, float b) { return (b != 0.0f)? mod(a, b): 0.0f; } comp_device_inline float xor_signmask(float x, int y) { return int_as_float(float_as_int(x) ^ y); } COMP_NAMESPACE_END #endif /* _UTIL_MATH_H_ */ // END #include "util/util_math.h" // -------------------------------------------------------- // START #include "util/util_types.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_H_ #define _UTIL_TYPES_H_ #ifdef _KERNEL_CPU_ #include #endif /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) # define __KERNEL_64_BIT__ #endif /* Qualifiers for kernel code shared by CPU and GPU */ #ifdef _KERNEL_CPU_ # define comp_device_inline static inline # define comp_device_noinline static # define comp_global # define comp_constant # define comp_local # define comp_local_param # define comp_private # define comp_restrict __restrict # define __KERNEL_WITH_SSE_ALIGN__ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define comp_device_inline static __forceinline # define comp_device_forceinline static __forceinline # define comp_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define comp_try_align(...) __declspec(align(__VA_ARGS__)) # else /* __KERNEL_64_BIT__ */ # undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ # define comp_try_align(...) # endif /* __KERNEL_64_BIT__ */ # define comp_may_alias # define comp_always_inline __forceinline # define comp_never_inline __declspec(noinline) # define comp_maybe_unused # else /* _WIN32 && !FREE_WINDOWS */ //# define comp_device_inline static inline __attribute__((always_inline)) # define comp_device_forceinline static inline __attribute__((always_inline)) # define comp_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) # endif # define comp_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) # define comp_may_alias __attribute__((__may_alias__)) # define comp_always_inline __attribute__((always_inline)) # define comp_never_inline __attribute__((noinline)) # define comp_maybe_unused __attribute__((used)) # endif /* _WIN32 && !FREE_WINDOWS */ #endif /* _KERNEL_GPU_ */ /* Standard Integer Types */ #ifdef _KERNEL_CPU_ /* int8_t, uint16_t, and friends */ # ifndef _WIN32 # include # endif /* SIMD Types */ // # include "util/util_optimization.h" #endif /* _KERNEL_GPU_ */ COMP_NAMESPACE_BEGIN /* Types * * Define simpler unsigned type names, and integer with defined number of bits. * Also vector types, named to be compatible with OpenCL builtin types, while * working for CUDA and C++ too. */ /* Shorter Unsigned Names */ #ifdef _KERNEL_CPU_ typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned short ushort; #endif // #ifndef __KERNEL_OPENCL__ // typedef unsigned char uchar; // typedef unsigned int uint; // typedef unsigned short ushort; // #endif /* Fixed Bits Types */ // #ifdef __KERNEL_OPENCL__ // typedef ulong uint64_t; // #endif #ifdef _KERNEL_CPU_ # ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; # ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif # endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; #endif /* _KERNEL_GPU_ */ // comp_device_inline size_t align_up(size_t offset, size_t alignment) // { // return (offset + alignment - 1) & ~(alignment - 1); // } // // comp_device_inline size_t divide_up(size_t x, size_t y) // { // return (x + y - 1) / y; // } // // comp_device_inline size_t round_up(size_t x, size_t multiple) // { // return ((x + multiple - 1) / multiple) * multiple; // } // // comp_device_inline size_t 
round_down(size_t x, size_t multiple) // { // return (x / multiple) * multiple; // } /* Interpolation types for textures * cuda also use texture space to store other objects */ #if defined(_KERNEL_CPU_) || defined(_KERNEL_OPENCL_) enum InterpolationType { INTERPOLATION_LINEAR = 0, INTERPOLATION_CLOSEST = 1, INTERPOLATION_CUBIC = 2, INTERPOLATION_SMART = 3 }; enum ExtensionType { /* Cause the image to repeat horizontally and vertically. */ EXTENSION_REPEAT = 0, /* Extend by repeating edge pixels of the image. */ EXTENSION_EXTEND = 1, /* Clip to image size and set exterior pixels as transparent. */ EXTENSION_CLIP = 2, }; #endif /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. */ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ #if defined(__GNUC__) && defined(_KERNEL_CPU_) # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) (x) # define UNLIKELY(x) (x) #endif // #if defined(__GNUC__) || defined(__clang__) && defined(_KERNEL_CPU_) // /* Some magic to be sure we don't have reference in the type. */ // template static inline T decltype_helper(T x) { return x; } // # define TYPEOF(x) decltype(decltype_helper(x)) // #endif COMP_NAMESPACE_END #ifdef _KERNEL_CPU_ # include # define util_assert(statement) assert(statement) #else # define util_assert(statement) #endif /* Vectorized types declaration. */ // -------------------------------------------------------- // START #include "util/util_types_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT2_H_ #define _UTIL_TYPES_INT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int2 { int x, y; __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int2 make_int2(int x, int y); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int2(x, y) ((ivec2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_H_ */ // END #include "util/util_types_int2.h" // -------------------------------------------------------- // START #include "util/util_types_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_H_ #define _UTIL_TYPES_INT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) int3 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int3(); __forceinline int3(const int3& a); __forceinline explicit int3(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int3& operator =(const int3& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int3 make_int3(int i); comp_device_inline int3 make_int3(int x, int y, int z); comp_device_inline void print_int3(const char *label, const int3& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int3(x, y, z) ((int3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_H_ */ // END #include "util/util_types_int3.h" // -------------------------------------------------------- // START #include "util/util_types_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_H_ #define _UTIL_TYPES_INT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float3; struct comp_try_align(16) int4 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int4(); __forceinline int4(const int4& a); __forceinline explicit int4(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int4& operator=(const int4& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int4 make_int4(int i); comp_device_inline int4 make_int4(int x, int y, int z, int w); comp_device_inline int4 make_int4(const float3& f); comp_device_inline void print_int4(const char *label, const int4& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_H_ */ // END #include "util/util_types_int4.h" // -------------------------------------------------------- // START #include "util/util_types_uint2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_H_ #define _UTIL_TYPES_UINT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint2 { uint x, y; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint2 make_uint2(uint x, uint y); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint2(x, y) ((uint2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_H_ */ // END #include "util/util_types_uint2.h" // -------------------------------------------------------- // START #include "util/util_types_uint3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT3_H_ #define _UTIL_TYPES_UINT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint3 { uint x, y, z; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint3 make_uint3(uint x, uint y, uint z); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint3(x, y, z) ((uint3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_H_ */ // END #include "util/util_types_uint3.h" // -------------------------------------------------------- // START #include "util/util_types_uint4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_H_ #define _UTIL_TYPES_UINT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint4 { uint x, y, z, w; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_H_ */ // END #include "util/util_types_uint4.h" // -------------------------------------------------------- // START #include "util/util_types_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_H_ #define _UTIL_TYPES_FLOAT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float2 { float x, y; __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float2 make_float2(float x, float y); comp_device_inline void print_float2(const char *label, const float2& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float2(x, y) ((float2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_H_ */ // END #include "util/util_types_float2.h" // -------------------------------------------------------- // START #include "util/util_types_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_H_ #define _UTIL_TYPES_FLOAT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) float3 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float3(); __forceinline float3(const float3& a); __forceinline explicit float3(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float3& operator =(const float3& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float3 make_float3(float f); comp_device_inline float3 make_float3(float x, float y, float z); comp_device_inline void print_float3(const char *label, const float3& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float3(x, y, z) ((float3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_H_ */ // END #include "util/util_types_float3.h" // -------------------------------------------------------- // START #include "util/util_types_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_H_ #define _UTIL_TYPES_FLOAT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int4; struct comp_try_align(16) float4 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float4(); __forceinline float4(const float4& a); __forceinline explicit float4(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float4& operator =(const float4& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float4 make_float4(float f); comp_device_inline float4 make_float4(float x, float y, float z, float w); comp_device_inline float4 make_float4(const int4& i); comp_device_inline void print_float4(const char *label, const float4& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_H_ */ // END #include "util/util_types_float4.h" /* Vectorized types implementation. */ // -------------------------------------------------------- // START #include "util/util_types_int2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_INT2_IMPL_H_ #define _UTIL_TYPES_INT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ int int2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } int& int2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_IMPL_H_ */ // END #include "util/util_types_int2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_IMPL_H_ #define _UTIL_TYPES_INT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int3::int3() { } __forceinline int3::int3(const __m128i& a) : m128(a) { } __forceinline int3::int3(const int3& a) : m128(a.m128) { } __forceinline int3::operator const __m128i&(void) const { return m128; } __forceinline int3::operator __m128i&(void) { return m128; } __forceinline int3& int3::operator =(const int3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline int& int3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline int3 make_int3(int i) { #ifdef __KERNEL_SSE__ int3 a(_mm_set1_epi32(i)); #else int3 a = {i, i, i, i}; #endif return a; } comp_device_inline int3 make_int3(int x, int y, int z) { #ifdef __KERNEL_SSE__ int3 a(_mm_set_epi32(0, z, y, x)); #else int3 a = {x, y, z, 0}; #endif return a; } comp_device_inline void print_int3(const char *label, const int3& a) { printf("%s: %d %d %d\n", label, a.x, a.y, a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_IMPL_H_ */ // END #include "util/util_types_int3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_IMPL_H_ #define _UTIL_TYPES_INT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int4::int4() { } __forceinline int4::int4(const int4& a) : m128(a.m128) { } __forceinline int4::int4(const __m128i& a) : m128(a) { } __forceinline int4::operator const __m128i&(void) const { return m128; } __forceinline int4::operator __m128i&(void) { return m128; } __forceinline int4& int4::operator=(const int4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline int& int4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline int4 make_int4(int i) { #ifdef __KERNEL_SSE__ int4 a(_mm_set1_epi32(i)); #else int4 a = {i, i, i, i}; #endif return a; } comp_device_inline int4 make_int4(int x, int y, int z, int w) { #ifdef __KERNEL_SSE__ int4 a(_mm_set_epi32(w, z, y, x)); #else int4 a = {x, y, z, w}; #endif return a; } comp_device_inline int4 make_int4(const float3& f) { #ifdef __KERNEL_SSE__ int4 a(_mm_cvtps_epi32(f.m128)); #else int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; #endif return a; } comp_device_inline void print_int4(const char *label, const int4& a) { printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_IMPL_H_ */ // END #include "util/util_types_int4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_IMPL_H_ #define _UTIL_TYPES_UINT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint2::operator[](uint i) const { util_assert(i < 2); return *(&x + i); } __forceinline uint& uint2::operator[](uint i) { util_assert(i < 2); return *(&x + i); } comp_device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_IMPL_H_ */ // END #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_UINT3_IMPL_H_ #define _UTIL_TYPES_UINT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint3::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint3::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_IMPL_H_ */ // END #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_IMPL_H_ #define _UTIL_TYPES_UINT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint4::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint4::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_IMPL_H_ */ // END #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_IMPL_H_ #define _UTIL_TYPES_FLOAT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline float float2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } __forceinline float& float2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } comp_device_inline void print_float2(const char *label, const float2& a) { printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_IMPL_H_ */ // END #include "util/util_types_float2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_IMPL_H_ #define _UTIL_TYPES_FLOAT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float3::float3() { } __forceinline float3::float3(const float3& a) : m128(a.m128) { } __forceinline float3::float3(const __m128& a) : m128(a) { } __forceinline float3::operator const __m128&(void) const { return m128; } __forceinline float3::operator __m128&(void) { return m128; } __forceinline float3& float3::operator =(const float3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline float& float3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline float3 make_float3(float f) { #ifdef __KERNEL_SSE__ float3 a(_mm_set1_ps(f)); #else float3 a = {f, f, f, f}; #endif return a; } comp_device_inline float3 make_float3(float x, float y, float z) { #ifdef __KERNEL_SSE__ float3 a(_mm_set_ps(0.0f, z, y, x)); #else float3 a = {x, y, z, 0.0f}; #endif return a; } comp_device_inline void print_float3(const char *label, const float3& a) { printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_IMPL_H_ */ // END #include "util/util_types_float3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_IMPL_H_ #define _UTIL_TYPES_FLOAT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float4::float4() { } __forceinline float4::float4(const float4& a) : m128(a.m128) { } __forceinline float4::float4(const __m128& a) : m128(a) { } __forceinline float4::operator const __m128&(void) const { return m128; } __forceinline float4::operator __m128&(void) { return m128; } __forceinline float4& float4::operator =(const float4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline float& float4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline float4 make_float4(float f) { #ifdef __KERNEL_SSE__ float4 a(_mm_set1_ps(f)); #else float4 a = {f, f, f, f}; #endif return a; } comp_device_inline float4 make_float4(float x, float y, float z, float w) { #ifdef __KERNEL_SSE__ float4 a(_mm_set_ps(w, z, y, x)); #else float4 a = {x, y, z, w}; #endif return a; } comp_device_inline float4 make_float4(const int4& i) { #ifdef __KERNEL_SSE__ float4 a(_mm_cvtepi32_ps(i.m128)); #else float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; #endif return a; } comp_device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, 
(double)a.z, (double)a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_IMPL_H_ */ // END #include "util/util_types_float4_impl.h" #endif /* _UTIL_TYPES_H_ */ // END #include "util/util_types.h" // #ifdef __KERNEL_SSE2__ // #include "util/util_simd.h" // #endif COMP_NAMESPACE_BEGIN // comp_device_inline uchar float_to_byte(float val) // { // return ((val <= 0.0f) ? 0 : ((val > (1.0f - 0.5f / 255.0f)) ? 255 : (uchar)((255.0f * val) + 0.5f))); // } // // comp_device_inline uchar4 color_float_to_byte(float3 c) // { // uchar r, g, b; // // r = float_to_byte(c.x); // g = float_to_byte(c.y); // b = float_to_byte(c.z); // // return make_uchar4(r, g, b, 0); // } // // comp_device_inline_inline float3 color_byte_to_float(uchar4 c) // { // return make_float3(c.x*(1.0f/255.0f), c.y*(1.0f/255.0f), c.z*(1.0f/255.0f)); // } // comp_device_inline float _srgb_to_linear(float c) { if(c < 0.04045f) return (c < 0.0f)? 0.0f: c * (1.0f/12.92f); else return pow((c + 0.055f) * (1.0f / 1.055f), 2.4f); } comp_device_inline float _linear_to_srgb(float c) { if(c < 0.0031308f) return (c < 0.0f)? 
0.0f: c * 12.92f; else return 1.055f * pow(c, 1.0f / 2.4f) - 0.055f; } comp_device_inline float4 linear_to_hsv(float4 rgb) { float cmax, cmin, h, s, v, cdelta; float3 c; cmax = max(rgb.x, max(rgb.y, rgb.z)); cmin = min(rgb.x, min(rgb.y, rgb.z)); cdelta = cmax - cmin; v = cmax; if(cmax != 0.0f) { s = cdelta/cmax; } else { s = 0.0f; h = 0.0f; } if(s != 0.0f) { float3 cmax3 = make_float3(cmax, cmax, cmax); c = (cmax3 - make_float3(rgb.x, rgb.y, rgb.z))/cdelta; if (rgb.x == cmax) h = c.z - c.y; else if(rgb.y == cmax) h = 2.0f + c.x - c.z; else h = 4.0f + c.y - c.x; h /= 6.0f; if(h < 0.0f) h += 1.0f; } else { h = 0.0f; } return make_float4(h, s, v, rgb.w); } comp_device_inline float4 hsv_to_linear(float4 hsv) { float i, f, p, q, t, h, s, v; float4 rgb; h = hsv.x; s = hsv.y; v = hsv.z; if(s != 0.0f) { if(h == 1.0f) h = 0.0f; h *= 6.0f; i = floor(h); f = h - i; rgb = make_float4(f, f, f, hsv.w); p = v*(1.0f-s); q = v*(1.0f-(s*f)); t = v*(1.0f-(s*(1.0f-f))); if (i == 0.0f) rgb = make_float4(v, t, p, hsv.w); else if(i == 1.0f) rgb = make_float4(q, v, p, hsv.w); else if(i == 2.0f) rgb = make_float4(p, v, t, hsv.w); else if(i == 3.0f) rgb = make_float4(p, q, v, hsv.w); else if(i == 4.0f) rgb = make_float4(t, p, v, hsv.w); else rgb = make_float4(v, p, q, hsv.w); } else { rgb = make_float4(v, v, v, hsv.w); } return rgb; } // comp_device_inline float3 xyY_to_xyz(float x, float y, float Y) // { // float X, Z; // // if(y != 0.0f) X = (x / y) * Y; // else X = 0.0f; // // if(y != 0.0f && Y != 0.0f) Z = (1.0f - x - y) / y * Y; // else Z = 0.0f; // // return make_float3(X, Y, Z); // } // // comp_device_inline float3 xyz_to_rgb(float x, float y, float z) // { // return make_float3(3.240479f * x + -1.537150f * y + -0.498535f * z, // -0.969256f * x + 1.875991f * y + 0.041556f * z, // 0.055648f * x + -0.204043f * y + 1.057311f * z); // } // // #ifdef __KERNEL_SSE2__ // /* // * Calculate initial guess for arg^exp based on float representation // * This method gives a constant bias, which 
can be easily compensated by multiplication with bias_coeff. // * Gives better results for exponents near 1 (e. g. 4/5). // * exp = exponent, encoded as uint32_t // * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t // */ // template // comp_device_inline_inline ssef fastpow(const ssef &arg) // { // ssef ret; // ret = arg * cast(ssei(e2coeff)); // ret = ssef(cast(ret)); // ret = ret * cast(ssei(exp)); // ret = cast(ssei(ret)); // return ret; // } // // /* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */ // comp_device_inline_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x) // { // ssef approx2 = old_result * old_result; // ssef approx4 = approx2 * approx2; // ssef t = x / approx4; // ssef summ = madd(ssef(4.0f), old_result, t); // return summ * ssef(1.0f/5.0f); // } // // /* Calculate pow(x, 2.4). Working domain: 1e-10 < x < 1e+10 */ // comp_device_inline_inline ssef fastpow24(const ssef &arg) // { // /* max, avg and |avg| errors were calculated in gcc without FMA instructions // * The final precision should be better than pow in glibc */ // // /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */ // /* 0x3F4CCCCD = 4/5 */ // /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */ // ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05 // ssef arg2 = arg * arg; // ssef arg4 = arg2 * arg2; // x = improve_5throot_solution(x, arg4); /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */ // x = improve_5throot_solution(x, arg4); /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */ // x = improve_5throot_solution(x, arg4); /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */ // return x * (x * x); // } // // comp_device_inline ssef color_srgb_to_scene_linear(const ssef &c) // { // sseb cmp = c < ssef(0.04045f); // ssef lt = max(c * ssef(1.0f/12.92f), ssef(0.0f)); // ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f/1.055f); /* 
fma */ // ssef gte = fastpow24(gtebase); // return select(cmp, lt, gte); // } // #endif /* __KERNEL_SSE2__ */ // comp_device_inline float4 srgb_to_linear(float4 c) { return make_float4(_srgb_to_linear(c.x), _srgb_to_linear(c.y), _srgb_to_linear(c.z), c.w); } comp_device_inline float4 linear_to_srgb(float4 c) { return make_float4(_linear_to_srgb(c.x), _linear_to_srgb(c.y), _linear_to_srgb(c.z), c.w); } comp_device_inline float4 linear_to_ycc_itu_bt601(float4 c) { return make_float4( ( 0.257f * c.x) + (0.504f * c.y) + (0.098f * c.z) + 16.0f/255.0f, (-0.148f * c.x) - (0.291f * c.y) + (0.439f * c.z) + 128.0f/255.0f, ( 0.439f * c.x) - (0.368f * c.y) - (0.071f * c.z) + 128.0f/255.0f, c.w ); } comp_device_inline float4 linear_to_yuv(float4 c) { return make_float4( ( 0.299f * c.x) + (0.587f * c.y) + (0.114f * c.z), (-0.147f * c.x) - (0.289f * c.y) + (0.436f * c.z), ( 0.615f * c.x) - (0.515f * c.y) - (0.100f * c.z), c.w ); } // comp_device_inline float4 color_srgb_to_scene_linear_v4(float4 c) // { // #ifdef __KERNEL_SSE2__ // ssef r_ssef; // float4 &r = (float4 &)r_ssef; // r = c; // r_ssef = color_srgb_to_scene_linear(r_ssef); // r.w = c.w; // return r; // #else // return make_float4(color_srgb_to_scene_linear(c.x), // color_srgb_to_scene_linear(c.y), // color_srgb_to_scene_linear(c.z), // c.w); // #endif // } comp_device_inline float linear_rgb_to_gray(float4 c) { return c.x*0.2126f + c.y*0.7152f + c.z*0.0722f; } comp_device_inline float component(float4 c, uint channel) { switch (channel) { default: case 0: return c.x; case 1: return c.y; case 2: return c.z; case 3: return c.w; } } COMP_NAMESPACE_END #endif /* __UTIL_COLOR_H__ */ // END #include "util/util_color.h" // -------------------------------------------------------- // START #include "kernel/kernel_random.h" // -------------------------------------------------------- COMP_NAMESPACE_BEGIN comp_device_inline float get_random_float(KernelGlobals * kg) //uniform between 0-1 { kg->seed = ((kg->seed) * 16807 ) % 
2147483647; return (float)(kg->seed) * 4.6566129e-10; } COMP_NAMESPACE_END // END #include "kernel/kernel_random.h" // -------------------------------------------------------- // START #include "kernel/kernel_image.h" // -------------------------------------------------------- COMP_NAMESPACE_BEGIN #define tex_fetch(type, info, pixel_offset) ((comp_device_global type*)(kg->buffers[info->data_ptr.index]+info->data_ptr.offset))[(pixel_offset)] comp_device_inline comp_device_global comp_device_struct TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) { return &((comp_device_global comp_device_struct TextureInfo*)(kg->buffers[kg->texture_ptr.index]+kg->texture_ptr.offset))[id]; } comp_device_inline int wrap_periodic(int x, int width) { x %= width; if(x < 0) x += width; return x; } comp_device_inline int wrap_clamp(int x, int width) { return clamp(x, 0, width-1); } comp_device_inline float frac(float x, int *ix) { int i = float_to_int(x) - ((x < 0.0f)? 1: 0); *ix = i; return x - (float)i; } comp_device_inline float kernel_tex_image_interp(comp_device_struct KernelGlobals *kg, int id, float x, float y){ comp_device_global comp_device_struct TextureInfo *info = kernel_tex_info(kg, id); int width = (int)info->width; int height = (int)info->height; int ix, iy, nix, niy; if (info->interpolation == INTERPOLATION_CLOSEST) { frac(x * width, &ix); frac(y * height, &iy); switch(info->extension) { case EXTENSION_REPEAT: ix = wrap_periodic(ix, width); iy = wrap_periodic(iy, height); break; case EXTENSION_CLIP: if(x < 0.0f || y < 0.0f || x > 1.0 || y > 1.0) { return 0.0f; } ATTR_FALLTHROUGH; case EXTENSION_EXTEND: ix = wrap_clamp(ix, width); iy = wrap_clamp(iy, height); break; default: kernel_assert(0); return 0.0f; } return tex_fetch(float, info, ix +iy*width); } else if(info->interpolation == INTERPOLATION_LINEAR) { float tx = frac(x*width - 0.5f, &ix); float ty = frac(y*height - 0.5f, &iy); switch(info->extension) { case EXTENSION_REPEAT: ix = wrap_periodic(ix, width); iy = 
wrap_periodic(iy, height); nix = wrap_periodic(ix+1, width); niy = wrap_periodic(iy+1, height); break; case EXTENSION_CLIP: if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { return 0.0f; } ATTR_FALLTHROUGH; case EXTENSION_EXTEND: nix = wrap_clamp(ix+1, width); niy = wrap_clamp(iy+1, height); ix = wrap_clamp(ix, width); iy = wrap_clamp(iy, height); break; default: kernel_assert(0); return 0.0f; } float r = (1.0f - ty)*(1.0f - tx)*tex_fetch(float, info, ix + iy*width); r += (1.0f - ty)*tx*tex_fetch(float, info, nix + iy*width); r += ty*(1.0f - tx)*tex_fetch(float, info, ix + niy*width); r += ty*tx*tex_fetch(float, info, nix + niy*width); return r; } else { /* Bicubic b-spline interpolation. */ float tx = frac(x*width - 0.5f, &ix); float ty = frac(y*height - 0.5f, &iy); int pix, piy, nnix, nniy; switch(info->extension) { case EXTENSION_REPEAT: ix = wrap_periodic(ix, width); iy = wrap_periodic(iy, height); pix = wrap_periodic(ix-1, width); piy = wrap_periodic(iy-1, height); nix = wrap_periodic(ix+1, width); niy = wrap_periodic(iy+1, height); nnix = wrap_periodic(ix+2, width); nniy = wrap_periodic(iy+2, height); break; case EXTENSION_CLIP: if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { return 0.0f; } ATTR_FALLTHROUGH; case EXTENSION_EXTEND: pix = wrap_clamp(ix-1, width); piy = wrap_clamp(iy-1, height); nix = wrap_clamp(ix+1, width); niy = wrap_clamp(iy+1, height); nnix = wrap_clamp(ix+2, width); nniy = wrap_clamp(iy+2, height); ix = wrap_clamp(ix, width); iy = wrap_clamp(iy, height); break; default: kernel_assert(0); return 0.0f; } const int xc[4] = {pix, ix, nix, nnix}; const int yc[4] = {width * piy, width * iy, width * niy, width * nniy}; float u[4], v[4]; /* Some helper macro to keep code reasonable size, * let compiler to inline all the matrix multiplications. 
*/ #define DATA(x, y) (tex_fetch(float, info, xc[x] + yc[y])) #define TERM(col) \ (v[col] * (u[0] * DATA(0, col) + \ u[1] * DATA(1, col) + \ u[2] * DATA(2, col) + \ u[3] * DATA(3, col))) #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ { \ u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ u[3] = (1.0f / 6.0f) * t * t * t; \ } (void)0 SET_CUBIC_SPLINE_WEIGHTS(u, tx); SET_CUBIC_SPLINE_WEIGHTS(v, ty); /* Actual interpolation. */ return TERM(0) + TERM(1) + TERM(2) + TERM(3); #undef SET_CUBIC_SPLINE_WEIGHTS #undef TERM #undef DATA } return 0.0f; } comp_device_inline float4 kernel_tex_image_interp_4(comp_device_struct KernelGlobals *kg, int id, float x, float y){ comp_device_global comp_device_struct TextureInfo *info = kernel_tex_info(kg, id); int width = (int)info->width; int height = (int)info->height; int ix, iy, nix, niy; if (info->interpolation == INTERPOLATION_CLOSEST) { frac(x*width, &ix); frac(y*height, &iy); switch(info->extension) { case EXTENSION_REPEAT: ix = wrap_periodic(ix, width); iy = wrap_periodic(iy, height); break; case EXTENSION_CLIP: if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } ATTR_FALLTHROUGH; case EXTENSION_EXTEND: ix = wrap_clamp(ix, width); iy = wrap_clamp(iy, height); break; default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } return tex_fetch(float4, info, ix +iy*width); } else if(info->interpolation == INTERPOLATION_LINEAR) { float tx = frac(x*width - 0.5f, &ix); float ty = frac(y*height - 0.5f, &iy); switch(info->extension) { case EXTENSION_REPEAT: ix = wrap_periodic(ix, width); iy = wrap_periodic(iy, height); nix = wrap_periodic(ix+1, width); niy = wrap_periodic(iy+1, height); break; case EXTENSION_CLIP: if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } ATTR_FALLTHROUGH; case EXTENSION_EXTEND: nix = 
wrap_clamp(ix+1, width); niy = wrap_clamp(iy+1, height); ix = wrap_clamp(ix, width); iy = wrap_clamp(iy, height); break; default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } float4 r = (1.0f - ty)*(1.0f - tx)*tex_fetch(float4, info, ix + iy*width); r += (1.0f - ty)*tx*tex_fetch(float4, info, nix + iy*width); r += ty*(1.0f - tx)*tex_fetch(float4, info, ix + niy*width); r += ty*tx*tex_fetch(float4, info, nix + niy*width); return r; } else { /* Bicubic b-spline interpolation. */ float tx = frac(x*width - 0.5f, &ix); float ty = frac(y*height - 0.5f, &iy); int pix, piy, nnix, nniy; switch(info->extension) { case EXTENSION_REPEAT: ix = wrap_periodic(ix, width); iy = wrap_periodic(iy, height); pix = wrap_periodic(ix-1, width); piy = wrap_periodic(iy-1, height); nix = wrap_periodic(ix+1, width); niy = wrap_periodic(iy+1, height); nnix = wrap_periodic(ix+2, width); nniy = wrap_periodic(iy+2, height); break; case EXTENSION_CLIP: if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } ATTR_FALLTHROUGH; case EXTENSION_EXTEND: pix = wrap_clamp(ix-1, width); piy = wrap_clamp(iy-1, height); nix = wrap_clamp(ix+1, width); niy = wrap_clamp(iy+1, height); nnix = wrap_clamp(ix+2, width); nniy = wrap_clamp(iy+2, height); ix = wrap_clamp(ix, width); iy = wrap_clamp(iy, height); break; default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } const int xc[4] = {pix, ix, nix, nnix}; const int yc[4] = {width * piy, width * iy, width * niy, width * nniy}; float u[4], v[4]; /* Some helper macro to keep code reasonable size, * let compiler to inline all the matrix multiplications. 
*/ #define DATA(x, y) (tex_fetch(float4, info, xc[x] + yc[y])) #define TERM(col) \ (v[col] * (u[0] * DATA(0, col) + \ u[1] * DATA(1, col) + \ u[2] * DATA(2, col) + \ u[3] * DATA(3, col))) #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ { \ u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ u[3] = (1.0f / 6.0f) * t * t * t; \ } (void)0 SET_CUBIC_SPLINE_WEIGHTS(u, tx); SET_CUBIC_SPLINE_WEIGHTS(v, ty); /* Actual interpolation. */ return TERM(0) + TERM(1) + TERM(2) + TERM(3); #undef SET_CUBIC_SPLINE_WEIGHTS #undef TERM #undef DATA } return make_float4(1.0f, 0.5f, 0.25f, 1.0f); } COMP_NAMESPACE_END // END #include "kernel/kernel_image.h" // -------------------------------------------------------- // START #include "kernel/kernel_viewport.h" // -------------------------------------------------------- #ifndef _KERNEL_VIEWPORT_H_ #define _KERNEL_VIEWPORT_H_ COMP_NAMESPACE_BEGIN // ASPECT RATIO as parameter? comp_device_inline comp_device_global comp_device_struct Viewport* kernel_viewport(KernelGlobals *kg, uint id) { return &((comp_device_global comp_device_struct Viewport*)(kg->buffers[kg->viewport_ptr.index]+kg->viewport_ptr.offset))[id]; } /** * Function to convert an x, y of a viewport to a ray. */ comp_device_inline void viewport_to_ray(comp_in(comp_device_struct KernelGlobals, kg), int viewport_id, comp_in(float2, coordinates), comp_device_struct Ray* ray) { comp_device_global comp_device_struct Viewport* vp = kernel_viewport(&kg, viewport_id); switch (vp->type) { case CAM_ORTHO: { // Worldposition of the pixel we are calculating. 
// ((((y-0.5)/aspect_ratio)*up_vector) + (x-0.5)*side_vector) * Scale + viewportPosition float ry = (coordinates.y-0.5)/kg.aspect_ratio; float rx = (coordinates.x-0.5); float3 unscaled = ry*vp->up_vector + rx*vp->side_vector; float3 scaled = unscaled * vp->scale; float3 worldposition = scaled + vp->position; ray->P = worldposition; ray->D = vp->direction; ray->side = vp->side_vector; break; } case CAM_PERSP: { //plane_position // (((y-0.5)/aspect_ratio*up_vector) + (x-0.5)*side_vector) * scale + vp->plane_position) - vp->position float ry = ((coordinates.y-0.5)/kg.aspect_ratio); float rx = (coordinates.x-0.5); float3 unscaled = ry*vp->up_vector + rx*vp->side_vector; float3 scaled = unscaled * vp->scale; float3 worldposition = scaled + vp->plane_position; float3 direction = normalize(worldposition - vp->position); ray->D = direction; ray->P = vp->position; ray->side = vp->side_vector; break; } default: ray->P = make_float3(coordinates.x, coordinates.y, 0.0); ray->D = make_float3(0.0, 0.0, 1.0); ray->side = make_float3(1.0, 0.0, 0.0); } } /** * Function to ray to an x, y for a viewport. 
*/ comp_device_inline float2 ray_to_viewport(comp_in(comp_device_struct KernelGlobals, kg), comp_device_struct Ray* ray, int viewport_id) { comp_device_global comp_device_struct Viewport* vp = kernel_viewport(&kg, viewport_id); switch (vp->type) { case CAM_ORTHO: case CAM_PERSP: { float distance; if (ray_plane_intersect(ray, vp->plane_position, vp->direction, &distance)) { float3 worldpositionOfIntersection = ray->P + (distance * ray->D); float3 scaled = worldpositionOfIntersection - vp->position; float3 unscaled = scaled / vp->scale; // Use vector projections to calculate the rx and ry return make_float2( dot(unscaled, vp->side_vector) + 0.5, dot(unscaled, vp->up_vector) * kg.aspect_ratio + 0.5); } else { return make_float2(0.0, 0.0); } break; } case CAM_PLANE: { float distance; if (ray_plane_intersect(ray, vp->plane_position, vp->direction, &distance)) { float3 worldpositionOfIntersection = ray->P + (distance * ray->D); float3 scaled = worldpositionOfIntersection - vp->position; // Use vector projections to calculate the rx and ry return make_float2( dot(scaled, vp->side_vector), dot(scaled, vp->up_vector)) / make_float2( vp->scale_x, vp->scale_y) + 0.5f; } else { return make_float2(0.0, 0.0); } break; } default: return make_float2(ray->P.x, ray->P.y); } } COMP_NAMESPACE_END #endif // END #include "kernel/kernel_viewport.h" // -------------------------------------------------------- // START #include "kernel/operations/operations.h" // -------------------------------------------------------- #ifndef _OPERATIONS_H_ #define _OPERATIONS_H_ // -------------------------------------------------------- // START #include "kernel/operations/op_types.h" // -------------------------------------------------------- #ifndef _OP_TYPES_H_ #define _OP_TYPES_H_ COMP_NAMESPACE_BEGIN /* CVM stack has a fixed s ize */ #define CVM_STACK_SIZE 256 /* CVM stack has a fixed size */ #define CVM_RAY_STACK_SIZE 32 enum { OP_END=UINT(0), OP_COLOR=UINT(1), OP_VALUE=UINT(2), OP_OUTPUT=UINT(3), 
OP_OUTPUT_RAY=UINT(4), OP_MIX=UINT(5), OP_IMAGE_1=UINT(6), OP_IMAGE_4=UINT(7), OP_BLUR=UINT(8), OP_BLUR_RAY=UINT(9), OP_VALUE_TO_COLOR=UINT(10), OP_VALUE_TO_VECTOR=UINT(11), OP_COLOR_TO_VALUE=UINT(12), OP_COLOR_TO_VECTOR=UINT(13), OP_VECTOR_TO_COLOR=UINT(14), OP_VECTOR_TO_VALUE=UINT(15), OP_ALPHA_OVER=UINT(16), OP_MATH=UINT(17), OP_COLOR_MATTE=UINT(18), OP_CHROMA_MATTE=UINT(19), OP_SEPARATE_R=UINT(20), OP_SEPARATE_G=UINT(21), OP_SEPARATE_B=UINT(22), OP_SEPARATE_A=UINT(23), OP_SEPARATE_H=UINT(24), OP_SEPARATE_S=UINT(25), OP_SEPARATE_V=UINT(26), OP_COMBINE_RGBA=UINT(27), OP_COMBINE_HSVA=UINT(28), OP_HSV=UINT(29), OP_BRIGHTNESS_CONTRAST=UINT(30), OP_GAMMA=UINT(31), OP_COLORBALANCE_LGG=UINT(32), OP_COLORBALANCE_CDL=UINT(33), OP_COLOR_SPILL=UINT(40), OP_SET_ALPHA=UINT(41), OP_CHANNEL_MATTE=UINT(42), OP_SEPARATE_YUV_Y=UINT(43), OP_SEPARATE_YUV_U=UINT(44), OP_SEPARATE_YUV_V=UINT(45), OP_DIFFERENCE_MATTE=UINT(46), OP_DISTANCE_MATTE_LINEAR=UINT(47), OP_DISTANCE_MATTE_YCC=UINT(48), OP_LUMINANCE_MATTE=UINT(49), OP_NOP=UINT(999999) }; enum { MIX_NODE_BLEND=UINT(0), MIX_NODE_ADD=UINT(1), MIX_NODE_MULT=UINT(2), MIX_NODE_SUB=UINT(3), MIX_NODE_SCREEN=UINT(4), MIX_NODE_DIV=UINT(5), MIX_NODE_DIFF=UINT(6), MIX_NODE_DARK=UINT(7), MIX_NODE_LIGHT=UINT(8), MIX_NODE_OVERLAY=UINT(9), MIX_NODE_DODGE=UINT(10), MIX_NODE_BURN=UINT(11), MIX_NODE_HUE=UINT(12), MIX_NODE_SAT=UINT(13), MIX_NODE_VAL=UINT(14), MIX_NODE_COLOR=UINT(15), MIX_NODE_SOFT=UINT(16), MIX_NODE_LINEAR=UINT(17) }; /** Keep in sync with NODE_MATH_* */ enum { MATH_ADD = UINT(0), MATH_SUB = UINT(1), MATH_MUL = UINT(2), MATH_DIVIDE = UINT(3), MATH_SIN = UINT(4), MATH_COS = UINT(5), MATH_TAN = UINT(6), MATH_ASIN = UINT(7), MATH_ACOS = UINT(8), MATH_ATAN = UINT(9), MATH_POW = UINT(10), MATH_LOG = UINT(11), MATH_MIN = UINT(12), MATH_MAX = UINT(13), MATH_ROUND = UINT(14), MATH_LESS = UINT(15), MATH_GREATER = UINT(16), MATH_MOD = UINT(17), MATH_ABS = UINT(18) }; // Falloff filter types. 
enum { FALLOFF_FILTER_BOX=UINT(0), FALLOFF_FILTER_TENT=UINT(1), FALLOFF_FILTER_QUAD=UINT(2), FALLOFF_FILTER_CUBIC=UINT(3), FALLOFF_FILTER_CATROM=UINT(4), FALLOFF_FILTER_GAUSS=UINT(5), FALLOFF_FILTER_MITCH=UINT(6) }; enum { CHANNEL_MATTE_CS_RGB = UINT(1), CHANNEL_MATTE_CS_HSV = UINT(2), CHANNEL_MATTE_CS_YUV = UINT(3), CHANNEL_MATTE_CS_YCC = UINT(4) }; COMP_NAMESPACE_END #endif // END #include "kernel/operations/op_types.h" COMP_NAMESPACE_BEGIN #define stack_load_float4(a) stack4[a] #define stack_store_float4(a, f) stack4[a] = f // NOTE: vectors are stored in the stack4 #define stack_load_float3(a) make_float3(stack4[a].x, stack4[a].y, stack4[a].z) #define stack_store_float3(a, f) stack4[a] = make_float4(f.x, f.y, f.z, 0.0f); #define stack_load_float(a) stack[a] #define stack_store_float(a, f) stack[a] = f #define read_program_line(result) result = kg._program[ip++] #define read_program_line_float(result) { uint4 _program_line; read_program_line(_program_line); result = make_float4(uint_as_float(_program_line.x), uint_as_float(_program_line.y), uint_as_float(_program_line.z), uint_as_float(_program_line.w)); } comp_device_inline void eval_program( comp_in(comp_device_struct KernelGlobals, kg), comp_in(comp_device_struct CompositorData, cd), comp_out(float4, result) ) { float4 stack4[CVM_STACK_SIZE]; float stack[CVM_STACK_SIZE]; comp_device_struct Ray ray_stack[CVM_RAY_STACK_SIZE]; float total_weight = 0.0; comp_get_ref(result) = make_float4(0.0, 0.0, 0.0, 0.0); for (int _sample_number = 0 ; _sample_number < kg.num_samples; _sample_number ++) { cd.sample_number = _sample_number; // Instruction pointer of the program. 
int ip = 0; bool program_not_finished = true; while (program_not_finished) { uint4 program_line; read_program_line(program_line); /* program_line.x = operation type (uint) program_line.y = result_offset (uint) */ uint result_offset = program_line.y; switch(program_line.x) { case OP_END: program_not_finished = false; break; case OP_COLOR: { // -------------------------------------------------------- // START #include "kernel/operations/op_color.h" // -------------------------------------------------------- float4 node_color_data; read_program_line_float(node_color_data); stack_store_float4(result_offset, node_color_data); // stack_store_float(result_offset, 1.0); // END #include "kernel/operations/op_color.h" break; } case OP_VALUE: { float value = uint_as_float(program_line.z); // -------------------------------------------------------- // START #include "kernel/operations/op_value.h" // -------------------------------------------------------- stack_store_float(result_offset, value); // stack_store_float(result_offset+UINT(1), 1.0); // END #include "kernel/operations/op_value.h" break; } case OP_OUTPUT_RAY: { uint ray_offset = program_line.y; uint viewport_id = program_line.z; // -------------------------------------------------------- // START #include "kernel/operations/op_output_ray.h" // -------------------------------------------------------- // base on x, y values construct the first ray... 
float2 coord = cd.coord + kg.pixel_size * make_float2(get_random_float(&kg), get_random_float(&kg)); viewport_to_ray(kg, viewport_id, coord, &ray_stack[ray_offset]); // END #include "kernel/operations/op_output_ray.h" break; } case OP_OUTPUT: { // -------------------------------------------------------- // START #include "kernel/operations/op_output.h" // -------------------------------------------------------- comp_get_ref(result) += stack_load_float4(result_offset); total_weight += 1.0; // END #include "kernel/operations/op_output.h" break; } case OP_MIX: { uint mix_type = program_line.z; uint premultiply_alpha = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_mix.h" // -------------------------------------------------------- uint4 data; float4 colora; float4 colorb; float3 colora3; float3 colorb3; float value; read_program_line(data); colora = stack_load_float4(data.x); colorb = stack_load_float4(data.y); colora3 = make_float3(colora.x, colora.y, colora.z); colorb3 = make_float3(colorb.x, colorb.y, colorb.z); value = stack_load_float(data.z); float3 mixed_color3; float alpha = colora.w; if (premultiply_alpha) { alpha *= colorb.w; } // TODO: move to more generic methods. 
see svm_color_util.h switch (mix_type) { case MIX_NODE_ADD: mixed_color3 = mix(colora3, colora3 + colorb3, value); break; case MIX_NODE_SUB: mixed_color3 = mix(colora3, colora3 - colorb3, value); break; case MIX_NODE_MULT: mixed_color3 = mix(colora3, colora3 * colorb3, value); break; case MIX_NODE_DIFF: mixed_color3 = mix(colora3, fabs(colora3 - colorb3), value); break; case MIX_NODE_BLEND: default: mixed_color3 = mix(colora3, colorb3, value); break; } if (data.w) { mixed_color3 = clamp(mixed_color3, 0.0f, 1.0f); } float4 mixed_color4 = make_float4(mixed_color3.x,mixed_color3.y,mixed_color3.z, alpha); stack_store_float4(result_offset, mixed_color4); stack_store_float(result_offset, 1.0); // END #include "kernel/operations/op_mix.h" break; } case OP_IMAGE_1: { uint ray_offset = program_line.z; uint texture_slot = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_image_float1.h" // -------------------------------------------------------- uint4 data; read_program_line(data); int viewport_id = data.x; comp_device_struct Ray ray = ray_stack[ray_offset]; float2 uv = ray_to_viewport(kg, &ray, viewport_id); float color = kernel_tex_image_interp(&kg, texture_slot, uv.x, uv.y); stack_store_float(result_offset, color); // END #include "kernel/operations/op_image_float1.h" break; } case OP_IMAGE_4: { uint ray_offset = program_line.z; uint texture_slot = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_image_float4.h" // -------------------------------------------------------- uint4 data; read_program_line(data); int viewport_id = data.x; comp_device_struct Ray ray = ray_stack[ray_offset]; float2 uv = ray_to_viewport(kg, &ray, viewport_id); float4 color = kernel_tex_image_interp_4(&kg, texture_slot, uv.x, uv.y); stack_store_float4(result_offset, color); // END #include "kernel/operations/op_image_float4.h" break; } case OP_BLUR_RAY: { uint 
ray_offset = program_line.y; uint input_ray_offset = program_line.z; // -------------------------------------------------------- // START #include "kernel/operations/op_blur_ray.h" // -------------------------------------------------------- uint4 node1; read_program_line(node1); float size_scale = stack_load_float(node1.z); float size_x = uint_as_float(node1.x) * size_scale; //float size_y = uint_as_float(node1.y) * size_scale; float len = get_random_float(&kg); ray_stack[ray_offset].P = ray_stack[input_ray_offset].P; //Bending the direction input ray float3 direction = ray_stack[input_ray_offset].D; // TODO: size_x is not correct. float3 d1 = rotate_around_axis(direction, ray_stack[input_ray_offset].side, size_x * len); float3 d2 = rotate_around_axis(d1, direction, M_2PI_F * get_random_float(&kg)); ray_stack[ray_offset].D = d2; ray_stack[ray_offset].store_float1 = len; // END #include "kernel/operations/op_blur_ray.h" break; } case OP_BLUR: { // -------------------------------------------------------- // START #include "kernel/operations/op_blur.h" // -------------------------------------------------------- #define GAUSSFAC 1.6 #define TWO_GAUSSFAC2 (2.0 * GAUSSFAC * GAUSSFAC) uint4 node1; read_program_line(node1); float4 color1 = stack_load_float4(node1.x); float4 color2; // Read the ray float1 that is used for sampling the Image input float len = ray_stack[node1.y].store_float1; float value = 0.0; switch (node1.z) { case FALLOFF_FILTER_GAUSS: { float x = len * 3.0f * GAUSSFAC; value = 1.0f / sqrt((float)M_PI * TWO_GAUSSFAC2) * exp(-x*x / TWO_GAUSSFAC2); break; } case FALLOFF_FILTER_TENT: { value = 1.0 - len; break; } case FALLOFF_FILTER_BOX: { value = 1.0; break; } default: { value = 0.0; break; } } color2 = color1 * value; color2.w = color1.w; stack_store_float4(result_offset, color2); stack_store_float(result_offset, 1.0); // END #include "kernel/operations/op_blur.h" break; } case OP_ALPHA_OVER: { // -------------------------------------------------------- // 
START #include "kernel/operations/op_alpha_over.h" // -------------------------------------------------------- uint4 data; float4 colorBackground; float4 colorForeground; float4 result; float value; read_program_line(data); colorBackground = stack_load_float4(data.x); colorForeground = stack_load_float4(data.y); value = stack_load_float(data.z); if (colorForeground.w <= 0.0f) { result = colorBackground; } else if (value == 1.0f && colorForeground.w >= 1.0f) { result = colorForeground; } else { float premul = value * colorForeground.w; float mul = 1.0f - premul; result = mul*colorBackground + premul*colorForeground; result.w = mul*colorBackground.w + value * colorForeground.w; } stack_store_float4(result_offset, result); stack_store_float(result_offset, 1.0); // END #include "kernel/operations/op_alpha_over.h" break; } case OP_COLOR_MATTE: { uint input_color_offset = program_line.z; uint input_key_offset = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_matte_color.h" // -------------------------------------------------------- float4 node1; read_program_line_float(node1); float4 color = stack_load_float4(input_color_offset); float4 key_color = stack_load_float4(input_key_offset); float4 color_hsv = linear_to_hsv(color); float4 key_color_hsv = linear_to_hsv(key_color); float h_wrap; float value; if ( /* do hue last because it needs to wrap, and does some more checks */ /* sat */ (fabs(color_hsv.y - key_color_hsv.y) < node1.y) && /* val */ (fabs(color_hsv.z - key_color_hsv.z) < node1.z) && /* multiply by 2 because it wraps on both sides of the hue, * otherwise 0.5 would key all hue's */ /* hue */ ((h_wrap = 2.0f * fabs(color_hsv.x - key_color_hsv.x)) < node1.x || (2.0f - h_wrap) < node1.x) ) { value = 0.0f; /* make transparent */ } else { value = color.w; } color.w = value; stack_store_float4(result_offset, color); stack_store_float(result_offset, value); // END #include 
"kernel/operations/op_matte_color.h" break; } case OP_CHROMA_MATTE: { uint input_color_offset = program_line.z; uint input_key_offset = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_matte_chroma.h" // -------------------------------------------------------- float4 data; read_program_line_float(data); float4 color = stack_load_float4(input_color_offset); float4 key_color = stack_load_float4(input_key_offset); float4 color_ycc = linear_to_ycc_itu_bt601(color); float4 key_color_ycc = linear_to_ycc_itu_bt601(key_color); float alpha; float acceptance = data.x; float cutoff = data.y; float gain = data.z; color_ycc = (color_ycc * 2.0f) - 1.0f; key_color_ycc = (key_color_ycc * 2.0f) - 1.0f; float theta = atan2(key_color_ycc.z, key_color_ycc.y); float x_angle = color_ycc.y * cos(theta) + color_ycc.z * sin(theta); float z_angle = color_ycc.z * cos(theta) - color_ycc.y * sin(theta); float kfg = x_angle- (fabs(z_angle) / tan(acceptance / 2.0f)); if (kfg > 0.0f) { alpha = 1.0f - (kfg / gain); float beta = atan2(z_angle, x_angle); if (fabs(beta) < (cutoff / 2.0f)) { alpha = 0.0f; } alpha = min(alpha, color.w); } else { alpha = color.w; } color.w = alpha; stack_store_float4(result_offset, color); stack_store_float(result_offset, color.w); // END #include "kernel/operations/op_matte_chroma.h" break; } case OP_CHANNEL_MATTE: { uint input_color_offset = program_line.z; // -------------------------------------------------------- // START #include "kernel/operations/op_matte_channel.h" // -------------------------------------------------------- uint4 data; float4 dataf; read_program_line(data); read_program_line_float(dataf); float4 color = stack_load_float4(input_color_offset); const uint color_space = data.x; const uint channel = data.y; const uint method = data.z; const uint limit_channel = data.w; const float limit_max = dataf.x; const float limit_min = dataf.y; const float limit_range = dataf.z; float4 
converted_color; switch (color_space) { case CHANNEL_MATTE_CS_RGB: { converted_color = color; break; } case CHANNEL_MATTE_CS_HSV: { converted_color = linear_to_hsv(color); break; } case CHANNEL_MATTE_CS_YUV: { converted_color = linear_to_yuv(color); break; } case CHANNEL_MATTE_CS_YCC: { converted_color = linear_to_ycc_itu_bt601(color); break; } } float channel_value; float limit_value1; float limit_value2; switch (method) { case 0: /* Single */ { channel_value = component(converted_color, channel); limit_value1 = limit_value2 = component(converted_color, limit_channel); break; } case 1: /* MAX */ { channel_value = component(converted_color, channel); limit_value1 = component(converted_color, (channel+1) % 3); limit_value2 = component(converted_color, (channel+2) % 3); break; } } float alpha = 1.0 - (channel_value - max(limit_value1, limit_value1)); if (alpha > limit_max) { alpha = color.w; } else if (alpha < limit_min) { alpha = 0.0f; } else { alpha = (alpha - limit_min) / limit_range; } color.w = alpha; stack_store_float4(result_offset, color); stack_store_float(result_offset, color.w); // END #include "kernel/operations/op_matte_channel.h" break; } case OP_DIFFERENCE_MATTE: { uint input_color_offset = program_line.z; uint input_key_offset = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_matte_difference.h" // -------------------------------------------------------- float4 data; read_program_line_float(data); const float tolerance = data.x; const float falloff = data.y; float4 color = stack_load_float4(input_color_offset); float4 color_key = stack_load_float4(input_key_offset); float4 tmp = fabs(color_key - color); float difference = (tmp.x + tmp.y + tmp.z) / 3.0f; color.w = clamp((difference - tolerance) / falloff, 0.0f , color.w); stack_store_float4(result_offset, color); stack_store_float(result_offset, color.w); // END #include "kernel/operations/op_matte_difference.h" break; } case 
OP_DISTANCE_MATTE_LINEAR: { uint input_color_offset = program_line.z; uint input_key_offset = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_matte_distance_linear.h" // -------------------------------------------------------- float4 data; read_program_line_float(data); const float tolerance = data.x; const float falloff = data.y; float4 color = stack_load_float4(input_color_offset); float4 color_key = stack_load_float4(input_key_offset); float distance = length_v3( make_float3(color_key.x, color_key.y, color_key.z) - make_float3(color.x, color.y, color.z) ); color.w = clamp((distance - tolerance) / falloff, 0.0f , color.w); stack_store_float4(result_offset, color); stack_store_float(result_offset, color.w); // END #include "kernel/operations/op_matte_distance_linear.h" break; } case OP_DISTANCE_MATTE_YCC: { uint input_color_offset = program_line.z; uint input_key_offset = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_matte_distance_ycc.h" // -------------------------------------------------------- float4 data; read_program_line_float(data); const float tolerance = data.x; const float falloff = data.y; float4 color = stack_load_float4(input_color_offset); float4 color_ycc = linear_to_ycc_itu_bt601(color); float4 color_key_ycc = linear_to_ycc_itu_bt601(stack_load_float4(input_key_offset)); float distance = length_v2(make_float2(color_key_ycc.y, color_key_ycc.z) - make_float2(color_ycc.y, color_ycc.z)); color.w = clamp((distance - tolerance) / falloff, 0.0f , color.w); stack_store_float4(result_offset, color); stack_store_float(result_offset, color.w); // END #include "kernel/operations/op_matte_distance_ycc.h" break; } case OP_LUMINANCE_MATTE: { uint input_color_offset = program_line.z; // -------------------------------------------------------- // START #include "kernel/operations/op_matte_luminance.h" // 
-------------------------------------------------------- float4 data; read_program_line_float(data); const float high = data.x; const float low = data.y; float4 color = stack_load_float4(input_color_offset); float4 color_yuv = linear_to_yuv(color); color.w = clamp((color_yuv.x - low) / (high - low), 0.0f, color.w); stack_store_float4(result_offset, color); stack_store_float(result_offset, color.w); // END #include "kernel/operations/op_matte_luminance.h" break; } case OP_COLOR_SPILL: { uint input_color_offset = program_line.z; uint input_value_offset = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_color_spill.h" // -------------------------------------------------------- uint4 data; float4 uspill; read_program_line(data); read_program_line_float(uspill); float4 color = stack_load_float4(input_color_offset); float value = min(1.0f, stack_load_float(input_value_offset)); float spill_value; float channel2; float channel3; float lim_value; float4 mut; const float lim_scale = uspill.w; /* Spill channel */ switch (data.y) { case 0: /* R */ { mut = make_float4(-1.0f, 1.0f, 1.0f, 0.0f); spill_value = color.x; channel2 = color.y; channel3 = color.z; break; } case 1: /* G */ { mut = make_float4(1.0f, -1.0f, 1.0f, 0.0f); spill_value = color.y; channel2 = color.x; channel3 = color.z; break; } case 2: /* B */ { mut = make_float4(1.0f, 1.0f, -1.0f, 0.0f); spill_value = color.z; channel2 = color.x; channel3 = color.y; break; } } switch (data.x) { /* method */ case 0: { switch (data.z) { /* lim channel */ case 0: { lim_value = color.x; break; } case 1: { lim_value = color.y; break; } case 2: { lim_value = color.z; break; } } break; } case 1: /* average */ { lim_value = (channel2 + channel3) / 2.0f; break; } } float map = value * (spill_value - (lim_scale * lim_value)); if (map > 0.0f) { float alpha = color.w; color = color + mut * uspill * map; color.w = alpha; } stack_store_float4(result_offset, color); // END 
#include "kernel/operations/op_color_spill.h" break; } case OP_SET_ALPHA: { uint input_color_offset = program_line.z; uint input_value_offset = program_line.w; // -------------------------------------------------------- // START #include "kernel/operations/op_set_alpha.h" // -------------------------------------------------------- float4 color = stack_load_float4(input_color_offset); float alpha = stack_load_float(input_value_offset); color.w = alpha; stack_store_float4(result_offset, make_float4( color.x, color.y, color.z, alpha )); // END #include "kernel/operations/op_set_alpha.h" break; } case OP_MATH: { uint operation_type = program_line.z; // -------------------------------------------------------- // START #include "kernel/operations/op_math.h" // -------------------------------------------------------- uint4 data; read_program_line(data); float value1 = stack_load_float(data.x); float value2 = stack_load_float(data.y); float result; switch (operation_type) { case MATH_ADD: { result = value1 + value2; break; } case MATH_SUB: { result = value1 - value2; break; } case MATH_MUL: { result = value1 * value2; break; } case MATH_DIVIDE: { if (value2 != 0.0) { result = value1 / value2; } else { result = 0.0; } break; } case MATH_SIN: { result = sin(value1); break; } case MATH_COS: { result = cos(value1); break; } case MATH_TAN: { result = tan(value1); break; } case MATH_ASIN: { result = asin(value1); break; } case MATH_ACOS: { result = acos(value1); break; } case MATH_ATAN: { result = atan(value1); break; } case MATH_POW: { if (value1 >= 0.0) { result = pow(value1, value2); } else { float y_mod_1 = mod(value2, 1); /* if input value is not nearly an integer, fall back to zero, nicer than straight rounding */ if (y_mod_1 > 0.999f || y_mod_1 < 0.001f) { result = pow(value1, floor(value2 + 0.5f)); } else { result = 0.0; } } break; } case MATH_LOG: { if (value1 > 0.0 && value2 > 0.0) { result = log(value1) / log(value2); } else { result = 0.0; } break; } case MATH_MIN: { 
result = min(value1, value2); break; } case MATH_MAX: { result = max(value1, value2); break; } case MATH_ROUND: { result = round(value1); break; } case MATH_LESS: { result = value1 < value2? 1.0f: 0.0f; break; } case MATH_GREATER: { result = value1 > value2? 1.0f: 0.0f; break; } case MATH_MOD: { if (value2 == 0.0f) { result = 0.0f; } else { result = mod(value1, value2); } break; } case MATH_ABS: { result = abs(value1); } default: { result = -1; break; } } // Clamping if (program_line.w) { result = clamp(result, 0.0f, 1.0f); } stack_store_float(result_offset, result); // END #include "kernel/operations/op_math.h" break; } /* CONVERTERS */ case OP_VALUE_TO_COLOR: { // -------------------------------------------------------- // START #include "kernel/operations/op_convert_value_to_color.h" // -------------------------------------------------------- float value = stack_load_float(program_line.z); float4 color = make_float4(value, value, value, 1.0); stack_store_float4(result_offset, color); // END #include "kernel/operations/op_convert_value_to_color.h" break; } case OP_VALUE_TO_VECTOR: { // -------------------------------------------------------- // START #include "kernel/operations/op_convert_value_to_vector.h" // -------------------------------------------------------- float value = stack_load_float(program_line.z); float3 vector = make_float3(value, value, value); stack_store_float3(result_offset, vector); // END #include "kernel/operations/op_convert_value_to_vector.h" break; } case OP_VECTOR_TO_VALUE: { // -------------------------------------------------------- // START #include "kernel/operations/op_convert_vector_to_value.h" // -------------------------------------------------------- float3 vector = stack_load_float3(program_line.z); float value = (vector.x + vector.y + vector.z) / 3.0f; stack_store_float(result_offset, value); // END #include "kernel/operations/op_convert_vector_to_value.h" break; } case OP_VECTOR_TO_COLOR: { // 
-------------------------------------------------------- // START #include "kernel/operations/op_convert_vector_to_color.h" // -------------------------------------------------------- float3 vector = stack_load_float3(program_line.z); stack_store_float4(result_offset, make_float4(vector.x, vector.y, vector.z, 1.0f)); // END #include "kernel/operations/op_convert_vector_to_color.h" break; } case OP_COLOR_TO_VALUE: { // -------------------------------------------------------- // START #include "kernel/operations/op_convert_color_to_value.h" // -------------------------------------------------------- stack_store_float(result_offset, linear_rgb_to_gray(stack_load_float4(program_line.z))); // END #include "kernel/operations/op_convert_color_to_value.h" break; } case OP_COLOR_TO_VECTOR: { // -------------------------------------------------------- // START #include "kernel/operations/op_convert_color_to_vector.h" // -------------------------------------------------------- // WARNING Cannot include file kernel/operations/op_convert_color_to_vector.h reason, cannot open file. 
// END #include "kernel/operations/op_convert_color_to_vector.h" break; } case OP_SEPARATE_R: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_r.h" // -------------------------------------------------------- stack_store_float(result_offset, stack_load_float4(program_line.z).x); // END #include "kernel/operations/op_separate_r.h" break; } case OP_SEPARATE_G: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_g.h" // -------------------------------------------------------- stack_store_float(result_offset, stack_load_float4(program_line.z).y); // END #include "kernel/operations/op_separate_g.h" break; } case OP_SEPARATE_B: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_b.h" // -------------------------------------------------------- stack_store_float(result_offset, stack_load_float4(program_line.z).z); // END #include "kernel/operations/op_separate_b.h" break; } case OP_SEPARATE_A: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_a.h" // -------------------------------------------------------- stack_store_float(result_offset, stack_load_float4(program_line.z).w); // END #include "kernel/operations/op_separate_a.h" break; } case OP_SEPARATE_H: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_h.h" // -------------------------------------------------------- stack_store_float(result_offset, linear_to_hsv(stack_load_float4(program_line.z)).x); // END #include "kernel/operations/op_separate_h.h" break; } case OP_SEPARATE_S: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_s.h" // -------------------------------------------------------- stack_store_float(result_offset, 
linear_to_hsv(stack_load_float4(program_line.z)).y); // END #include "kernel/operations/op_separate_s.h" break; } case OP_SEPARATE_V: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_v.h" // -------------------------------------------------------- stack_store_float(result_offset, linear_to_hsv(stack_load_float4(program_line.z)).z); // END #include "kernel/operations/op_separate_v.h" break; } case OP_SEPARATE_YUV_Y: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_yuv_y.h" // -------------------------------------------------------- stack_store_float(result_offset, linear_to_yuv(stack_load_float4(program_line.z)).x); // END #include "kernel/operations/op_separate_yuv_y.h" break; } case OP_SEPARATE_YUV_U: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_yuv_u.h" // -------------------------------------------------------- stack_store_float(result_offset, linear_to_yuv(stack_load_float4(program_line.z)).y); // END #include "kernel/operations/op_separate_yuv_u.h" break; } case OP_SEPARATE_YUV_V: { // -------------------------------------------------------- // START #include "kernel/operations/op_separate_yuv_v.h" // -------------------------------------------------------- stack_store_float(result_offset, linear_to_yuv(stack_load_float4(program_line.z)).z); // END #include "kernel/operations/op_separate_yuv_v.h" break; } case OP_COMBINE_RGBA: { // -------------------------------------------------------- // START #include "kernel/operations/op_combine_rgba.h" // -------------------------------------------------------- uint4 data; read_program_line(data); float4 result = make_float4( stack_load_float(data.x), stack_load_float(data.y), stack_load_float(data.z), stack_load_float(data.w) ); stack_store_float4(result_offset, result); // END #include "kernel/operations/op_combine_rgba.h" break; } 
case OP_COMBINE_HSVA: { // -------------------------------------------------------- // START #include "kernel/operations/op_combine_hsva.h" // -------------------------------------------------------- // WARNING Cannot include file kernel/operations/op_combine_hsva.h reason, cannot open file. // END #include "kernel/operations/op_combine_hsva.h" break; } case OP_HSV: { // -------------------------------------------------------- // START #include "kernel/operations/op_hsv.h" // -------------------------------------------------------- uint4 data; read_program_line(data); float4 color = stack_load_float4(program_line.z); float h = stack_load_float(data.x); float s = stack_load_float(data.y); float v = stack_load_float(data.z); float value = stack_load_float(data.w); float4 hsv = linear_to_hsv(color); hsv.x = hsv.x + (h-0.5f); if (hsv.x > 1.0f) hsv.x -= 1.0f; if (hsv.x < 0.0f) hsv.x += 1.0f; hsv.y *= s; hsv.z *= v; color = mix(color, hsv_to_linear(hsv), value); stack_store_float4(result_offset, color); // END #include "kernel/operations/op_hsv.h" break; } case OP_BRIGHTNESS_CONTRAST: { // -------------------------------------------------------- // START #include "kernel/operations/op_brightness_contrast.h" // -------------------------------------------------------- uint4 data; read_program_line(data); float4 color = stack_load_float4(data.x); float brightness = stack_load_float(data.y) / 100.0f; float contrast = stack_load_float(data.z); float delta = contrast / 200.0f; float a, b; float alpha = color.w; a = 1.0f - delta * 2.0f; /* * The algorithm is by Werner D. 
Streidt * (http://visca.com/ffactory/archives/5-99/msg00021.html) * Extracted of OpenCV demhist.c */ if (contrast > 0) { a = 1.0f / a; b = a * (brightness - delta); } else { delta *= -1; b = a * (brightness + delta); } color = color * a + b; color.w = alpha; // TODO: premul stack_store_float4(result_offset, color); // END #include "kernel/operations/op_brightness_contrast.h" break; } case OP_COLORBALANCE_LGG: { // -------------------------------------------------------- // START #include "kernel/operations/op_colorbalance_lgg.h" // -------------------------------------------------------- #define colorbalance_lgg(in, lift_lgg, gamma_inv, gain)\ {\ float x = (((_linear_to_srgb(in) - 1.0f) * lift_lgg) + 1.0f) * gain;\ if (x < 0.0f) x = 0.0f; \ in = pow(_srgb_to_linear(x), gamma_inv);\ } float4 lift_lgg; float4 gamma_inv; float4 gain; read_program_line_float(lift_lgg); read_program_line_float(gamma_inv); read_program_line_float(gain); float value = stack_load_float(program_line.z); float4 color = stack_load_float4(program_line.w); float4 result; result = color; colorbalance_lgg(result.x, lift_lgg.x, gamma_inv.x, gain.x); colorbalance_lgg(result.y, lift_lgg.y, gamma_inv.y, gain.y); colorbalance_lgg(result.z, lift_lgg.z, gamma_inv.z, gain.z); stack_store_float4(result_offset, mix(color, result, value)); #undef colorbalance_lgg // END #include "kernel/operations/op_colorbalance_lgg.h" break; } case OP_COLORBALANCE_CDL: { // -------------------------------------------------------- // START #include "kernel/operations/op_colorbalance_cdl.h" // -------------------------------------------------------- #define colorbalance_cdl(in, offset, power, slope)\ {\ float x = in * slope + offset;\ if (x < 0.0f) x = 0.0f; \ in = pow(x, power);\ } float4 offset; float4 power; float4 slope; read_program_line_float(offset); read_program_line_float(power); read_program_line_float(slope); float value = stack_load_float(program_line.z); float4 color = stack_load_float4(program_line.w); float4 
result; result = color; colorbalance_cdl(result.x, offset.x, power.x, slope.x); colorbalance_cdl(result.y, offset.y, power.y, slope.y); colorbalance_cdl(result.z, offset.z, power.z, slope.z); stack_store_float4(result_offset, mix(color, result, value)); #undef colorbalance_cdl // END #include "kernel/operations/op_colorbalance_cdl.h" break; } default: { kernel_assert(!"Unknown operation type was read"); return; } } } } comp_get_ref(result) /= total_weight; } COMP_NAMESPACE_END #endif // END #include "kernel/operations/operations.h" // END #include "kernel/kernel.h" __kernel void compositor_sample(__write_only image2d_t result, HostKernelGlobals host_kg, __global uint4 program[], uint program_size, __local uint4 local_program[], __global char *buffer1 ) { event_t event = async_work_group_copy(local_program, program, program_size, 0); float4 res; int2 coords = (int2)(get_global_id(0), get_global_id(1)); struct CompositorData __attribute__((aligned)) cd; cd.coord.x = coords.x/(float)host_kg.output_dimension.x; cd.coord.y = coords.y/(float)host_kg.output_dimension.y; struct KernelGlobals __attribute__((aligned)) kg; kg._program = local_program; kg.buffers[0] = buffer1; kg.num_samples = host_kg.num_samples; kg.frame_number = host_kg.frame_number; kg.texture_ptr.index = host_kg.texture_ptr.index; kg.texture_ptr.offset = host_kg.texture_ptr.offset; kg.viewport_ptr.index = host_kg.viewport_ptr.index; kg.viewport_ptr.offset = host_kg.viewport_ptr.offset; kg.aspect_ratio = host_kg.aspect_ratio; kg.seed = host_kg.seed + coords.x* 123 + coords.y *123567; kg.pixel_size = make_float2(1.0/(float)host_kg.output_dimension.x, 1.0/(float)host_kg.output_dimension.y); wait_group_events(1, &event); eval_program(kg, cd, &res); write_imagef(result, coords, res); } :307:9: warning: 'make_int2' macro redefined #define make_int2(x, y) ((ivec2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3199:9: note: previous definition is here #define 
make_int2(A,B) (int2)((A),(B)) ^ :373:9: warning: 'make_int3' macro redefined #define make_int3(x, y, z) ((int3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3200:9: note: previous definition is here #define make_int3(A,B,C) (int3)((A),(B),(C)) ^ :442:9: warning: 'make_int4' macro redefined #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3201:9: note: previous definition is here #define make_int4(A,B,C,D) (int4)((A),(B),(C),(D)) ^ :491:9: warning: 'make_uint2' macro redefined #define make_uint2(x, y) ((uint2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3205:9: note: previous definition is here #define make_uint2(A,B) (uint2)((A),(B)) ^ :539:9: warning: 'make_uint3' macro redefined #define make_uint3(x, y, z) ((uint3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3206:9: note: previous definition is here #define make_uint3(A,B,C) (uint3)((A),(B),(C)) ^ :588:9: warning: 'make_uint4' macro redefined #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3207:9: note: previous definition is here #define make_uint4(A,B,C,D) (uint4)((A),(B),(C),(D)) ^ :639:9: warning: 'make_float2' macro redefined #define make_float2(x, y) ((float2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3223:9: note: previous definition is here #define make_float2(A,B) (float2)((A),(B)) ^ :705:9: warning: 'make_float3' macro redefined #define make_float3(x, y, z) ((float3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3224:9: note: previous definition is here #define make_float3(A,B,C) (float3)((A),(B),(C)) ^ :775:9: warning: 'make_float4' macro 
redefined #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3226:9: note: previous definition is here #define make_float4(A,B,C,D) (float4)((A),(B),(C),(D)) ^ :3305:9: warning: 'abs' macro redefined #define abs(x) fabs(x) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:4770:13: note: previous definition is here #define abs(__x) __cl_abs(__x) ^ :3309:26: warning: no previous prototype for function 'min4' comp_device_inline float min4(float a, float b, float c, float d) ^ :3314:26: warning: no previous prototype for function 'max4' comp_device_inline float max4(float a, float b, float c, float d) ^ :3376:25: warning: no previous prototype for function 'isnan_safe' comp_device_inline bool isnan_safe(float f) ^ :3411:26: warning: no previous prototype for function 'saturate' comp_device_inline float saturate(float a) ^ :3424:24: warning: no previous prototype for function 'float_to_int' comp_device_inline int float_to_int(float f) ^ :3430:24: warning: no previous prototype for function 'floor_to_int' comp_device_inline int floor_to_int(float f) ^ :3435:24: warning: no previous prototype for function 'ceil_to_int' comp_device_inline int ceil_to_int(float f) ^ :3440:26: warning: no previous prototype for function 'signf' comp_device_inline float signf(float f) ^ :3445:26: warning: no previous prototype for function 'nonzerof' comp_device_inline float nonzerof(float f, float eps) ^ :3453:26: warning: no previous prototype for function 'smoothstepf' comp_device_inline float smoothstepf(float f) ^ :3459:24: warning: no previous prototype for function 'modulo' comp_device_inline int modulo(int x, int m) ^ :3464:27: warning: no previous prototype for function 'float2_to_float3' comp_device_inline float3 float2_to_float3(const float2 a) ^ :3469:27: warning: no previous prototype for function 'float4_to_float3' comp_device_inline float3 
float4_to_float3(const float4 a) ^ :3474:27: warning: no previous prototype for function 'float3_to_float4' comp_device_inline float4 float3_to_float4(const float3 a) ^ :4842:25: warning: no previous prototype for function 'make_orthonormals' comp_device_inline void make_orthonormals(const float3 N, comp_inout(float3, a), comp_inout(float3, b)) ^ :4855:27: warning: no previous prototype for function 'safe_invert_color' comp_device_inline float3 safe_invert_color(float3 a) ^ :4866:27: warning: no previous prototype for function 'safe_divide_color' comp_device_inline float3 safe_divide_color(float3 a, float3 b) ^ :4877:27: warning: no previous prototype for function 'safe_divide_even_color' comp_device_inline float3 safe_divide_even_color(float3 a, float3 b) ^ :4915:27: warning: no previous prototype for function 'rotate_around_axis' comp_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) ^ :4938:26: warning: no previous prototype for function 'safe_sqrt' comp_device_inline float safe_sqrt(float f) ^ :4943:19: warning: no previous prototype for function 'safe_asin' comp_device float safe_asin(float a) ^ :4948:19: warning: no previous prototype for function 'safe_acos' comp_device float safe_acos(float a) ^ :4953:19: warning: no previous prototype for function 'compatible_pow' comp_device float compatible_pow(float x, float y) ^ :4972:19: warning: no previous prototype for function 'safe_pow' comp_device float safe_pow(float a, float b) ^ :4980:19: warning: no previous prototype for function 'safe_divide' comp_device float safe_divide(float a, float b) ^ :4985:19: warning: no previous prototype for function 'safe_log' comp_device float safe_log(float a, float b) ^ :4993:19: warning: no previous prototype for function 'safe_modulo' comp_device float safe_modulo(float a, float b) ^ :4998:26: warning: no previous prototype for function 'xor_signmask' comp_device_inline float xor_signmask(float x, int y) ^ :5016:25: warning: no previous prototype 
for function 'ray_plane_intersect' comp_device_inline bool ray_plane_intersect(comp_device_struct Ray* ray, float3 planePoint, float3 planeNormal, float* length) { ^ :9918:26: warning: no previous prototype for function '_srgb_to_linear' comp_device_inline float _srgb_to_linear(float c) ^ :9926:26: warning: no previous prototype for function '_linear_to_srgb' comp_device_inline float _linear_to_srgb(float c) ^ :9934:27: warning: no previous prototype for function 'linear_to_hsv' comp_device_inline float4 linear_to_hsv(float4 rgb) ^ :9973:27: warning: no previous prototype for function 'hsv_to_linear' comp_device_inline float4 hsv_to_linear(float4 hsv) ^ :10085:27: warning: no previous prototype for function 'srgb_to_linear' comp_device_inline float4 srgb_to_linear(float4 c) ^ :10093:27: warning: no previous prototype for function 'linear_to_srgb' comp_device_inline float4 linear_to_srgb(float4 c) ^ :10101:27: warning: no previous prototype for function 'linear_to_ycc_itu_bt601' comp_device_inline float4 linear_to_ycc_itu_bt601(float4 c) ^ :10111:27: warning: no previous prototype for function 'linear_to_yuv' comp_device_inline float4 linear_to_yuv(float4 c) ^ :10138:26: warning: no previous prototype for function 'linear_rgb_to_gray' comp_device_inline float linear_rgb_to_gray(float4 c) ^ :10143:26: warning: no previous prototype for function 'component' comp_device_inline float component(float4 c, uint channel) { ^ :10169:26: warning: no previous prototype for function 'get_random_float' comp_device_inline float get_random_float(KernelGlobals * kg) //uniform between 0-1 ^ :10184:71: warning: no previous prototype for function 'kernel_tex_info' comp_device_inline comp_device_global comp_device_struct TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) { ^ :10188:24: warning: no previous prototype for function 'wrap_periodic' comp_device_inline int wrap_periodic(int x, int width) ^ :10196:24: warning: no previous prototype for function 'wrap_clamp' 
comp_device_inline int wrap_clamp(int x, int width) ^ :10201:26: warning: no previous prototype for function 'frac' comp_device_inline float frac(float x, int *ix) ^ :10209:26: warning: no previous prototype for function 'kernel_tex_image_interp' comp_device_inline float kernel_tex_image_interp(comp_device_struct KernelGlobals *kg, int id, float x, float y){ ^ :10354:27: warning: no previous prototype for function 'kernel_tex_image_interp_4' comp_device_inline float4 kernel_tex_image_interp_4(comp_device_struct KernelGlobals *kg, int id, float x, float y){ ^ :10510:68: warning: no previous prototype for function 'kernel_viewport' comp_device_inline comp_device_global comp_device_struct Viewport* kernel_viewport(KernelGlobals *kg, uint id) { ^ :10517:25: warning: no previous prototype for function 'viewport_to_ray' comp_device_inline void viewport_to_ray(comp_in(comp_device_struct KernelGlobals, kg), int viewport_id, comp_in(float2, coordinates), comp_device_struct Ray* ray) { ^ :10560:27: warning: no previous prototype for function 'ray_to_viewport' comp_device_inline float2 ray_to_viewport(comp_in(comp_device_struct KernelGlobals, kg), comp_device_struct Ray* ray, int viewport_id) { ^ :10762:25: warning: no previous prototype for function 'eval_program' comp_device_inline void eval_program( ^ :12009:17: error: parameter may not be qualified with an address space __global uint4 program[], ^ :12010:35: error: parameter may not be qualified with an address space uint program_size, __local uint4 local_program[], __global char *buffer1 ^ creating kernel OPENCL error: [CL_INVALID_PROGRAM_EXECUTABLE] : OpenCL Error : Failed to create kernel! Could not find a device with a built executable for this kernel. 
clCreateKernel CL_INVALID_PROGRAM_EXECUTABLE creating command queue OPENCL error: [CL_DEVICE_NOT_AVAILABLE] : OpenCL Error : Error: Build Program driver returned (-2) OPENCL error: OpenCL Warning : clBuildProgram failed: could not build program for 0x2021c00 (AMD Radeon HD - FirePro D700 Compute Engine) (err:-2) OPENCL error: [CL_BUILD_ERROR] : OpenCL Build Error : Compiler build log: :307:9: warning: 'make_int2' macro redefined #define make_int2(x, y) ((ivec2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3199:9: note: previous definition is here #define make_int2(A,B) (int2)((A),(B)) ^ :373:9: warning: 'make_int3' macro redefined #define make_int3(x, y, z) ((int3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3200:9: note: previous definition is here #define make_int3(A,B,C) (int3)((A),(B),(C)) ^ :442:9: warning: 'make_int4' macro redefined #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3201:9: note: previous definition is here #define make_int4(A,B,C,D) (int4)((A),(B),(C),(D)) ^ :491:9: warning: 'make_uint2' macro redefined #define make_uint2(x, y) ((uint2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3205:9: note: previous definition is here #define make_uint2(A,B) (uint2)((A),(B)) ^ :539:9: warning: 'make_uint3' macro redefined #define make_uint3(x, y, z) ((uint3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3206:9: note: previous definition is here #define make_uint3(A,B,C) (uint3)((A),(B),(C)) ^ :588:9: warning: 'make_uint4' macro redefined #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3207:9: note: previous definition is here #define make_uint4(A,B,C,D) 
(uint4)((A),(B),(C),(D)) ^ :639:9: warning: 'make_float2' macro redefined #define make_float2(x, y) ((float2)(x, y)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3223:9: note: previous definition is here #define make_float2(A,B) (float2)((A),(B)) ^ :705:9: warning: 'make_float3' macro redefined #define make_float3(x, y, z) ((float3)(x, y, z)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3224:9: note: previous definition is here #define make_float3(A,B,C) (float3)((A),(B),(C)) ^ :775:9: warning: 'make_float4' macro redefined #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:3226:9: note: previous definition is here #define make_float4(A,B,C,D) (float4)((A),(B),(C),(D)) ^ :3305:9: warning: 'abs' macro redefined #define abs(x) fabs(x) ^ /System/Library/Frameworks/OpenCL.framework/Versions/A/lib/clang/3.2/include/cl_kernel.h:4770:13: note: previous definition is here #define abs(__x) __cl_abs(__x) ^ :3309:26: warning: no previous prototype for function 'min4' comp_device_inline float min4(float a, float b, float c, float d) ^ :3314:26: warning: no previous prototype for function 'max4' comp_device_inline float max4(float a, float b, float c, float d) ^ :3376:25: warning: no previous prototype for function 'isnan_safe' comp_device_inline bool isnan_safe(float f) ^ :3411:26: warning: no previous prototype for function 'saturate' comp_device_inline float saturate(float a) ^ :3424:24: warning: no previous prototype for function 'float_to_int' comp_device_inline int float_to_int(float f) ^ :3430:24: warning: no previous prototype for function 'floor_to_int' comp_device_inline int floor_to_int(float f) ^ :3435:24: warning: no previous prototype for function 'ceil_to_int' comp_device_inline int ceil_to_int(float f) ^ :3440:26: warning: no previous prototype for function 'signf' 
comp_device_inline float signf(float f) ^ :3445:26: warning: no previous prototype for function 'nonzerof' comp_device_inline float nonzerof(float f, float eps) ^ :3453:26: warning: no previous prototype for function 'smoothstepf' comp_device_inline float smoothstepf(float f) ^ :3459:24: warning: no previous prototype for function 'modulo' comp_device_inline int modulo(int x, int m) ^ :3464:27: warning: no previous prototype for function 'float2_to_float3' comp_device_inline float3 float2_to_float3(const float2 a) ^ :3469:27: warning: no previous prototype for function 'float4_to_float3' comp_device_inline float3 float4_to_float3(const float4 a) ^ :3474:27: warning: no previous prototype for function 'float3_to_float4' comp_device_inline float4 float3_to_float4(const float3 a) ^ :4842:25: warning: no previous prototype for function 'make_orthonormals' comp_device_inline void make_orthonormals(const float3 N, comp_inout(float3, a), comp_inout(float3, b)) ^ :4855:27: warning: no previous prototype for function 'safe_invert_color' comp_device_inline float3 safe_invert_color(float3 a) ^ :4866:27: warning: no previous prototype for function 'safe_divide_color' comp_device_inline float3 safe_divide_color(float3 a, float3 b) ^ :4877:27: warning: no previous prototype for function 'safe_divide_even_color' comp_device_inline float3 safe_divide_even_color(float3 a, float3 b) ^ :4915:27: warning: no previous prototype for function 'rotate_around_axis' comp_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) ^ :4938:26: warning: no previous prototype for function 'safe_sqrt' comp_device_inline float safe_sqrt(float f) ^ :4943:19: warning: no previous prototype for function 'safe_asin' comp_device float safe_asin(float a) ^ :4948:19: warning: no previous prototype for function 'safe_acos' comp_device float safe_acos(float a) ^ :4953:19: warning: no previous prototype for function 'compatible_pow' comp_device float compatible_pow(float x, float y) ^ 
:4972:19: warning: no previous prototype for function 'safe_pow' comp_device float safe_pow(float a, float b) ^ :4980:19: warning: no previous prototype for function 'safe_divide' comp_device float safe_divide(float a, float b) ^ :4985:19: warning: no previous prototype for function 'safe_log' comp_device float safe_log(float a, float b) ^ :4993:19: warning: no previous prototype for function 'safe_modulo' comp_device float safe_modulo(float a, float b) ^ :4998:26: warning: no previous prototype for function 'xor_signmask' comp_device_inline float xor_signmask(float x, int y) ^ :5016:25: warning: no previous prototype for function 'ray_plane_intersect' comp_device_inline bool ray_plane_intersect(comp_device_struct Ray* ray, float3 planePoint, float3 planeNormal, float* length) { ^ :9918:26: warning: no previous prototype for function '_srgb_to_linear' comp_device_inline float _srgb_to_linear(float c) ^ :9926:26: warning: no previous prototype for function '_linear_to_srgb' comp_device_inline float _linear_to_srgb(float c) ^ :9934:27: warning: no previous prototype for function 'linear_to_hsv' comp_device_inline float4 linear_to_hsv(float4 rgb) ^ :9973:27: warning: no previous prototype for function 'hsv_to_linear' comp_device_inline float4 hsv_to_linear(float4 hsv) ^ :10085:27: warning: no previous prototype for function 'srgb_to_linear' comp_device_inline float4 srgb_to_linear(float4 c) ^ :10093:27: warning: no previous prototype for function 'linear_to_srgb' comp_device_inline float4 linear_to_srgb(float4 c) ^ :10101:27: warning: no previous prototype for function 'linear_to_ycc_itu_bt601' comp_device_inline float4 linear_to_ycc_itu_bt601(float4 c) ^ :10111:27: warning: no previous prototype for function 'linear_to_yuv' comp_device_inline float4 linear_to_yuv(float4 c) ^ :10138:26: warning: no previous prototype for function 'linear_rgb_to_gray' comp_device_inline float linear_rgb_to_gray(float4 c) ^ :10143:26: warning: no previous prototype for function 
'component' comp_device_inline float component(float4 c, uint channel) { ^ :10169:26: warning: no previous prototype for function 'get_random_float' comp_device_inline float get_random_float(KernelGlobals * kg) //uniform between 0-1 ^ :10184:71: warning: no previous prototype for function 'kernel_tex_info' comp_device_inline comp_device_global comp_device_struct TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) { ^ :10188:24: warning: no previous prototype for function 'wrap_periodic' comp_device_inline int wrap_periodic(int x, int width) ^ :10196:24: warning: no previous prototype for function 'wrap_clamp' comp_device_inline int wrap_clamp(int x, int width) ^ :10201:26: warning: no previous prototype for function 'frac' comp_device_inline float frac(float x, int *ix) ^ :10209:26: warning: no previous prototype for function 'kernel_tex_image_interp' comp_device_inline float kernel_tex_image_interp(comp_device_struct KernelGlobals *kg, int id, float x, float y){ ^ :10354:27: warning: no previous prototype for function 'kernel_tex_image_interp_4' comp_device_inline float4 kernel_tex_image_interp_4(comp_device_struct KernelGlobals *kg, int id, float x, float y){ ^ :10510:68: warning: no previous prototype for function 'kernel_viewport' comp_device_inline comp_device_global comp_device_struct Viewport* kernel_viewport(KernelGlobals *kg, uint id) { ^ :10517:25: warning: no previous prototype for function 'viewport_to_ray' comp_device_inline void viewport_to_ray(comp_in(comp_device_struct KernelGlobals, kg), int viewport_id, comp_in(float2, coordinates), comp_device_struct Ray* ray) { ^ :10560:27: warning: no previous prototype for function 'ray_to_viewport' comp_device_inline float2 ray_to_viewport(comp_in(comp_device_struct KernelGlobals, kg), comp_device_struct Ray* ray, int viewport_id) { ^ :10762:25: warning: no previous prototype for function 'eval_program' comp_device_inline void eval_program( ^ :12009:17: error: parameter may not be qualified with an 
address space __global uint4 program[], ^ :12010:35: error: parameter may not be qualified with an address space uint program_size, __local uint4 local_program[], __global char *buffer1 ^ CLERROR[-11]: CL_BUILD_PROGRAM_FAILURE // -------------------------------------------------------- // START #include "kernel/kernels/opencl/kernel_opencl_defines.h" // -------------------------------------------------------- #define _KERNEL_OPENCL_ #define COMP_NAMESPACE_BEGIN #define COMP_NAMESPACE_END #define comp_device_global __global #define comp_device_inline #define comp_device_noinline #define comp_device #define comp_device_struct struct #define comp_get_ref(var) *var #define comp_in(type, var) type var #define comp_out(type, var) type *var #define comp_inout(type, var) type *var #define comp_inout_array(type, var) type var #define kernel_assert(cond) #define UINT(value) ((uint)(value)) #define comp_attribute_packed __attribute__ ((packed)) #define ATTR_FALLTHROUGH ((void)0) //#define COMP_OPENCL_COPY_TO_LOCAL //#undef COMP_OPENCL_COPY_TO_LOCAL // END #include "kernel/kernels/opencl/kernel_opencl_defines.h" // -------------------------------------------------------- // START #include "kernel/kernel_compat_opencl.h" // -------------------------------------------------------- #ifndef _KERNEL_COMPAT_OPENCL_H_ #define _KERNEL_COMPAT_OPENCL_H_ #endif // END #include "kernel/kernel_compat_opencl.h" // -------------------------------------------------------- // START #include "kernel/kernel.h" // -------------------------------------------------------- // -------------------------------------------------------- // START #include "kernel/kernel_globals.h" // -------------------------------------------------------- #ifndef _KERNEL_GLOBALS_H_ #define _KERNEL_GLOBALS_H_ #ifdef _KERNEL_CPU_ # include "util/util_vector.h" #endif // -------------------------------------------------------- // START #include "util/util_viewport.h" // 
-------------------------------------------------------- #ifndef _UTIL_VIEWPORT_H_ #define _UTIL_VIEWPORT_H_ // -------------------------------------------------------- // START #include "util/util_types.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_H_ #define _UTIL_TYPES_H_ #ifdef _KERNEL_CPU_ #include #endif /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) # define __KERNEL_64_BIT__ #endif /* Qualifiers for kernel code shared by CPU and GPU */ #ifdef _KERNEL_CPU_ # define comp_device_inline static inline # define comp_device_noinline static # define comp_global # define comp_constant # define comp_local # define comp_local_param # define comp_private # define comp_restrict __restrict # define __KERNEL_WITH_SSE_ALIGN__ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define comp_device_inline static __forceinline # define comp_device_forceinline static __forceinline # define comp_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define comp_try_align(...) __declspec(align(__VA_ARGS__)) # else /* __KERNEL_64_BIT__ */ # undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ # define comp_try_align(...) 
# endif /* __KERNEL_64_BIT__ */ # define comp_may_alias # define comp_always_inline __forceinline # define comp_never_inline __declspec(noinline) # define comp_maybe_unused # else /* _WIN32 && !FREE_WINDOWS */ //# define comp_device_inline static inline __attribute__((always_inline)) # define comp_device_forceinline static inline __attribute__((always_inline)) # define comp_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) # endif # define comp_try_align(...) __attribute__((aligned(__VA_ARGS__))) # define comp_may_alias __attribute__((__may_alias__)) # define comp_always_inline __attribute__((always_inline)) # define comp_never_inline __attribute__((noinline)) # define comp_maybe_unused __attribute__((used)) # endif /* _WIN32 && !FREE_WINDOWS */ #endif /* _KERNEL_GPU_ */ /* Standard Integer Types */ #ifdef _KERNEL_CPU_ /* int8_t, uint16_t, and friends */ # ifndef _WIN32 # include # endif /* SIMD Types */ // # include "util/util_optimization.h" #endif /* _KERNEL_GPU_ */ COMP_NAMESPACE_BEGIN /* Types * * Define simpler unsigned type names, and integer with defined number of bits. * Also vector types, named to be compatible with OpenCL builtin types, while * working for CUDA and C++ too. 
*/ /* Shorter Unsigned Names */ #ifdef _KERNEL_CPU_ typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned short ushort; #endif // #ifndef __KERNEL_OPENCL__ // typedef unsigned char uchar; // typedef unsigned int uint; // typedef unsigned short ushort; // #endif /* Fixed Bits Types */ // #ifdef __KERNEL_OPENCL__ // typedef ulong uint64_t; // #endif #ifdef _KERNEL_CPU_ # ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; # ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif # endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; #endif /* _KERNEL_GPU_ */ // comp_device_inline size_t align_up(size_t offset, size_t alignment) // { // return (offset + alignment - 1) & ~(alignment - 1); // } // // comp_device_inline size_t divide_up(size_t x, size_t y) // { // return (x + y - 1) / y; // } // // comp_device_inline size_t round_up(size_t x, size_t multiple) // { // return ((x + multiple - 1) / multiple) * multiple; // } // // comp_device_inline size_t round_down(size_t x, size_t multiple) // { // return (x / multiple) * multiple; // } /* Interpolation types for textures * cuda also use texture space to store other objects */ #if defined(_KERNEL_CPU_) || defined(_KERNEL_OPENCL_) enum InterpolationType { INTERPOLATION_LINEAR = 0, INTERPOLATION_CLOSEST = 1, INTERPOLATION_CUBIC = 2, INTERPOLATION_SMART = 3 }; enum ExtensionType { /* Cause the image to repeat horizontally and vertically. */ EXTENSION_REPEAT = 0, /* Extend by repeating edge pixels of the image. */ EXTENSION_EXTEND = 1, /* Clip to image size and set exterior pixels as transparent. */ EXTENSION_CLIP = 2, }; #endif /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. 
*/ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ #if defined(__GNUC__) && defined(_KERNEL_CPU_) # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) (x) # define UNLIKELY(x) (x) #endif // #if defined(__GNUC__) || defined(__clang__) && defined(_KERNEL_CPU_) // /* Some magic to be sure we don't have reference in the type. */ // template static inline T decltype_helper(T x) { return x; } // # define TYPEOF(x) decltype(decltype_helper(x)) // #endif COMP_NAMESPACE_END #ifdef _KERNEL_CPU_ # include # define util_assert(statement) assert(statement) #else # define util_assert(statement) #endif /* Vectorized types declaration. */ // -------------------------------------------------------- // START #include "util/util_types_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT2_H_ #define _UTIL_TYPES_INT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int2 { int x, y; __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int2 make_int2(int x, int y); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int2(x, y) ((ivec2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_H_ */ // END #include "util/util_types_int2.h" // -------------------------------------------------------- // START #include "util/util_types_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_H_ #define _UTIL_TYPES_INT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) int3 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int3(); __forceinline int3(const int3& a); __forceinline explicit int3(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int3& operator =(const int3& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int3 make_int3(int i); comp_device_inline int3 make_int3(int x, int y, int z); comp_device_inline void print_int3(const char *label, const int3& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int3(x, y, z) ((int3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_H_ */ // END #include "util/util_types_int3.h" // -------------------------------------------------------- // START #include "util/util_types_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_H_ #define _UTIL_TYPES_INT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float3; struct comp_try_align(16) int4 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int4(); __forceinline int4(const int4& a); __forceinline explicit int4(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int4& operator=(const int4& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int4 make_int4(int i); comp_device_inline int4 make_int4(int x, int y, int z, int w); comp_device_inline int4 make_int4(const float3& f); comp_device_inline void print_int4(const char *label, const int4& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_H_ */ // END #include "util/util_types_int4.h" // -------------------------------------------------------- // START #include "util/util_types_uint2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_H_ #define _UTIL_TYPES_UINT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint2 { uint x, y; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint2 make_uint2(uint x, uint y); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint2(x, y) ((uint2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_H_ */ // END #include "util/util_types_uint2.h" // -------------------------------------------------------- // START #include "util/util_types_uint3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT3_H_ #define _UTIL_TYPES_UINT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint3 { uint x, y, z; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint3 make_uint3(uint x, uint y, uint z); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint3(x, y, z) ((uint3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_H_ */ // END #include "util/util_types_uint3.h" // -------------------------------------------------------- // START #include "util/util_types_uint4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_H_ #define _UTIL_TYPES_UINT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint4 { uint x, y, z, w; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_H_ */ // END #include "util/util_types_uint4.h" // -------------------------------------------------------- // START #include "util/util_types_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_H_ #define _UTIL_TYPES_FLOAT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float2 { float x, y; __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float2 make_float2(float x, float y); comp_device_inline void print_float2(const char *label, const float2& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float2(x, y) ((float2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_H_ */ // END #include "util/util_types_float2.h" // -------------------------------------------------------- // START #include "util/util_types_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_H_ #define _UTIL_TYPES_FLOAT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) float3 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float3(); __forceinline float3(const float3& a); __forceinline explicit float3(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float3& operator =(const float3& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float3 make_float3(float f); comp_device_inline float3 make_float3(float x, float y, float z); comp_device_inline void print_float3(const char *label, const float3& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float3(x, y, z) ((float3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_H_ */ // END #include "util/util_types_float3.h" // -------------------------------------------------------- // START #include "util/util_types_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_H_ #define _UTIL_TYPES_FLOAT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int4; struct comp_try_align(16) float4 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float4(); __forceinline float4(const float4& a); __forceinline explicit float4(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float4& operator =(const float4& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float4 make_float4(float f); comp_device_inline float4 make_float4(float x, float y, float z, float w); comp_device_inline float4 make_float4(const int4& i); comp_device_inline void print_float4(const char *label, const float4& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_H_ */ // END #include "util/util_types_float4.h" /* Vectorized types implementation. */ // -------------------------------------------------------- // START #include "util/util_types_int2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_INT2_IMPL_H_ #define _UTIL_TYPES_INT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ int int2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } int& int2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_IMPL_H_ */ // END #include "util/util_types_int2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_IMPL_H_ #define _UTIL_TYPES_INT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int3::int3() { } __forceinline int3::int3(const __m128i& a) : m128(a) { } __forceinline int3::int3(const int3& a) : m128(a.m128) { } __forceinline int3::operator const __m128i&(void) const { return m128; } __forceinline int3::operator __m128i&(void) { return m128; } __forceinline int3& int3::operator =(const int3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline int& int3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline int3 make_int3(int i) { #ifdef __KERNEL_SSE__ int3 a(_mm_set1_epi32(i)); #else int3 a = {i, i, i, i}; #endif return a; } comp_device_inline int3 make_int3(int x, int y, int z) { #ifdef __KERNEL_SSE__ int3 a(_mm_set_epi32(0, z, y, x)); #else int3 a = {x, y, z, 0}; #endif return a; } comp_device_inline void print_int3(const char *label, const int3& a) { printf("%s: %d %d %d\n", label, a.x, a.y, a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_IMPL_H_ */ // END #include "util/util_types_int3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_IMPL_H_ #define _UTIL_TYPES_INT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int4::int4() { } __forceinline int4::int4(const int4& a) : m128(a.m128) { } __forceinline int4::int4(const __m128i& a) : m128(a) { } __forceinline int4::operator const __m128i&(void) const { return m128; } __forceinline int4::operator __m128i&(void) { return m128; } __forceinline int4& int4::operator=(const int4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline int& int4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline int4 make_int4(int i) { #ifdef __KERNEL_SSE__ int4 a(_mm_set1_epi32(i)); #else int4 a = {i, i, i, i}; #endif return a; } comp_device_inline int4 make_int4(int x, int y, int z, int w) { #ifdef __KERNEL_SSE__ int4 a(_mm_set_epi32(w, z, y, x)); #else int4 a = {x, y, z, w}; #endif return a; } comp_device_inline int4 make_int4(const float3& f) { #ifdef __KERNEL_SSE__ int4 a(_mm_cvtps_epi32(f.m128)); #else int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; #endif return a; } comp_device_inline void print_int4(const char *label, const int4& a) { printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_IMPL_H_ */ // END #include "util/util_types_int4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_IMPL_H_ #define _UTIL_TYPES_UINT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint2::operator[](uint i) const { util_assert(i < 2); return *(&x + i); } __forceinline uint& uint2::operator[](uint i) { util_assert(i < 2); return *(&x + i); } comp_device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_IMPL_H_ */ // END #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_UINT3_IMPL_H_ #define _UTIL_TYPES_UINT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint3::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint3::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_IMPL_H_ */ // END #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_IMPL_H_ #define _UTIL_TYPES_UINT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint4::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint4::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_IMPL_H_ */ // END #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_IMPL_H_ #define _UTIL_TYPES_FLOAT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline float float2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } __forceinline float& float2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } comp_device_inline void print_float2(const char *label, const float2& a) { printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_IMPL_H_ */ // END #include "util/util_types_float2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_IMPL_H_ #define _UTIL_TYPES_FLOAT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float3::float3() { } __forceinline float3::float3(const float3& a) : m128(a.m128) { } __forceinline float3::float3(const __m128& a) : m128(a) { } __forceinline float3::operator const __m128&(void) const { return m128; } __forceinline float3::operator __m128&(void) { return m128; } __forceinline float3& float3::operator =(const float3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline float& float3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline float3 make_float3(float f) { #ifdef __KERNEL_SSE__ float3 a(_mm_set1_ps(f)); #else float3 a = {f, f, f, f}; #endif return a; } comp_device_inline float3 make_float3(float x, float y, float z) { #ifdef __KERNEL_SSE__ float3 a(_mm_set_ps(0.0f, z, y, x)); #else float3 a = {x, y, z, 0.0f}; #endif return a; } comp_device_inline void print_float3(const char *label, const float3& a) { printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_IMPL_H_ */ // END #include "util/util_types_float3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_IMPL_H_ #define _UTIL_TYPES_FLOAT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float4::float4() { } __forceinline float4::float4(const float4& a) : m128(a.m128) { } __forceinline float4::float4(const __m128& a) : m128(a) { } __forceinline float4::operator const __m128&(void) const { return m128; } __forceinline float4::operator __m128&(void) { return m128; } __forceinline float4& float4::operator =(const float4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline float& float4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline float4 make_float4(float f) { #ifdef __KERNEL_SSE__ float4 a(_mm_set1_ps(f)); #else float4 a = {f, f, f, f}; #endif return a; } comp_device_inline float4 make_float4(float x, float y, float z, float w) { #ifdef __KERNEL_SSE__ float4 a(_mm_set_ps(w, z, y, x)); #else float4 a = {x, y, z, w}; #endif return a; } comp_device_inline float4 make_float4(const int4& i) { #ifdef __KERNEL_SSE__ float4 a(_mm_cvtepi32_ps(i.m128)); #else float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; #endif return a; } comp_device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, 
(double)a.z, (double)a.w); }
#endif /* _KERNEL_CPU_ */

COMP_NAMESPACE_END

#endif /* _UTIL_TYPES_FLOAT4_IMPL_H_ */

// END #include "util/util_types_float4_impl.h"

#endif /* _UTIL_TYPES_H_ */

// END #include "util/util_types.h"

COMP_NAMESPACE_BEGIN

// SYNC with DNA_camera_types
enum {
    // Blender Camera Emulation
    CAM_PERSP = 0,
    CAM_ORTHO = 1,
    CAM_PANO = 2,
    // Viewport settings.
    CAM_PLANE = 3,
};

/* Description of a viewport/camera: a position, three direction vectors,
 * a plane position and a type tag followed by type-dependent parameters. */
struct Viewport {
    float3 position;
    float3 direction;
    float3 up_vector;
    float3 side_vector;
    float3 plane_position;

    /* Presumably one of the CAM_* values above; confirm against the code that fills this in. */
    int type;

    /* `scale` and `scale_x` share the same storage; `scale_y` follows it. */
    union{
        struct {
            float scale;
        };
        struct {
            float scale_x;
            float scale_y;
        };
    };

    /* Per-type parameters. */
    union {
        // CAM_PERSP
        struct {
            float lens;
            float sensor_size;
            float field_of_view;
        };
        // CAM_ORTHO
        struct { };
    };

    /* NOTE(review): looks like explicit tail padding; confirm the intended total size. */
    float padding[3];
};

COMP_NAMESPACE_END

#endif

// END #include "util/util_viewport.h"

COMP_NAMESPACE_BEGIN

// Start section - the next structs are shared with the Host

/* Reference to a memory location, stored as an index plus an offset.
 * NOTE(review): presumably `index` selects an entry of KernelGlobals::buffers; confirm. */
typedef struct comp_attribute_packed MemoryPTR {
    uint index;
    uint padding;
    ulong offset;
} MemoryPTR;

/* Description of one texture: where its data lives, its dimensions, and its
 * sampling settings.
 * NOTE(review): `interpolation` / `extension` presumably hold InterpolationType /
 * ExtensionType values; confirm. */
typedef struct comp_attribute_packed TextureInfo {
    MemoryPTR data_ptr;
    uint components;
    uint width;
    uint height;
    uint interpolation;
    uint extension;
    uint padding;
} TextureInfo;

/* Globals in the form shared with the host.
 * NOTE(review): unlike the two structs above this one is not marked
 * comp_attribute_packed; confirm that is intentional. */
typedef struct HostKernelGlobals {
    MemoryPTR texture_ptr;
    MemoryPTR viewport_ptr;
    uint num_samples;
    uint frame_number;
    uint2 output_dimension;
    float aspect_ratio;
    uint seed;
} HostKernelGlobals;

// End section

#ifdef _KERNEL_CPU_
/* Kernel-side globals for CPU builds. */
typedef struct KernelGlobals {
    uint4 *_program;
    char* buffers[1];
    MemoryPTR texture_ptr;
    MemoryPTR viewport_ptr;
    int num_samples;
    int frame_number;
    float aspect_ratio;
    uint seed;
    float2 pixel_size;
} KernelGlobals;
#endif

#ifdef _KERNEL_OPENCL_
/* Kernel-side globals for OpenCL builds: same fields as the CPU variant, with
 * `_program` in __local and `buffers` pointing into __global address space. */
typedef struct KernelGlobals {
    __local uint4 *_program;
    __global char* buffers[1];
    MemoryPTR texture_ptr;
    MemoryPTR viewport_ptr;
    int num_samples;
    int frame_number;
    float aspect_ratio;
    uint seed;
    float2 pixel_size;
} KernelGlobals;
#endif

COMP_NAMESPACE_END

#endif

// END #include "kernel/kernel_globals.h"
// --------------------------------------------------------
// START #include "kernel/kernel_types.h" // -------------------------------------------------------- COMP_NAMESPACE_BEGIN struct CompositorData { float2 coord; int sample_number; }; struct Ray { float3 P; /* origin */ float3 D; /* direction */ // float3 up; /* up vector */ float3 side; /* side vector */ float store_float1; /* store to transfer data from ray manipulator to operation */ }; COMP_NAMESPACE_END // END #include "kernel/kernel_types.h" // -------------------------------------------------------- // START #include "util/util_math.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_H_ #define _UTIL_MATH_H_ /* Math * * Basic math functions on scalar and vector types. This header is used by * both the kernel code when compiled as C++, and other C++ non-kernel code. */ #ifdef _KERNEL_CPU_ # include # include # include # include #endif /* _KERNEL_CPU_ */ // -------------------------------------------------------- // START #include "util/util_types.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_H_ #define _UTIL_TYPES_H_ #ifdef _KERNEL_CPU_ #include #endif /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) # define __KERNEL_64_BIT__ #endif /* Qualifiers for kernel code shared by CPU and GPU */ #ifdef _KERNEL_CPU_ # define comp_device_inline static inline # define comp_device_noinline static # define comp_global # define comp_constant # define comp_local # define comp_local_param # define comp_private # define comp_restrict __restrict # define __KERNEL_WITH_SSE_ALIGN__ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define comp_device_inline static __forceinline # define comp_device_forceinline static __forceinline # define comp_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define comp_try_align(...) __declspec(align(__VA_ARGS__)) # else /* __KERNEL_64_BIT__ */ # undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ # define comp_try_align(...) # endif /* __KERNEL_64_BIT__ */ # define comp_may_alias # define comp_always_inline __forceinline # define comp_never_inline __declspec(noinline) # define comp_maybe_unused # else /* _WIN32 && !FREE_WINDOWS */ //# define comp_device_inline static inline __attribute__((always_inline)) # define comp_device_forceinline static inline __attribute__((always_inline)) # define comp_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) # endif # define comp_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) # define comp_may_alias __attribute__((__may_alias__)) # define comp_always_inline __attribute__((always_inline)) # define comp_never_inline __attribute__((noinline)) # define comp_maybe_unused __attribute__((used)) # endif /* _WIN32 && !FREE_WINDOWS */ #endif /* _KERNEL_GPU_ */ /* Standard Integer Types */ #ifdef _KERNEL_CPU_ /* int8_t, uint16_t, and friends */ # ifndef _WIN32 # include # endif /* SIMD Types */ // # include "util/util_optimization.h" #endif /* _KERNEL_GPU_ */ COMP_NAMESPACE_BEGIN /* Types * * Define simpler unsigned type names, and integer with defined number of bits. * Also vector types, named to be compatible with OpenCL builtin types, while * working for CUDA and C++ too. */ /* Shorter Unsigned Names */ #ifdef _KERNEL_CPU_ typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned short ushort; #endif // #ifndef __KERNEL_OPENCL__ // typedef unsigned char uchar; // typedef unsigned int uint; // typedef unsigned short ushort; // #endif /* Fixed Bits Types */ // #ifdef __KERNEL_OPENCL__ // typedef ulong uint64_t; // #endif #ifdef _KERNEL_CPU_ # ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; # ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif # endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; #endif /* _KERNEL_GPU_ */ // comp_device_inline size_t align_up(size_t offset, size_t alignment) // { // return (offset + alignment - 1) & ~(alignment - 1); // } // // comp_device_inline size_t divide_up(size_t x, size_t y) // { // return (x + y - 1) / y; // } // // comp_device_inline size_t round_up(size_t x, size_t multiple) // { // return ((x + multiple - 1) / multiple) * multiple; // } // // comp_device_inline size_t 
round_down(size_t x, size_t multiple) // { // return (x / multiple) * multiple; // } /* Interpolation types for textures * cuda also use texture space to store other objects */ #if defined(_KERNEL_CPU_) || defined(_KERNEL_OPENCL_) enum InterpolationType { INTERPOLATION_LINEAR = 0, INTERPOLATION_CLOSEST = 1, INTERPOLATION_CUBIC = 2, INTERPOLATION_SMART = 3 }; enum ExtensionType { /* Cause the image to repeat horizontally and vertically. */ EXTENSION_REPEAT = 0, /* Extend by repeating edge pixels of the image. */ EXTENSION_EXTEND = 1, /* Clip to image size and set exterior pixels as transparent. */ EXTENSION_CLIP = 2, }; #endif /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. */ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ #if defined(__GNUC__) && defined(_KERNEL_CPU_) # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) (x) # define UNLIKELY(x) (x) #endif // #if defined(__GNUC__) || defined(__clang__) && defined(_KERNEL_CPU_) // /* Some magic to be sure we don't have reference in the type. */ // template static inline T decltype_helper(T x) { return x; } // # define TYPEOF(x) decltype(decltype_helper(x)) // #endif COMP_NAMESPACE_END #ifdef _KERNEL_CPU_ # include # define util_assert(statement) assert(statement) #else # define util_assert(statement) #endif /* Vectorized types declaration. */ // -------------------------------------------------------- // START #include "util/util_types_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT2_H_ #define _UTIL_TYPES_INT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int2 { int x, y; __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int2 make_int2(int x, int y); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int2(x, y) ((ivec2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_H_ */ // END #include "util/util_types_int2.h" // -------------------------------------------------------- // START #include "util/util_types_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_H_ #define _UTIL_TYPES_INT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) int3 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int3(); __forceinline int3(const int3& a); __forceinline explicit int3(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int3& operator =(const int3& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int3 make_int3(int i); comp_device_inline int3 make_int3(int x, int y, int z); comp_device_inline void print_int3(const char *label, const int3& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int3(x, y, z) ((int3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_H_ */ // END #include "util/util_types_int3.h" // -------------------------------------------------------- // START #include "util/util_types_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_H_ #define _UTIL_TYPES_INT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float3; struct comp_try_align(16) int4 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int4(); __forceinline int4(const int4& a); __forceinline explicit int4(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int4& operator=(const int4& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int4 make_int4(int i); comp_device_inline int4 make_int4(int x, int y, int z, int w); comp_device_inline int4 make_int4(const float3& f); comp_device_inline void print_int4(const char *label, const int4& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_H_ */ // END #include "util/util_types_int4.h" // -------------------------------------------------------- // START #include "util/util_types_uint2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_H_ #define _UTIL_TYPES_UINT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint2 { uint x, y; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint2 make_uint2(uint x, uint y); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint2(x, y) ((uint2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_H_ */ // END #include "util/util_types_uint2.h" // -------------------------------------------------------- // START #include "util/util_types_uint3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT3_H_ #define _UTIL_TYPES_UINT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint3 { uint x, y, z; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint3 make_uint3(uint x, uint y, uint z); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint3(x, y, z) ((uint3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_H_ */ // END #include "util/util_types_uint3.h" // -------------------------------------------------------- // START #include "util/util_types_uint4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_H_ #define _UTIL_TYPES_UINT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint4 { uint x, y, z, w; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_H_ */ // END #include "util/util_types_uint4.h" // -------------------------------------------------------- // START #include "util/util_types_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_H_ #define _UTIL_TYPES_FLOAT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float2 { float x, y; __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float2 make_float2(float x, float y); comp_device_inline void print_float2(const char *label, const float2& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float2(x, y) ((float2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_H_ */ // END #include "util/util_types_float2.h" // -------------------------------------------------------- // START #include "util/util_types_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_H_ #define _UTIL_TYPES_FLOAT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) float3 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float3(); __forceinline float3(const float3& a); __forceinline explicit float3(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float3& operator =(const float3& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float3 make_float3(float f); comp_device_inline float3 make_float3(float x, float y, float z); comp_device_inline void print_float3(const char *label, const float3& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float3(x, y, z) ((float3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_H_ */ // END #include "util/util_types_float3.h" // -------------------------------------------------------- // START #include "util/util_types_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_H_ #define _UTIL_TYPES_FLOAT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int4; struct comp_try_align(16) float4 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float4(); __forceinline float4(const float4& a); __forceinline explicit float4(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float4& operator =(const float4& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float4 make_float4(float f); comp_device_inline float4 make_float4(float x, float y, float z, float w); comp_device_inline float4 make_float4(const int4& i); comp_device_inline void print_float4(const char *label, const float4& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_H_ */ // END #include "util/util_types_float4.h" /* Vectorized types implementation. */ // -------------------------------------------------------- // START #include "util/util_types_int2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_INT2_IMPL_H_ #define _UTIL_TYPES_INT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ int int2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } int& int2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_IMPL_H_ */ // END #include "util/util_types_int2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_IMPL_H_ #define _UTIL_TYPES_INT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int3::int3() { } __forceinline int3::int3(const __m128i& a) : m128(a) { } __forceinline int3::int3(const int3& a) : m128(a.m128) { } __forceinline int3::operator const __m128i&(void) const { return m128; } __forceinline int3::operator __m128i&(void) { return m128; } __forceinline int3& int3::operator =(const int3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline int& int3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline int3 make_int3(int i) { #ifdef __KERNEL_SSE__ int3 a(_mm_set1_epi32(i)); #else int3 a = {i, i, i, i}; #endif return a; } comp_device_inline int3 make_int3(int x, int y, int z) { #ifdef __KERNEL_SSE__ int3 a(_mm_set_epi32(0, z, y, x)); #else int3 a = {x, y, z, 0}; #endif return a; } comp_device_inline void print_int3(const char *label, const int3& a) { printf("%s: %d %d %d\n", label, a.x, a.y, a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_IMPL_H_ */ // END #include "util/util_types_int3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_IMPL_H_ #define _UTIL_TYPES_INT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int4::int4() { } __forceinline int4::int4(const int4& a) : m128(a.m128) { } __forceinline int4::int4(const __m128i& a) : m128(a) { } __forceinline int4::operator const __m128i&(void) const { return m128; } __forceinline int4::operator __m128i&(void) { return m128; } __forceinline int4& int4::operator=(const int4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline int& int4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline int4 make_int4(int i) { #ifdef __KERNEL_SSE__ int4 a(_mm_set1_epi32(i)); #else int4 a = {i, i, i, i}; #endif return a; } comp_device_inline int4 make_int4(int x, int y, int z, int w) { #ifdef __KERNEL_SSE__ int4 a(_mm_set_epi32(w, z, y, x)); #else int4 a = {x, y, z, w}; #endif return a; } comp_device_inline int4 make_int4(const float3& f) { #ifdef __KERNEL_SSE__ int4 a(_mm_cvtps_epi32(f.m128)); #else int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; #endif return a; } comp_device_inline void print_int4(const char *label, const int4& a) { printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_IMPL_H_ */ // END #include "util/util_types_int4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_IMPL_H_ #define _UTIL_TYPES_UINT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint2::operator[](uint i) const { util_assert(i < 2); return *(&x + i); } __forceinline uint& uint2::operator[](uint i) { util_assert(i < 2); return *(&x + i); } comp_device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_IMPL_H_ */ // END #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_UINT3_IMPL_H_ #define _UTIL_TYPES_UINT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint3::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint3::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_IMPL_H_ */ // END #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_IMPL_H_ #define _UTIL_TYPES_UINT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint4::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint4::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_IMPL_H_ */ // END #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_IMPL_H_ #define _UTIL_TYPES_FLOAT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline float float2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } __forceinline float& float2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } comp_device_inline void print_float2(const char *label, const float2& a) { printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_IMPL_H_ */ // END #include "util/util_types_float2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_IMPL_H_ #define _UTIL_TYPES_FLOAT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float3::float3() { } __forceinline float3::float3(const float3& a) : m128(a.m128) { } __forceinline float3::float3(const __m128& a) : m128(a) { } __forceinline float3::operator const __m128&(void) const { return m128; } __forceinline float3::operator __m128&(void) { return m128; } __forceinline float3& float3::operator =(const float3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline float& float3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline float3 make_float3(float f) { #ifdef __KERNEL_SSE__ float3 a(_mm_set1_ps(f)); #else float3 a = {f, f, f, f}; #endif return a; } comp_device_inline float3 make_float3(float x, float y, float z) { #ifdef __KERNEL_SSE__ float3 a(_mm_set_ps(0.0f, z, y, x)); #else float3 a = {x, y, z, 0.0f}; #endif return a; } comp_device_inline void print_float3(const char *label, const float3& a) { printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_IMPL_H_ */ // END #include "util/util_types_float3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_IMPL_H_ #define _UTIL_TYPES_FLOAT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float4::float4() { } __forceinline float4::float4(const float4& a) : m128(a.m128) { } __forceinline float4::float4(const __m128& a) : m128(a) { } __forceinline float4::operator const __m128&(void) const { return m128; } __forceinline float4::operator __m128&(void) { return m128; } __forceinline float4& float4::operator =(const float4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline float& float4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline float4 make_float4(float f) { #ifdef __KERNEL_SSE__ float4 a(_mm_set1_ps(f)); #else float4 a = {f, f, f, f}; #endif return a; } comp_device_inline float4 make_float4(float x, float y, float z, float w) { #ifdef __KERNEL_SSE__ float4 a(_mm_set_ps(w, z, y, x)); #else float4 a = {x, y, z, w}; #endif return a; } comp_device_inline float4 make_float4(const int4& i) { #ifdef __KERNEL_SSE__ float4 a(_mm_cvtepi32_ps(i.m128)); #else float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; #endif return a; } comp_device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, 
(double)a.z, (double)a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_IMPL_H_ */ // END #include "util/util_types_float4_impl.h" #endif /* _UTIL_TYPES_H_ */ // END #include "util/util_types.h" COMP_NAMESPACE_BEGIN /* Float Pi variations */ /* Division */ #ifndef M_PI_F # define M_PI_F (3.1415926535897932f) /* pi */ #endif #ifndef M_PI_2_F # define M_PI_2_F (1.5707963267948966f) /* pi/2 */ #endif #ifndef M_PI_4_F # define M_PI_4_F (0.7853981633974830f) /* pi/4 */ #endif #ifndef M_1_PI_F # define M_1_PI_F (0.3183098861837067f) /* 1/pi */ #endif #ifndef M_2_PI_F # define M_2_PI_F (0.6366197723675813f) /* 2/pi */ #endif /* Multiplication */ #ifndef M_2PI_F # define M_2PI_F (6.2831853071795864f) /* 2*pi */ #endif #ifndef M_4PI_F # define M_4PI_F (12.566370614359172f) /* 4*pi */ #endif /* Float sqrt variations */ #ifndef M_SQRT2_F # define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ #endif #ifndef M_LN2_F # define M_LN2_F (0.6931471805599453f) /* ln(2) */ #endif #ifndef M_LN10_F # define M_LN10_F (2.3025850929940457f) /* ln(10) */ #endif /* Scalar */ #ifdef _WIN32 # ifndef __KERNEL_OPENCL__ comp_device_inline float fmaxf(float a, float b) { return (a > b)? a: b; } comp_device_inline float fminf(float a, float b) { return (a < b)? a: b; } # endif /* !__KERNEL_OPENCL__ */ #endif /* _WIN32 */ #ifdef _KERNEL_CPU_ using std::isfinite; using std::isnan; using std::fabs; #define floor(var) floorf(var) #define ceil(var) ceilf(var) #define cos(var) cosf(var) #define sin(var) sinf(var) #define sqrt(var) sqrtf(var) #define asin(var) asinf(var) #define acos(var) acos(var) #define mod(var1, var2) fmodf(var1, var2) #define pow(var1, var2) powf(var1, var2) #define log(var) logf(var) comp_device_inline int max(int a, int b) { return (a > b)? a: b; } comp_device_inline int min(int a, int b) { return (a < b)? a: b; } comp_device_inline float max(float a, float b) { return (a > b)? a: b; } comp_device_inline float min(float a, float b) { return (a < b)? 
a: b; } comp_device_inline double max(double a, double b) { return (a > b)? a: b; } comp_device_inline double min(double a, double b) { return (a < b)? a: b; } /* These 2 guys are templated for usage with registers data. * * NOTE: Since this is CPU-only functions it is ok to use references here. * But for other devices we'll need to be careful about this. */ template comp_device_inline T min4(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); } template comp_device_inline T max4(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); } #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define abs(x) fabs(x) #define mod(x, y) fmod(x, y) #endif /* _KERNEL_OPENCL_ */ comp_device_inline float min4(float a, float b, float c, float d) { return min(min(a, b), min(c, d)); } comp_device_inline float max4(float a, float b, float c, float d) { return max(max(a, b), max(c, d)); } #ifdef _KERNEL_CPU_ /* Int/Float conversion */ comp_device_inline int uint_as_int(uint i) { union { uint ui; int i; } u; u.ui = i; return u.i; } comp_device_inline uint int_as_uint(int i) { union { uint ui; int i; } u; u.i = i; return u.ui; } comp_device_inline int float_as_int(float f) { union { int i; float f; } u; u.f = f; return u.i; } comp_device_inline float int_as_float(int i) { union { int i; float f; } u; u.i = i; return u.f; } comp_device_inline uint float_as_uint(float f) { union { uint i; float f; } u; u.f = f; return u.i; } comp_device_inline float uint_as_float(uint i) { union { uint i; float f; } u; u.i = i; return u.f; } #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define uint_as_int(i) as_int(i) #define int_as_uint(i) as_uint(i) #define float_as_int(f) as_int(f) #define int_as_float(i) as_float(i) #define float_as_uint(f) as_uint(f) #define uint_as_float(i) as_float(i) #endif /* Versions of functions which are safe for fast math. 
*/ comp_device_inline bool isnan_safe(float f) { uint x = float_as_uint(f); return (x << 1) > 0xff000000u; } // comp_device_inline bool isfinite_safe(float f) // { // /* By IEEE 754 rule, 2*Inf equals Inf */ // uint x = float_as_uint(f); // return (f == f) && (x == 0 || (f != 2.0f*f)) && !((x << 1) > 0xff000000u); // } // comp_device_inline float ensure_finite(float v) // { // return isfinite_safe(v)? v : 0.0f; // } #ifdef _KERNEL_CPU_ comp_device_inline int clamp(int a, int mn, int mx) { return min(max(a, mn), mx); } comp_device_inline float clamp(float a, float mn, float mx) { return min(max(a, mn), mx); } comp_device_inline float mix(float a, float b, float t) { return a + t*(b - a); } #endif /* __KERNEL_OPENCL__ */ comp_device_inline float saturate(float a) { return clamp(a, 0.0f, 1.0f); } #ifdef _KERNEL_CPU_ comp_device_inline int float_to_int(float f) { return (int)f; } #endif #ifdef _KERNEL_OPENCL_ comp_device_inline int float_to_int(float f) { return convert_int(f); } #endif /* _KERNEL_OPENCL_ */ comp_device_inline int floor_to_int(float f) { return float_to_int(floor(f)); } comp_device_inline int ceil_to_int(float f) { return float_to_int(ceil(f)); } comp_device_inline float signf(float f) { return (f < 0.0f)? 
-1.0f: 1.0f; } comp_device_inline float nonzerof(float f, float eps) { if(abs(f) < eps) return signf(f)*eps; else return f; } comp_device_inline float smoothstepf(float f) { float ff = f*f; return (3.0f*ff - 2.0f*ff*f); } comp_device_inline int modulo(int x, int m) { return (x % m + m) % m; } comp_device_inline float3 float2_to_float3(const float2 a) { return make_float3(a.x, a.y, 0.0f); } comp_device_inline float3 float4_to_float3(const float4 a) { return make_float3(a.x, a.y, a.z); } comp_device_inline float4 float3_to_float4(const float3 a) { return make_float4(a.x, a.y, a.z, 1.0f); } COMP_NAMESPACE_END // -------------------------------------------------------- // START #include "util/util_math_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT2_H_ #define _UTIL_MATH_INT2_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline bool operator==(const int2 a, const int2 b); comp_device_inline int2 operator+(const int2 &a, const int2 &b); comp_device_inline int2 operator+=(int2 &a, const int2 &b); comp_device_inline int2 operator-(const int2 &a, const int2 &b); comp_device_inline int2 operator*(const int2 &a, const int2 &b); comp_device_inline int2 operator/(const int2 &a, const int2 &b); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline bool operator==(const int2 a, const int2 b) { return (a.x == b.x && a.y == b.y); } comp_device_inline int2 operator+(const int2 &a, const int2 &b) { return make_int2(a.x + b.x, a.y + b.y); } comp_device_inline int2 operator+=(int2 &a, const int2 &b) { return a = a + b; } comp_device_inline int2 operator-(const int2 &a, const int2 &b) { return make_int2(a.x - b.x, a.y - b.y); } comp_device_inline int2 operator*(const int2 &a, const int2 &b) { return make_int2(a.x * b.x, a.y * b.y); } comp_device_inline int2 operator/(const int2 &a, const int2 &b) { return make_int2(a.x / b.x, a.y / b.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT2_H_ */ // END #include "util/util_math_int2.h" // -------------------------------------------------------- // START #include "util/util_math_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT3_H_ #define _UTIL_MATH_INT3_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline int3 min(int3 a, int3 b); comp_device_inline int3 max(int3 a, int3 b); comp_device_inline int3 clamp(const int3& a, int mn, int mx); comp_device_inline int3 clamp(const int3& a, int3& mn, int mx); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline int3 min(int3 a, int3 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int3(_mm_min_epi32(a.m128, b.m128)); #else return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); #endif } comp_device_inline int3 max(int3 a, int3 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int3(_mm_max_epi32(a.m128, b.m128)); #else return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); #endif } comp_device_inline int3 clamp(const int3& a, int mn, int mx) { #ifdef __KERNEL_SSE__ return min(max(a, make_int3(mn)), make_int3(mx)); #else return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); #endif } comp_device_inline int3 clamp(const int3& a, int3& mn, int mx) { #ifdef __KERNEL_SSE__ return min(max(a, mn), make_int3(mx)); #else return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); #endif } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT3_H_ */ // END #include "util/util_math_int3.h" // -------------------------------------------------------- // START #include "util/util_math_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * 
Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT4_H_ #define _UTIL_MATH_INT4_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline int4 operator+(const int4& a, const int4& b); comp_device_inline int4 operator+=(int4& a, const int4& b); comp_device_inline int4 operator>>(const int4& a, int i); comp_device_inline int4 min(int4 a, int4 b); comp_device_inline int4 max(int4 a, int4 b); comp_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx); comp_device_inline int4 select(const int4& mask, const int4& a, const int4& b); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline int4 operator+(const int4& a, const int4& b) { #ifdef __KERNEL_SSE__ return int4(_mm_add_epi32(a.m128, b.m128)); #else return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); #endif } comp_device_inline int4 operator+=(int4& a, const int4& b) { return a = a + b; } comp_device_inline int4 operator>>(const int4& a, int i) { #ifdef __KERNEL_SSE__ return int4(_mm_srai_epi32(a.m128, i)); #else return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); #endif } comp_device_inline int4 min(int4 a, int4 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int4(_mm_min_epi32(a.m128, b.m128)); #else return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); #endif } comp_device_inline int4 max(int4 a, int4 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int4(_mm_max_epi32(a.m128, b.m128)); #else return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); #endif } comp_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx) { return min(max(a, mn), mx); } comp_device_inline int4 select(const int4& mask, const int4& a, const int4& b) { #ifdef __KERNEL_SSE__ const __m128 m = _mm_cvtepi32_ps(mask); /* TODO(sergey): avoid cvt. */ return int4(_mm_castps_si128( _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); #else return make_int4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); #endif } comp_device_inline int4 load_int4(const int *v) { #ifdef __KERNEL_SSE__ return int4(_mm_loadu_si128((__m128i*)v)); #else return make_int4(v[0], v[1], v[2], v[3]); #endif } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT4_H_ */ // END #include "util/util_math_int4.h" // -------------------------------------------------------- // START #include "util/util_math_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT2_H_ #define _UTIL_MATH_FLOAT2_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. 
*/

#ifdef _KERNEL_CPU_
/* Unary and binary arithmetic operators (component-wise; scalar operands are
 * applied to both components). */
comp_device_inline float2 operator-(const float2& a);
comp_device_inline float2 operator*(const float2& a, const float2& b);
comp_device_inline float2 operator*(const float2& a, float f);
comp_device_inline float2 operator*(float f, const float2& a);
comp_device_inline float2 operator/(float f, const float2& a);
comp_device_inline float2 operator/(const float2& a, float f);
comp_device_inline float2 operator/(const float2& a, const float2& b);
comp_device_inline float2 operator+(const float2& a, const float2& b);
comp_device_inline float2 operator+(const float2& a, const float b);
comp_device_inline float2 operator-(const float2& a, const float2& b);
/* Compound assignment; each returns the updated value by copy. */
comp_device_inline float2 operator+=(float2& a, const float2& b);
comp_device_inline float2 operator*=(float2& a, const float2& b);
comp_device_inline float2 operator*=(float2& a, float f);
comp_device_inline float2 operator/=(float2& a, const float2& b);
comp_device_inline float2 operator/=(float2& a, float f);

/* Comparison. */
comp_device_inline bool operator==(const float2& a, const float2& b);
comp_device_inline bool operator!=(const float2& a, const float2& b);

/* Scalar queries and products. */
comp_device_inline bool is_zero(const float2& a);
comp_device_inline float average(const float2& a);
comp_device_inline float dot(const float2& a, const float2& b);
comp_device_inline float cross(const float2& a, const float2& b);
comp_device_inline float len(const float2& a);
/* Normalization variants. */
comp_device_inline float2 normalize(const float2& a);
comp_device_inline float2 normalize_len(const float2& a, float *t);
comp_device_inline float2 safe_normalize(const float2& a);
/* Component-wise min / max / clamp / absolute value. */
comp_device_inline float2 min(const float2& a, const float2& b);
comp_device_inline float2 max(const float2& a, const float2& b);
comp_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx);
comp_device_inline float2 fabs(const float2& a);
/* Conversion and interpolation. */
comp_device_inline float2 as_float2(const float4& a);
comp_device_inline float2 mix(const float2& a, const float2& b, float t);
#endif /*
_KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #define length_v2(a) sqrt(dot(a, a)) #ifdef _KERNEL_CPU_ comp_device_inline float2 operator-(const float2& a) { return make_float2(-a.x, -a.y); } comp_device_inline float2 operator*(const float2& a, const float2& b) { return make_float2(a.x*b.x, a.y*b.y); } comp_device_inline float2 operator*(const float2& a, float f) { return make_float2(a.x*f, a.y*f); } comp_device_inline float2 operator*(float f, const float2& a) { return make_float2(a.x*f, a.y*f); } comp_device_inline float2 operator/(float f, const float2& a) { return make_float2(f/a.x, f/a.y); } comp_device_inline float2 operator/(const float2& a, float f) { float invf = 1.0f/f; return make_float2(a.x*invf, a.y*invf); } comp_device_inline float2 operator/(const float2& a, const float2& b) { return make_float2(a.x/b.x, a.y/b.y); } comp_device_inline float2 operator+(const float2& a, const float2& b) { return make_float2(a.x+b.x, a.y+b.y); } comp_device_inline float2 operator+(const float2& a, float b) { return make_float2(a.x+b, a.y+b); } comp_device_inline float2 operator-(const float2& a, const float2& b) { return make_float2(a.x-b.x, a.y-b.y); } comp_device_inline float2 operator+=(float2& a, const float2& b) { return a = a + b; } comp_device_inline float2 operator*=(float2& a, const float2& b) { return a = a * b; } comp_device_inline float2 operator*=(float2& a, float f) { return a = a * f; } comp_device_inline float2 operator/=(float2& a, const float2& b) { return a = a / b; } comp_device_inline float2 operator/=(float2& a, float f) { float invf = 1.0f/f; return a = a * invf; } comp_device_inline bool operator==(const float2& a, const float2& b) { return (a.x == b.x && a.y == b.y); } comp_device_inline bool operator!=(const float2& a, const float2& b) { return !(a == b); } comp_device_inline bool is_zero(const float2& a) { return (a.x == 0.0f && a.y == 0.0f); } comp_device_inline float 
average(const float2& a) { return (a.x + a.y)*(1.0f/2.0f); } comp_device_inline float dot(const float2& a, const float2& b) { return a.x*b.x + a.y*b.y; } comp_device_inline float cross(const float2& a, const float2& b) { return (a.x*b.y - a.y*b.x); } comp_device_inline float len(const float2& a) { return sqrt(dot(a, a)); } comp_device_inline float2 normalize(const float2& a) { return a/len(a); } comp_device_inline float2 normalize_len(const float2& a, float *t) { *t = len(a); return a/(*t); } comp_device_inline float2 safe_normalize(const float2& a) { float t = len(a); return (t != 0.0f)? a/t: a; } comp_device_inline float2 min(const float2& a, const float2& b) { return make_float2(min(a.x, b.x), min(a.y, b.y)); } comp_device_inline float2 max(const float2& a, const float2& b) { return make_float2(max(a.x, b.x), max(a.y, b.y)); } comp_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx) { return min(max(a, mn), mx); } comp_device_inline float2 fabs(const float2& a) { return make_float2(abs(a.x), abs(a.y)); } comp_device_inline float2 as_float2(const float4& a) { return make_float2(a.x, a.y); } comp_device_inline float2 mix(const float2& a, const float2& b, float t) { return a + t*(b - a); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* __UTIL_MATH_FLOAT2_H__ */ // END #include "util/util_math_float2.h" // -------------------------------------------------------- // START #include "util/util_math_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT3_H_ #define _UTIL_MATH_FLOAT3_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline float3 operator-(const float3& a); comp_device_inline float3 operator*(const float3& a, const float3& b); comp_device_inline float3 operator*(const float3& a, const float f); comp_device_inline float3 operator*(const float f, const float3& a); comp_device_inline float3 operator/(const float f, const float3& a); comp_device_inline float3 operator/(const float3& a, const float f); comp_device_inline float3 operator/(const float3& a, const float3& b); comp_device_inline float3 operator+(const float3& a, const float3& b); comp_device_inline float3 operator-(const float3& a, const float3& b); comp_device_inline float3 operator+=(float3& a, const float3& b); comp_device_inline float3 operator-=(float3& a, const float3& b); comp_device_inline float3 operator*=(float3& a, const float3& b); comp_device_inline float3 operator*=(float3& a, float f); comp_device_inline float3 operator/=(float3& a, const float3& b); comp_device_inline float3 operator/=(float3& a, float f); comp_device_inline bool operator==(const float3& a, const float3& b); comp_device_inline bool operator!=(const float3& a, const float3& b); comp_device_inline float dot(const float3& a, const float3& b); comp_device_inline float dot_xy(const float3& a, const float3& b); comp_device_inline 
float3 cross(const float3& a, const float3& b); comp_device_inline float3 normalize(const float3& a); comp_device_inline float3 min(const float3& a, const float3& b); comp_device_inline float3 max(const float3& a, const float3& b); comp_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx); comp_device_inline float3 clamp(const float3& a, float mn, float mx); comp_device_inline float3 fabs(const float3& a); comp_device_inline float3 mix(const float3& a, const float3& b, float t); comp_device_inline float3 rcp(const float3& a); #endif /* _KERNEL_CPU_ */ comp_device_inline float max3(float3 a); comp_device_inline float len(const float3 a); comp_device_inline float len_squared(const float3 a); #define length_v3(a) sqrt(dot(a, a)) comp_device_inline float3 saturate3(float3 a); comp_device_inline float3 safe_normalize(const float3 a); comp_device_inline float3 normalize_len(const float3 a, comp_inout(float, t)); comp_device_inline float3 safe_normalize_len(const float3 a, comp_inout(float, t)); comp_device_inline bool is_zero(const float3 a); comp_device_inline float reduce_add(const float3 a); comp_device_inline float average(const float3 a); comp_device_inline bool isequal_float3(const float3 a, const float3 b); /******************************************************************************* * Definition. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline float3 operator-(const float3& a) { #ifdef __KERNEL_SSE__ return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); #else return make_float3(-a.x, -a.y, -a.z); #endif } comp_device_inline float3 operator*(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_mul_ps(a.m128,b.m128)); #else return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); #endif } comp_device_inline float3 operator*(const float3& a, const float f) { #ifdef __KERNEL_SSE__ return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f))); #else return make_float3(a.x*f, a.y*f, a.z*f); #endif } comp_device_inline float3 operator*(const float f, const float3& a) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */ #if defined(__KERNEL_SSE__) && 0 return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); #else return make_float3(a.x*f, a.y*f, a.z*f); #endif } comp_device_inline float3 operator/(const float f, const float3& a) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */ #if defined(__KERNEL_SSE__) && 0 __m128 rc = _mm_rcp_ps(a.m128); return float3(_mm_mul_ps(_mm_set1_ps(f),rc)); #else return make_float3(f / a.x, f / a.y, f / a.z); #endif } comp_device_inline float3 operator/(const float3& a, const float f) { float invf = 1.0f/f; return a * invf; } comp_device_inline float3 operator/(const float3& a, const float3& b) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. 
*/ #if defined(__KERNEL_SSE__) && 0 __m128 rc = _mm_rcp_ps(b.m128); return float3(_mm_mul_ps(a, rc)); #else return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); #endif } comp_device_inline float3 operator+(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_add_ps(a.m128, b.m128)); #else return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); #endif } comp_device_inline float3 operator-(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_sub_ps(a.m128, b.m128)); #else return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); #endif } comp_device_inline float3 operator+=(float3& a, const float3& b) { return a = a + b; } comp_device_inline float3 operator-=(float3& a, const float3& b) { return a = a - b; } comp_device_inline float3 operator*=(float3& a, const float3& b) { return a = a * b; } comp_device_inline float3 operator*=(float3& a, float f) { return a = a * f; } comp_device_inline float3 operator/=(float3& a, const float3& b) { return a = a / b; } comp_device_inline float3 operator/=(float3& a, float f) { float invf = 1.0f/f; return a = a * invf; } comp_device_inline bool operator==(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; #else return (a.x == b.x && a.y == b.y && a.z == b.z); #endif } comp_device_inline bool operator!=(const float3& a, const float3& b) { return !(a == b); } comp_device_inline float dot(const float3& a, const float3& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); #else return a.x*b.x + a.y*b.y + a.z*b.z; #endif } comp_device_inline float dot_xy(const float3& a, const float3& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b)); #else return a.x*b.x + a.y*b.y; #endif } comp_device_inline float3 cross(const float3& a, const float3& b) { float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - 
a.y*b.x); return r; } comp_device_inline float3 normalize(const float3& a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); return float3(_mm_div_ps(a.m128, norm)); #else return a/len(a); #endif } comp_device_inline float3 min(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_min_ps(a.m128, b.m128)); #else return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); #endif } comp_device_inline float3 max(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_max_ps(a.m128, b.m128)); #else return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); #endif } comp_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx) { return min(max(a, mn), mx); } comp_device_inline float3 clamp(const float3& a, float mn, float mx) { return clamp(a, make_float3(mn,mn,mn), make_float3(mx,mx,mx)); } comp_device_inline float3 fabs(const float3& a) { #ifdef __KERNEL_SSE__ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); return float3(_mm_and_ps(a.m128, mask)); #else return make_float3(fabs(a.x), fabs(a.y), fabs(a.z)); #endif } comp_device_inline float3 mix(const float3& a, const float3& b, float t) { return a + t*(b - a); } comp_device_inline float3 rcp(const float3& a) { #ifdef __KERNEL_SSE__ const float4 r(_mm_rcp_ps(a.m128)); return float3(_mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a))); #else return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z); #endif } #endif /* _KERNEL_CPU_ */ comp_device_inline float max3(float3 a) { return max(max(a.x, a.y), a.z); } comp_device_inline float len(const float3 a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); #else return sqrt(dot(a, a)); #endif } comp_device_inline float len_squared(const float3 a) { return dot(a, a); } comp_device_inline float3 saturate3(float3 a) { return 
make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); } comp_device_inline float3 normalize_len(const float3 a, comp_inout(float, t)) { comp_get_ref(t) = len(a); float x = 1.0f / comp_get_ref(t); return a*x; } comp_device_inline float3 safe_normalize(const float3 a) { float t = len(a); return (t != 0.0f)? a * (1.0f/t) : a; } comp_device_inline float3 safe_normalize_len(const float3 a, comp_inout(float, t)) { comp_get_ref(t) = len(a); return (comp_get_ref(t) != 0.0f)? a/(comp_get_ref(t)): a; } comp_device_inline bool is_zero(const float3 a) { #ifdef __KERNEL_SSE__ return a == make_float3(0.0f); #else return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); #endif } comp_device_inline float reduce_add(const float3 a) { return (a.x + a.y + a.z); } comp_device_inline float average(const float3 a) { return reduce_add(a)*(1.0f/3.0f); } comp_device_inline bool isequal_float3(const float3 a, const float3 b) { #ifdef _KERNEL_OPENCL_ return all(a == b); #else return a == b; #endif } COMP_NAMESPACE_END #endif /* _UTIL_MATH_FLOAT3_H_ */ // END #include "util/util_math_float3.h" // -------------------------------------------------------- // START #include "util/util_math_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT4_H_ #define _UTIL_MATH_FLOAT4_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline float4 operator-(const float4& a); comp_device_inline float4 operator*(const float4& a, const float4& b); comp_device_inline float4 operator*(const float4& a, float f); comp_device_inline float4 operator*(float f, const float4& a); comp_device_inline float4 operator/(const float4& a, float f); comp_device_inline float4 operator/(const float4& a, const float4& b); comp_device_inline float4 operator+(const float4& a, const float4& b); comp_device_inline float4 operator+(const float4& a, float f); comp_device_inline float4 operator-(const float4& a, const float4& b); comp_device_inline float4 operator-(const float4& a, const float b); comp_device_inline float4 operator+=(float4& a, const float4& b); comp_device_inline float4 operator*=(float4& a, const float4& b); comp_device_inline float4 operator/=(float4& a, float f); comp_device_inline int4 operator<(const float4& a, const float4& b); comp_device_inline int4 operator>=(const float4& a, const float4& b); comp_device_inline int4 operator<=(const float4& a, const float4& b); comp_device_inline bool operator==(const float4& a, const float4& b); comp_device_inline float dot(const float4& a, const float4& b); comp_device_inline float4 fabs(const float4& a); comp_device_inline float len_squared(const float4& a); comp_device_inline float4 rcp(const float4& a); comp_device_inline float4 cross(const float4& a, const float4& b); comp_device_inline bool is_zero(const float4& a); comp_device_inline float reduce_add(const float4& a); comp_device_inline float average(const float4& a); comp_device_inline float len(const float4& a); comp_device_inline float4 normalize(const float4& a); comp_device_inline float4 safe_normalize(const float4& a); comp_device_inline float4 min(const float4& a, const float4& b); comp_device_inline float4 max(const float4& a, const float4& 
b); comp_device_inline float4 mix(const float4& a, const float4& b, float t); #endif /* _KERNEL_CPU_ */ #ifdef __KERNEL_SSE__ template __forceinline const float4 shuffle(const float4& b); template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b); # ifdef __KERNEL_SSE3__ template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b); template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b); # endif #endif /* __KERNEL_SSE__ */ #ifdef _KERNEL_CPU_ comp_device_inline float4 select(const int4& mask, const float4& a, const float4& b); comp_device_inline float4 reduce_min(const float4& a); comp_device_inline float4 reduce_max(const float4& a); # if 0 comp_device_inline float4 reduce_add(const float4& a); # endif #endif /* !_KERNEL_GPU_ */ #define length_v4(a) sqrt(dot(a, a)) /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline float4 operator-(const float4& a) { #ifdef __KERNEL_SSE__ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); return float4(_mm_xor_ps(a.m128, mask)); #else return make_float4(-a.x, -a.y, -a.z, -a.w); #endif } comp_device_inline float4 operator*(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_mul_ps(a.m128, b.m128)); #else return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); #endif } comp_device_inline float4 operator*(const float4& a, float f) { #if defined(__KERNEL_SSE__) return a * make_float4(f); #else return make_float4(a.x*f, a.y*f, a.z*f, a.w*f); #endif } comp_device_inline float4 operator*(float f, const float4& a) { return a * f; } comp_device_inline float4 operator/(const float4& a, float f) { return a * (1.0f/f); } comp_device_inline float4 operator/(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return a * rcp(b); #else return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); #endif } comp_device_inline float4 operator+(const float4& a, const float4& b) { 
#ifdef __KERNEL_SSE__ return float4(_mm_add_ps(a.m128, b.m128)); #else return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); #endif } comp_device_inline float4 operator+(const float4& a, const float b) { return make_float4(a.x+b, a.y+b, a.z+b, a.w+b); } comp_device_inline float4 operator-(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_sub_ps(a.m128, b.m128)); #else return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); #endif } comp_device_inline float4 operator-(const float4& a, const float b) { return make_float4(a.x-b, a.y-b, a.z-b, a.w-b); } comp_device_inline float4 operator+=(float4& a, const float4& b) { return a = a + b; } comp_device_inline float4 operator*=(float4& a, const float4& b) { return a = a * b; } comp_device_inline float4 operator/=(float4& a, float f) { return a = a / f; } comp_device_inline int4 operator<(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return int4(_mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128))); #else return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); #endif } comp_device_inline int4 operator>=(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return int4(_mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128))); #else return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); #endif } comp_device_inline int4 operator<=(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. 
*/ return int4(_mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128))); #else return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); #endif } comp_device_inline bool operator==(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; #else return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); #endif } comp_device_inline float dot(const float4& a, const float4& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); #else return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w); #endif } comp_device_inline float4 fabs(const float4& a) { return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w)); } comp_device_inline float len_squared(const float4& a) { return dot(a, a); } comp_device_inline float4 rcp(const float4& a) { #ifdef __KERNEL_SSE__ float4 r(_mm_rcp_ps(a.m128)); return float4(_mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a))); #else return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w); #endif } comp_device_inline float4 cross(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b)); #else return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f); #endif } comp_device_inline bool is_zero(const float4& a) { #ifdef __KERNEL_SSE__ return a == make_float4(0.0f); #else return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); #endif } comp_device_inline float reduce_add(const float4& a) { #ifdef __KERNEL_SSE__ float4 h(shuffle<1,0,3,2>(a) + a); /* TODO(sergey): Investigate efficiency. 
*/ return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); #else return ((a.x + a.y) + (a.z + a.w)); #endif } comp_device_inline float average(const float4& a) { return reduce_add(a) * 0.25f; } comp_device_inline float len(const float4& a) { return sqrt(dot(a, a)); } comp_device_inline float4 normalize(const float4& a) { return a/len(a); } comp_device_inline float4 safe_normalize(const float4& a) { float t = len(a); return (t != 0.0f)? a/t: a; } comp_device_inline float4 min(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_min_ps(a.m128, b.m128)); #else return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); #endif } comp_device_inline float4 max(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_max_ps(a.m128, b.m128)); #else return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); #endif } #endif /* !__KERNEL_OPENCL__*/ #ifdef __KERNEL_SSE__ template __forceinline const float4 shuffle(const float4& b) { return float4(_mm_castsi128_ps( _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); } template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b) { return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); } # ifdef __KERNEL_SSE3__ template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b) { return float4(_mm_moveldup_ps(b)); } template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b) { return float4(_mm_movehdup_ps(b)); } # endif /* __KERNEL_SSE3__ */ #endif /* __KERNEL_SSE__ */ #ifdef _KERNEL_CPU_ comp_device_inline float4 select(const int4& mask, const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return float4(_mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a), _mm_andnot_ps(_mm_cvtepi32_ps(mask), b))); #else return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); #endif } comp_device_inline float4 reduce_min(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = min(shuffle<1,0,3,2>(a), a); return min(shuffle<2,3,0,1>(h), h); #else return make_float4(min(min(a.x, a.y), min(a.z, a.w))); #endif } comp_device_inline float4 reduce_max(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = max(shuffle<1,0,3,2>(a), a); return max(shuffle<2,3,0,1>(h), h); #else return make_float4(max(max(a.x, a.y), max(a.z, a.w))); #endif } comp_device_inline float4 mix(const float4& a, const float4& b, float t) { return a + t*(b - a); } #if 0 comp_device_inline float4 reduce_add(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = shuffle<1,0,3,2>(a) + a; return shuffle<2,3,0,1>(h) + h; #else return make_float4((a.x + a.y) + (a.z + a.w)); #endif } #endif #endif /* !_KERNEL_GPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_FLOAT4_H_ */ // END #include "util/util_math_float4.h" COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ /* Interpolation */ template A lerp(const A& a, const A& b, const B& t) { return (A)(a * ((B)1 - t) + b * t); } /* Triangle */ comp_device_inline float triangle_area(const float3& v1, const float3& v2, const float3& v3) { return len(cross(v3 - v2, v1 - v2))*0.5f; } #endif /* _KERNEL_CPU_ */ /* Orthonormal vectors */ comp_device_inline void make_orthonormals(const float3 N, comp_inout(float3, a), comp_inout(float3, b)) { if(N.x != N.y || N.x != N.z) comp_get_ref(a) = make_float3(N.z-N.y, N.x-N.z, N.y-N.x); //(1,1,1)x N else comp_get_ref(a) = make_float3(N.z-N.y, N.x+N.z, -N.y-N.x); //(-1,1,1)x N comp_get_ref(a) = normalize(comp_get_ref(a)); comp_get_ref(b) = cross(N, comp_get_ref(a)); } /* Color division */ comp_device_inline float3 safe_invert_color(float3 a) { float x, y, z; x = (a.x != 0.0f)? 1.0f/a.x: 0.0f; y = (a.y != 0.0f)? 1.0f/a.y: 0.0f; z = (a.z != 0.0f)? 1.0f/a.z: 0.0f; return make_float3(x, y, z); } comp_device_inline float3 safe_divide_color(float3 a, float3 b) { float x, y, z; x = (b.x != 0.0f)? 
a.x/b.x: 0.0f; y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; return make_float3(x, y, z); } comp_device_inline float3 safe_divide_even_color(float3 a, float3 b) { float x, y, z; x = (b.x != 0.0f)? a.x/b.x: 0.0f; y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; /* try to get gray even if b is zero */ if(b.x == 0.0f) { if(b.y == 0.0f) { x = z; y = z; } else if(b.z == 0.0f) { x = y; z = y; } else x = 0.5f*(y + z); } else if(b.y == 0.0f) { if(b.z == 0.0f) { y = x; z = x; } else y = 0.5f*(x + z); } else if(b.z == 0.0f) { z = 0.5f*(x + y); } return make_float3(x, y, z); } /* Rotation of point around axis and angle */ comp_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) { float costheta = cos(angle); float sintheta = sin(angle); float3 r; r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); return r; } /* NaN-safe math ops */ comp_device_inline float safe_sqrt(float f) { return sqrt(max(f, 0.0f)); } comp_device float safe_asin(float a) { return asin(clamp(a, -1.0f, 1.0f)); } comp_device float safe_acos(float a) { return acos(clamp(a, -1.0f, 1.0f)); } comp_device float compatible_pow(float x, float y) { #ifdef _KERNEL_GPU_ if(y == 0.0f) /* x^0 -> 1, including 0^0 */ return 1.0f; /* GPU pow doesn't accept negative x, do manual checks here */ if(x < 0.0f) { if(mod(-y, 2.0f) == 0.0f) return pow(-x, y); else return -pow(-x, y); } else if(x == 0.0f) return 0.0f; #endif return pow(x, y); } comp_device 
float safe_pow(float a, float b) { if(UNLIKELY(a < 0.0f && b != float_to_int(b))) return 0.0f; return compatible_pow(a, b); } comp_device float safe_divide(float a, float b) { return (b != 0.0f)? a/b: 0.0f; } comp_device float safe_log(float a, float b) { if(UNLIKELY(a <= 0.0f || b <= 0.0f)) return 0.0f; return safe_divide(log(a),log(b)); } comp_device float safe_modulo(float a, float b) { return (b != 0.0f)? mod(a, b): 0.0f; } comp_device_inline float xor_signmask(float x, int y) { return int_as_float(float_as_int(x) ^ y); } COMP_NAMESPACE_END #endif /* _UTIL_MATH_H_ */ // END #include "util/util_math.h" // -------------------------------------------------------- // START #include "util/util_math_intersect.h" // -------------------------------------------------------- #ifndef _UTIL_MATH_INTERSECT_H_ #define _UTIL_MATH_INTERSECT_H_ COMP_NAMESPACE_BEGIN /* Ray Plane Intersection */ comp_device_inline bool ray_plane_intersect(comp_device_struct Ray* ray, float3 planePoint, float3 planeNormal, float* length) { // assume ray->D and planeNormal are unit vectors float denom = dot(planeNormal, ray->D); if (denom > 1e-6) { *length = dot(planePoint - ray->P, planeNormal) / denom; return true; } return false; } COMP_NAMESPACE_END #endif // END #include "util/util_math_intersect.h" // -------------------------------------------------------- // START #include "util/util_texture.h" // -------------------------------------------------------- /* * Copyright 2011-2016 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TEXTURE_H_ #define _UTIL_TEXTURE_H_ COMP_NAMESPACE_BEGIN COMP_NAMESPACE_END #endif /* _UTIL_TEXTURE_H_ */ // END #include "util/util_texture.h" // -------------------------------------------------------- // START #include "util/util_color.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_COLOR_H_ #define _UTIL_COLOR_H_ // -------------------------------------------------------- // START #include "util/util_math.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_H_ #define _UTIL_MATH_H_ /* Math * * Basic math functions on scalar and vector types. 
This header is used by * both the kernel code when compiled as C++, and other C++ non-kernel code. */ #ifdef _KERNEL_CPU_ # include # include # include # include #endif /* _KERNEL_CPU_ */ // -------------------------------------------------------- // START #include "util/util_types.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_H_ #define _UTIL_TYPES_H_ #ifdef _KERNEL_CPU_ #include #endif /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) # define __KERNEL_64_BIT__ #endif /* Qualifiers for kernel code shared by CPU and GPU */ #ifdef _KERNEL_CPU_ # define comp_device_inline static inline # define comp_device_noinline static # define comp_global # define comp_constant # define comp_local # define comp_local_param # define comp_private # define comp_restrict __restrict # define __KERNEL_WITH_SSE_ALIGN__ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define comp_device_inline static __forceinline # define comp_device_forceinline static __forceinline # define comp_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define comp_try_align(...) __declspec(align(__VA_ARGS__)) # else /* __KERNEL_64_BIT__ */ # undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ # define comp_try_align(...) 
# endif /* __KERNEL_64_BIT__ */ # define comp_may_alias # define comp_always_inline __forceinline # define comp_never_inline __declspec(noinline) # define comp_maybe_unused # else /* _WIN32 && !FREE_WINDOWS */ //# define comp_device_inline static inline __attribute__((always_inline)) # define comp_device_forceinline static inline __attribute__((always_inline)) # define comp_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) # endif # define comp_try_align(...) __attribute__((aligned(__VA_ARGS__))) # define comp_may_alias __attribute__((__may_alias__)) # define comp_always_inline __attribute__((always_inline)) # define comp_never_inline __attribute__((noinline)) # define comp_maybe_unused __attribute__((used)) # endif /* _WIN32 && !FREE_WINDOWS */ #endif /* _KERNEL_GPU_ */ /* Standard Integer Types */ #ifdef _KERNEL_CPU_ /* int8_t, uint16_t, and friends */ # ifndef _WIN32 # include # endif /* SIMD Types */ // # include "util/util_optimization.h" #endif /* _KERNEL_GPU_ */ COMP_NAMESPACE_BEGIN /* Types * * Define simpler unsigned type names, and integer with defined number of bits. * Also vector types, named to be compatible with OpenCL builtin types, while * working for CUDA and C++ too. 
*/ /* Shorter Unsigned Names */ #ifdef _KERNEL_CPU_ typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned short ushort; #endif // #ifndef __KERNEL_OPENCL__ // typedef unsigned char uchar; // typedef unsigned int uint; // typedef unsigned short ushort; // #endif /* Fixed Bits Types */ // #ifdef __KERNEL_OPENCL__ // typedef ulong uint64_t; // #endif #ifdef _KERNEL_CPU_ # ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; # ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif # endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; #endif /* _KERNEL_GPU_ */ // comp_device_inline size_t align_up(size_t offset, size_t alignment) // { // return (offset + alignment - 1) & ~(alignment - 1); // } // // comp_device_inline size_t divide_up(size_t x, size_t y) // { // return (x + y - 1) / y; // } // // comp_device_inline size_t round_up(size_t x, size_t multiple) // { // return ((x + multiple - 1) / multiple) * multiple; // } // // comp_device_inline size_t round_down(size_t x, size_t multiple) // { // return (x / multiple) * multiple; // } /* Interpolation types for textures * cuda also use texture space to store other objects */ #if defined(_KERNEL_CPU_) || defined(_KERNEL_OPENCL_) enum InterpolationType { INTERPOLATION_LINEAR = 0, INTERPOLATION_CLOSEST = 1, INTERPOLATION_CUBIC = 2, INTERPOLATION_SMART = 3 }; enum ExtensionType { /* Cause the image to repeat horizontally and vertically. */ EXTENSION_REPEAT = 0, /* Extend by repeating edge pixels of the image. */ EXTENSION_EXTEND = 1, /* Clip to image size and set exterior pixels as transparent. */ EXTENSION_CLIP = 2, }; #endif /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. 
*/ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ #if defined(__GNUC__) && defined(_KERNEL_CPU_) # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) (x) # define UNLIKELY(x) (x) #endif // #if defined(__GNUC__) || defined(__clang__) && defined(_KERNEL_CPU_) // /* Some magic to be sure we don't have reference in the type. */ // template static inline T decltype_helper(T x) { return x; } // # define TYPEOF(x) decltype(decltype_helper(x)) // #endif COMP_NAMESPACE_END #ifdef _KERNEL_CPU_ # include # define util_assert(statement) assert(statement) #else # define util_assert(statement) #endif /* Vectorized types declaration. */ // -------------------------------------------------------- // START #include "util/util_types_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT2_H_ #define _UTIL_TYPES_INT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int2 { int x, y; __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int2 make_int2(int x, int y); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int2(x, y) ((ivec2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_H_ */ // END #include "util/util_types_int2.h" // -------------------------------------------------------- // START #include "util/util_types_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_H_ #define _UTIL_TYPES_INT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) int3 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int3(); __forceinline int3(const int3& a); __forceinline explicit int3(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int3& operator =(const int3& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int3 make_int3(int i); comp_device_inline int3 make_int3(int x, int y, int z); comp_device_inline void print_int3(const char *label, const int3& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int3(x, y, z) ((int3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_H_ */ // END #include "util/util_types_int3.h" // -------------------------------------------------------- // START #include "util/util_types_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_H_ #define _UTIL_TYPES_INT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float3; struct comp_try_align(16) int4 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int4(); __forceinline int4(const int4& a); __forceinline explicit int4(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int4& operator=(const int4& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int4 make_int4(int i); comp_device_inline int4 make_int4(int x, int y, int z, int w); comp_device_inline int4 make_int4(const float3& f); comp_device_inline void print_int4(const char *label, const int4& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_H_ */ // END #include "util/util_types_int4.h" // -------------------------------------------------------- // START #include "util/util_types_uint2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_H_ #define _UTIL_TYPES_UINT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint2 { uint x, y; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint2 make_uint2(uint x, uint y); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint2(x, y) ((uint2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_H_ */ // END #include "util/util_types_uint2.h" // -------------------------------------------------------- // START #include "util/util_types_uint3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT3_H_ #define _UTIL_TYPES_UINT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint3 { uint x, y, z; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint3 make_uint3(uint x, uint y, uint z); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint3(x, y, z) ((uint3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_H_ */ // END #include "util/util_types_uint3.h" // -------------------------------------------------------- // START #include "util/util_types_uint4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_H_ #define _UTIL_TYPES_UINT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint4 { uint x, y, z, w; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_H_ */ // END #include "util/util_types_uint4.h" // -------------------------------------------------------- // START #include "util/util_types_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_H_ #define _UTIL_TYPES_FLOAT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float2 { float x, y; __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float2 make_float2(float x, float y); comp_device_inline void print_float2(const char *label, const float2& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float2(x, y) ((float2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_H_ */ // END #include "util/util_types_float2.h" // -------------------------------------------------------- // START #include "util/util_types_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_H_ #define _UTIL_TYPES_FLOAT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) float3 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float3(); __forceinline float3(const float3& a); __forceinline explicit float3(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float3& operator =(const float3& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float3 make_float3(float f); comp_device_inline float3 make_float3(float x, float y, float z); comp_device_inline void print_float3(const char *label, const float3& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float3(x, y, z) ((float3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_H_ */ // END #include "util/util_types_float3.h" // -------------------------------------------------------- // START #include "util/util_types_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_H_ #define _UTIL_TYPES_FLOAT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int4; struct comp_try_align(16) float4 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float4(); __forceinline float4(const float4& a); __forceinline explicit float4(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float4& operator =(const float4& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float4 make_float4(float f); comp_device_inline float4 make_float4(float x, float y, float z, float w); comp_device_inline float4 make_float4(const int4& i); comp_device_inline void print_float4(const char *label, const float4& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_H_ */ // END #include "util/util_types_float4.h" /* Vectorized types implementation. */ // -------------------------------------------------------- // START #include "util/util_types_int2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_INT2_IMPL_H_ #define _UTIL_TYPES_INT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ int int2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } int& int2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline int2 make_int2(int x, int y) { int2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_IMPL_H_ */ // END #include "util/util_types_int2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_IMPL_H_ #define _UTIL_TYPES_INT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int3::int3() { } __forceinline int3::int3(const __m128i& a) : m128(a) { } __forceinline int3::int3(const int3& a) : m128(a.m128) { } __forceinline int3::operator const __m128i&(void) const { return m128; } __forceinline int3::operator __m128i&(void) { return m128; } __forceinline int3& int3::operator =(const int3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline int& int3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline int3 make_int3(int i) { #ifdef __KERNEL_SSE__ int3 a(_mm_set1_epi32(i)); #else int3 a = {i, i, i, i}; #endif return a; } comp_device_inline int3 make_int3(int x, int y, int z) { #ifdef __KERNEL_SSE__ int3 a(_mm_set_epi32(0, z, y, x)); #else int3 a = {x, y, z, 0}; #endif return a; } comp_device_inline void print_int3(const char *label, const int3& a) { printf("%s: %d %d %d\n", label, a.x, a.y, a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_IMPL_H_ */ // END #include "util/util_types_int3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_int4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_IMPL_H_ #define _UTIL_TYPES_INT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline int4::int4() { } __forceinline int4::int4(const int4& a) : m128(a.m128) { } __forceinline int4::int4(const __m128i& a) : m128(a) { } __forceinline int4::operator const __m128i&(void) const { return m128; } __forceinline int4::operator __m128i&(void) { return m128; } __forceinline int4& int4::operator=(const int4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline int int4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline int& int4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline int4 make_int4(int i) { #ifdef __KERNEL_SSE__ int4 a(_mm_set1_epi32(i)); #else int4 a = {i, i, i, i}; #endif return a; } comp_device_inline int4 make_int4(int x, int y, int z, int w) { #ifdef __KERNEL_SSE__ int4 a(_mm_set_epi32(w, z, y, x)); #else int4 a = {x, y, z, w}; #endif return a; } comp_device_inline int4 make_int4(const float3& f) { #ifdef __KERNEL_SSE__ int4 a(_mm_cvtps_epi32(f.m128)); #else int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; #endif return a; } comp_device_inline void print_int4(const char *label, const int4& a) { printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_IMPL_H_ */ // END #include "util/util_types_int4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_IMPL_H_ #define _UTIL_TYPES_UINT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint2::operator[](uint i) const { util_assert(i < 2); return *(&x + i); } __forceinline uint& uint2::operator[](uint i) { util_assert(i < 2); return *(&x + i); } comp_device_inline uint2 make_uint2(uint x, uint y) { uint2 a = {x, y}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_IMPL_H_ */ // END #include "util/util_types_uint2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_UINT3_IMPL_H_ #define _UTIL_TYPES_UINT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint3::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint3::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint3 make_uint3(uint x, uint y, uint z) { uint3 a = {x, y, z}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_IMPL_H_ */ // END #include "util/util_types_uint3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_IMPL_H_ #define _UTIL_TYPES_UINT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline uint uint4::operator[](uint i) const { util_assert(i < 3); return *(&x + i); } __forceinline uint& uint4::operator[](uint i) { util_assert(i < 3); return *(&x + i); } comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { uint4 a = {x, y, z, w}; return a; } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_IMPL_H_ */ // END #include "util/util_types_uint4_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_IMPL_H_ #define _UTIL_TYPES_FLOAT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ __forceinline float float2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } __forceinline float& float2::operator[](int i) { util_assert(i >= 0); util_assert(i < 2); return *(&x + i); } comp_device_inline float2 make_float2(float x, float y) { float2 a = {x, y}; return a; } comp_device_inline void print_float2(const char *label, const float2& a) { printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_IMPL_H_ */ // END #include "util/util_types_float2_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float3_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_IMPL_H_ #define _UTIL_TYPES_FLOAT3_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float3::float3() { } __forceinline float3::float3(const float3& a) : m128(a.m128) { } __forceinline float3::float3(const __m128& a) : m128(a) { } __forceinline float3::operator const __m128&(void) const { return m128; } __forceinline float3::operator __m128&(void) { return m128; } __forceinline float3& float3::operator =(const float3& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float3::operator[](int i) const { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } __forceinline float& float3::operator[](int i) { util_assert(i >= 0); util_assert(i < 3); return *(&x + i); } comp_device_inline float3 make_float3(float f) { #ifdef __KERNEL_SSE__ float3 a(_mm_set1_ps(f)); #else float3 a = {f, f, f, f}; #endif return a; } comp_device_inline float3 make_float3(float x, float y, float z) { #ifdef __KERNEL_SSE__ float3 a(_mm_set_ps(0.0f, z, y, x)); #else float3 a = {x, y, z, 0.0f}; #endif return a; } comp_device_inline void print_float3(const char *label, const float3& a) { printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_IMPL_H_ */ // END #include "util/util_types_float3_impl.h" // -------------------------------------------------------- // START #include "util/util_types_float4_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_IMPL_H_ #define _UTIL_TYPES_FLOAT4_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif #ifdef _KERNEL_CPU_ # include #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ #ifdef __KERNEL_SSE__ __forceinline float4::float4() { } __forceinline float4::float4(const float4& a) : m128(a.m128) { } __forceinline float4::float4(const __m128& a) : m128(a) { } __forceinline float4::operator const __m128&(void) const { return m128; } __forceinline float4::operator __m128&(void) { return m128; } __forceinline float4& float4::operator =(const float4& a) { m128 = a.m128; return *this; } #endif /* __KERNEL_SSE__ */ __forceinline float float4::operator[](int i) const { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } __forceinline float& float4::operator[](int i) { util_assert(i >= 0); util_assert(i < 4); return *(&x + i); } comp_device_inline float4 make_float4(float f) { #ifdef __KERNEL_SSE__ float4 a(_mm_set1_ps(f)); #else float4 a = {f, f, f, f}; #endif return a; } comp_device_inline float4 make_float4(float x, float y, float z, float w) { #ifdef __KERNEL_SSE__ float4 a(_mm_set_ps(w, z, y, x)); #else float4 a = {x, y, z, w}; #endif return a; } comp_device_inline float4 make_float4(const int4& i) { #ifdef __KERNEL_SSE__ float4 a(_mm_cvtepi32_ps(i.m128)); #else float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; #endif return a; } comp_device_inline void print_float4(const char *label, const float4& a) { printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, 
(double)a.z, (double)a.w); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_IMPL_H_ */ // END #include "util/util_types_float4_impl.h" #endif /* _UTIL_TYPES_H_ */ // END #include "util/util_types.h" COMP_NAMESPACE_BEGIN /* Float Pi variations */ /* Division */ #ifndef M_PI_F # define M_PI_F (3.1415926535897932f) /* pi */ #endif #ifndef M_PI_2_F # define M_PI_2_F (1.5707963267948966f) /* pi/2 */ #endif #ifndef M_PI_4_F # define M_PI_4_F (0.7853981633974830f) /* pi/4 */ #endif #ifndef M_1_PI_F # define M_1_PI_F (0.3183098861837067f) /* 1/pi */ #endif #ifndef M_2_PI_F # define M_2_PI_F (0.6366197723675813f) /* 2/pi */ #endif /* Multiplication */ #ifndef M_2PI_F # define M_2PI_F (6.2831853071795864f) /* 2*pi */ #endif #ifndef M_4PI_F # define M_4PI_F (12.566370614359172f) /* 4*pi */ #endif /* Float sqrt variations */ #ifndef M_SQRT2_F # define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ #endif #ifndef M_LN2_F # define M_LN2_F (0.6931471805599453f) /* ln(2) */ #endif #ifndef M_LN10_F # define M_LN10_F (2.3025850929940457f) /* ln(10) */ #endif /* Scalar */ #ifdef _WIN32 # ifndef __KERNEL_OPENCL__ comp_device_inline float fmaxf(float a, float b) { return (a > b)? a: b; } comp_device_inline float fminf(float a, float b) { return (a < b)? a: b; } # endif /* !__KERNEL_OPENCL__ */ #endif /* _WIN32 */ #ifdef _KERNEL_CPU_ using std::isfinite; using std::isnan; using std::fabs; #define floor(var) floorf(var) #define ceil(var) ceilf(var) #define cos(var) cosf(var) #define sin(var) sinf(var) #define sqrt(var) sqrtf(var) #define asin(var) asinf(var) #define acos(var) acos(var) #define mod(var1, var2) fmodf(var1, var2) #define pow(var1, var2) powf(var1, var2) #define log(var) logf(var) comp_device_inline int max(int a, int b) { return (a > b)? a: b; } comp_device_inline int min(int a, int b) { return (a < b)? a: b; } comp_device_inline float max(float a, float b) { return (a > b)? a: b; } comp_device_inline float min(float a, float b) { return (a < b)? 
a: b; } comp_device_inline double max(double a, double b) { return (a > b)? a: b; } comp_device_inline double min(double a, double b) { return (a < b)? a: b; } /* These 2 guys are templated for usage with registers data. * * NOTE: Since this is CPU-only functions it is ok to use references here. * But for other devices we'll need to be careful about this. */ template comp_device_inline T min4(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); } template comp_device_inline T max4(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); } #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define abs(x) fabs(x) #define mod(x, y) fmod(x, y) #endif /* _KERNEL_OPENCL_ */ comp_device_inline float min4(float a, float b, float c, float d) { return min(min(a, b), min(c, d)); } comp_device_inline float max4(float a, float b, float c, float d) { return max(max(a, b), max(c, d)); } #ifdef _KERNEL_CPU_ /* Int/Float conversion */ comp_device_inline int uint_as_int(uint i) { union { uint ui; int i; } u; u.ui = i; return u.i; } comp_device_inline uint int_as_uint(int i) { union { uint ui; int i; } u; u.i = i; return u.ui; } comp_device_inline int float_as_int(float f) { union { int i; float f; } u; u.f = f; return u.i; } comp_device_inline float int_as_float(int i) { union { int i; float f; } u; u.i = i; return u.f; } comp_device_inline uint float_as_uint(float f) { union { uint i; float f; } u; u.f = f; return u.i; } comp_device_inline float uint_as_float(uint i) { union { uint i; float f; } u; u.i = i; return u.f; } #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define uint_as_int(i) as_int(i) #define int_as_uint(i) as_uint(i) #define float_as_int(f) as_int(f) #define int_as_float(i) as_float(i) #define float_as_uint(f) as_uint(f) #define uint_as_float(i) as_float(i) #endif /* Versions of functions which are safe for fast math. 
*/ comp_device_inline bool isnan_safe(float f) { uint x = float_as_uint(f); return (x << 1) > 0xff000000u; } // comp_device_inline bool isfinite_safe(float f) // { // /* By IEEE 754 rule, 2*Inf equals Inf */ // uint x = float_as_uint(f); // return (f == f) && (x == 0 || (f != 2.0f*f)) && !((x << 1) > 0xff000000u); // } // comp_device_inline float ensure_finite(float v) // { // return isfinite_safe(v)? v : 0.0f; // } #ifdef _KERNEL_CPU_ comp_device_inline int clamp(int a, int mn, int mx) { return min(max(a, mn), mx); } comp_device_inline float clamp(float a, float mn, float mx) { return min(max(a, mn), mx); } comp_device_inline float mix(float a, float b, float t) { return a + t*(b - a); } #endif /* __KERNEL_OPENCL__ */ comp_device_inline float saturate(float a) { return clamp(a, 0.0f, 1.0f); } #ifdef _KERNEL_CPU_ comp_device_inline int float_to_int(float f) { return (int)f; } #endif #ifdef _KERNEL_OPENCL_ comp_device_inline int float_to_int(float f) { return convert_int(f); } #endif /* _KERNEL_OPENCL_ */ comp_device_inline int floor_to_int(float f) { return float_to_int(floor(f)); } comp_device_inline int ceil_to_int(float f) { return float_to_int(ceil(f)); } comp_device_inline float signf(float f) { return (f < 0.0f)? 
-1.0f: 1.0f; } comp_device_inline float nonzerof(float f, float eps) { if(abs(f) < eps) return signf(f)*eps; else return f; } comp_device_inline float smoothstepf(float f) { float ff = f*f; return (3.0f*ff - 2.0f*ff*f); } comp_device_inline int modulo(int x, int m) { return (x % m + m) % m; } comp_device_inline float3 float2_to_float3(const float2 a) { return make_float3(a.x, a.y, 0.0f); } comp_device_inline float3 float4_to_float3(const float4 a) { return make_float3(a.x, a.y, a.z); } comp_device_inline float4 float3_to_float4(const float3 a) { return make_float4(a.x, a.y, a.z, 1.0f); } COMP_NAMESPACE_END // -------------------------------------------------------- // START #include "util/util_math_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT2_H_ #define _UTIL_MATH_INT2_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline bool operator==(const int2 a, const int2 b); comp_device_inline int2 operator+(const int2 &a, const int2 &b); comp_device_inline int2 operator+=(int2 &a, const int2 &b); comp_device_inline int2 operator-(const int2 &a, const int2 &b); comp_device_inline int2 operator*(const int2 &a, const int2 &b); comp_device_inline int2 operator/(const int2 &a, const int2 &b); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline bool operator==(const int2 a, const int2 b) { return (a.x == b.x && a.y == b.y); } comp_device_inline int2 operator+(const int2 &a, const int2 &b) { return make_int2(a.x + b.x, a.y + b.y); } comp_device_inline int2 operator+=(int2 &a, const int2 &b) { return a = a + b; } comp_device_inline int2 operator-(const int2 &a, const int2 &b) { return make_int2(a.x - b.x, a.y - b.y); } comp_device_inline int2 operator*(const int2 &a, const int2 &b) { return make_int2(a.x * b.x, a.y * b.y); } comp_device_inline int2 operator/(const int2 &a, const int2 &b) { return make_int2(a.x / b.x, a.y / b.y); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT2_H_ */ // END #include "util/util_math_int2.h" // -------------------------------------------------------- // START #include "util/util_math_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT3_H_ #define _UTIL_MATH_INT3_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline int3 min(int3 a, int3 b); comp_device_inline int3 max(int3 a, int3 b); comp_device_inline int3 clamp(const int3& a, int mn, int mx); comp_device_inline int3 clamp(const int3& a, int3& mn, int mx); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline int3 min(int3 a, int3 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int3(_mm_min_epi32(a.m128, b.m128)); #else return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); #endif } comp_device_inline int3 max(int3 a, int3 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int3(_mm_max_epi32(a.m128, b.m128)); #else return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); #endif } comp_device_inline int3 clamp(const int3& a, int mn, int mx) { #ifdef __KERNEL_SSE__ return min(max(a, make_int3(mn)), make_int3(mx)); #else return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); #endif } comp_device_inline int3 clamp(const int3& a, int3& mn, int mx) { #ifdef __KERNEL_SSE__ return min(max(a, mn), make_int3(mx)); #else return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); #endif } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT3_H_ */ // END #include "util/util_math_int3.h" // -------------------------------------------------------- // START #include "util/util_math_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * 
Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_INT4_H_ #define _UTIL_MATH_INT4_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline int4 operator+(const int4& a, const int4& b); comp_device_inline int4 operator+=(int4& a, const int4& b); comp_device_inline int4 operator>>(const int4& a, int i); comp_device_inline int4 min(int4 a, int4 b); comp_device_inline int4 max(int4 a, int4 b); comp_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx); comp_device_inline int4 select(const int4& mask, const int4& a, const int4& b); #endif /* _KERNEL_CPU_ */ /******************************************************************************* * Definition. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline int4 operator+(const int4& a, const int4& b) { #ifdef __KERNEL_SSE__ return int4(_mm_add_epi32(a.m128, b.m128)); #else return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); #endif } comp_device_inline int4 operator+=(int4& a, const int4& b) { return a = a + b; } comp_device_inline int4 operator>>(const int4& a, int i) { #ifdef __KERNEL_SSE__ return int4(_mm_srai_epi32(a.m128, i)); #else return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); #endif } comp_device_inline int4 min(int4 a, int4 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int4(_mm_min_epi32(a.m128, b.m128)); #else return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); #endif } comp_device_inline int4 max(int4 a, int4 b) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) return int4(_mm_max_epi32(a.m128, b.m128)); #else return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); #endif } comp_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx) { return min(max(a, mn), mx); } comp_device_inline int4 select(const int4& mask, const int4& a, const int4& b) { #ifdef __KERNEL_SSE__ const __m128 m = _mm_cvtepi32_ps(mask); /* TODO(sergey): avoid cvt. */ return int4(_mm_castps_si128( _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); #else return make_int4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); #endif } comp_device_inline int4 load_int4(const int *v) { #ifdef __KERNEL_SSE__ return int4(_mm_loadu_si128((__m128i*)v)); #else return make_int4(v[0], v[1], v[2], v[3]); #endif } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_INT4_H_ */ // END #include "util/util_math_int4.h" // -------------------------------------------------------- // START #include "util/util_math_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT2_H_ #define _UTIL_MATH_FLOAT2_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline float2 operator-(const float2& a); comp_device_inline float2 operator*(const float2& a, const float2& b); comp_device_inline float2 operator*(const float2& a, float f); comp_device_inline float2 operator*(float f, const float2& a); comp_device_inline float2 operator/(float f, const float2& a); comp_device_inline float2 operator/(const float2& a, float f); comp_device_inline float2 operator/(const float2& a, const float2& b); comp_device_inline float2 operator+(const float2& a, const float2& b); comp_device_inline float2 operator+(const float2& a, const float b); comp_device_inline float2 operator-(const float2& a, const float2& b); comp_device_inline float2 operator+=(float2& a, const float2& b); comp_device_inline float2 operator*=(float2& a, const float2& b); comp_device_inline float2 operator*=(float2& a, float f); comp_device_inline float2 operator/=(float2& a, const float2& b); comp_device_inline float2 operator/=(float2& a, float f); comp_device_inline bool operator==(const float2& a, const float2& b); comp_device_inline bool operator!=(const float2& a, const float2& b); comp_device_inline bool is_zero(const float2& a); comp_device_inline float average(const float2& a); comp_device_inline float dot(const float2& a, const float2& b); comp_device_inline float cross(const float2& a, const float2& b); comp_device_inline float len(const float2& a); comp_device_inline float2 normalize(const float2& a); comp_device_inline float2 normalize_len(const float2& a, float *t); comp_device_inline float2 safe_normalize(const float2& a); comp_device_inline float2 min(const float2& a, const float2& b); comp_device_inline float2 max(const float2& a, const float2& b); comp_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx); comp_device_inline float2 fabs(const float2& a); comp_device_inline float2 as_float2(const float4& a); comp_device_inline float2 mix(const float2& a, const float2& b, float t); #endif /* 
_KERNEL_CPU_ */ /******************************************************************************* * Definition. */ #define length_v2(a) sqrt(dot(a, a)) #ifdef _KERNEL_CPU_ comp_device_inline float2 operator-(const float2& a) { return make_float2(-a.x, -a.y); } comp_device_inline float2 operator*(const float2& a, const float2& b) { return make_float2(a.x*b.x, a.y*b.y); } comp_device_inline float2 operator*(const float2& a, float f) { return make_float2(a.x*f, a.y*f); } comp_device_inline float2 operator*(float f, const float2& a) { return make_float2(a.x*f, a.y*f); } comp_device_inline float2 operator/(float f, const float2& a) { return make_float2(f/a.x, f/a.y); } comp_device_inline float2 operator/(const float2& a, float f) { float invf = 1.0f/f; return make_float2(a.x*invf, a.y*invf); } comp_device_inline float2 operator/(const float2& a, const float2& b) { return make_float2(a.x/b.x, a.y/b.y); } comp_device_inline float2 operator+(const float2& a, const float2& b) { return make_float2(a.x+b.x, a.y+b.y); } comp_device_inline float2 operator+(const float2& a, float b) { return make_float2(a.x+b, a.y+b); } comp_device_inline float2 operator-(const float2& a, const float2& b) { return make_float2(a.x-b.x, a.y-b.y); } comp_device_inline float2 operator+=(float2& a, const float2& b) { return a = a + b; } comp_device_inline float2 operator*=(float2& a, const float2& b) { return a = a * b; } comp_device_inline float2 operator*=(float2& a, float f) { return a = a * f; } comp_device_inline float2 operator/=(float2& a, const float2& b) { return a = a / b; } comp_device_inline float2 operator/=(float2& a, float f) { float invf = 1.0f/f; return a = a * invf; } comp_device_inline bool operator==(const float2& a, const float2& b) { return (a.x == b.x && a.y == b.y); } comp_device_inline bool operator!=(const float2& a, const float2& b) { return !(a == b); } comp_device_inline bool is_zero(const float2& a) { return (a.x == 0.0f && a.y == 0.0f); } comp_device_inline float 
average(const float2& a) { return (a.x + a.y)*(1.0f/2.0f); } comp_device_inline float dot(const float2& a, const float2& b) { return a.x*b.x + a.y*b.y; } comp_device_inline float cross(const float2& a, const float2& b) { return (a.x*b.y - a.y*b.x); } comp_device_inline float len(const float2& a) { return sqrt(dot(a, a)); } comp_device_inline float2 normalize(const float2& a) { return a/len(a); } comp_device_inline float2 normalize_len(const float2& a, float *t) { *t = len(a); return a/(*t); } comp_device_inline float2 safe_normalize(const float2& a) { float t = len(a); return (t != 0.0f)? a/t: a; } comp_device_inline float2 min(const float2& a, const float2& b) { return make_float2(min(a.x, b.x), min(a.y, b.y)); } comp_device_inline float2 max(const float2& a, const float2& b) { return make_float2(max(a.x, b.x), max(a.y, b.y)); } comp_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx) { return min(max(a, mn), mx); } comp_device_inline float2 fabs(const float2& a) { return make_float2(abs(a.x), abs(a.y)); } comp_device_inline float2 as_float2(const float4& a) { return make_float2(a.x, a.y); } comp_device_inline float2 mix(const float2& a, const float2& b, float t) { return a + t*(b - a); } #endif /* _KERNEL_CPU_ */ COMP_NAMESPACE_END #endif /* __UTIL_MATH_FLOAT2_H__ */ // END #include "util/util_math_float2.h" // -------------------------------------------------------- // START #include "util/util_math_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT3_H_ #define _UTIL_MATH_FLOAT3_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline float3 operator-(const float3& a); comp_device_inline float3 operator*(const float3& a, const float3& b); comp_device_inline float3 operator*(const float3& a, const float f); comp_device_inline float3 operator*(const float f, const float3& a); comp_device_inline float3 operator/(const float f, const float3& a); comp_device_inline float3 operator/(const float3& a, const float f); comp_device_inline float3 operator/(const float3& a, const float3& b); comp_device_inline float3 operator+(const float3& a, const float3& b); comp_device_inline float3 operator-(const float3& a, const float3& b); comp_device_inline float3 operator+=(float3& a, const float3& b); comp_device_inline float3 operator-=(float3& a, const float3& b); comp_device_inline float3 operator*=(float3& a, const float3& b); comp_device_inline float3 operator*=(float3& a, float f); comp_device_inline float3 operator/=(float3& a, const float3& b); comp_device_inline float3 operator/=(float3& a, float f); comp_device_inline bool operator==(const float3& a, const float3& b); comp_device_inline bool operator!=(const float3& a, const float3& b); comp_device_inline float dot(const float3& a, const float3& b); comp_device_inline float dot_xy(const float3& a, const float3& b); comp_device_inline 
float3 cross(const float3& a, const float3& b); comp_device_inline float3 normalize(const float3& a); comp_device_inline float3 min(const float3& a, const float3& b); comp_device_inline float3 max(const float3& a, const float3& b); comp_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx); comp_device_inline float3 clamp(const float3& a, float mn, float mx); comp_device_inline float3 fabs(const float3& a); comp_device_inline float3 mix(const float3& a, const float3& b, float t); comp_device_inline float3 rcp(const float3& a); #endif /* _KERNEL_CPU_ */ comp_device_inline float max3(float3 a); comp_device_inline float len(const float3 a); comp_device_inline float len_squared(const float3 a); #define length_v3(a) sqrt(dot(a, a)) comp_device_inline float3 saturate3(float3 a); comp_device_inline float3 safe_normalize(const float3 a); comp_device_inline float3 normalize_len(const float3 a, comp_inout(float, t)); comp_device_inline float3 safe_normalize_len(const float3 a, comp_inout(float, t)); comp_device_inline bool is_zero(const float3 a); comp_device_inline float reduce_add(const float3 a); comp_device_inline float average(const float3 a); comp_device_inline bool isequal_float3(const float3 a, const float3 b); /******************************************************************************* * Definition. 
*/ #ifdef _KERNEL_CPU_ comp_device_inline float3 operator-(const float3& a) { #ifdef __KERNEL_SSE__ return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); #else return make_float3(-a.x, -a.y, -a.z); #endif } comp_device_inline float3 operator*(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_mul_ps(a.m128,b.m128)); #else return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); #endif } comp_device_inline float3 operator*(const float3& a, const float f) { #ifdef __KERNEL_SSE__ return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f))); #else return make_float3(a.x*f, a.y*f, a.z*f); #endif } comp_device_inline float3 operator*(const float f, const float3& a) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */ #if defined(__KERNEL_SSE__) && 0 return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); #else return make_float3(a.x*f, a.y*f, a.z*f); #endif } comp_device_inline float3 operator/(const float f, const float3& a) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */ #if defined(__KERNEL_SSE__) && 0 __m128 rc = _mm_rcp_ps(a.m128); return float3(_mm_mul_ps(_mm_set1_ps(f),rc)); #else return make_float3(f / a.x, f / a.y, f / a.z); #endif } comp_device_inline float3 operator/(const float3& a, const float f) { float invf = 1.0f/f; return a * invf; } comp_device_inline float3 operator/(const float3& a, const float3& b) { /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. 
*/ #if defined(__KERNEL_SSE__) && 0 __m128 rc = _mm_rcp_ps(b.m128); return float3(_mm_mul_ps(a, rc)); #else return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); #endif } comp_device_inline float3 operator+(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_add_ps(a.m128, b.m128)); #else return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); #endif } comp_device_inline float3 operator-(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_sub_ps(a.m128, b.m128)); #else return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); #endif } comp_device_inline float3 operator+=(float3& a, const float3& b) { return a = a + b; } comp_device_inline float3 operator-=(float3& a, const float3& b) { return a = a - b; } comp_device_inline float3 operator*=(float3& a, const float3& b) { return a = a * b; } comp_device_inline float3 operator*=(float3& a, float f) { return a = a * f; } comp_device_inline float3 operator/=(float3& a, const float3& b) { return a = a / b; } comp_device_inline float3 operator/=(float3& a, float f) { float invf = 1.0f/f; return a = a * invf; } comp_device_inline bool operator==(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; #else return (a.x == b.x && a.y == b.y && a.z == b.z); #endif } comp_device_inline bool operator!=(const float3& a, const float3& b) { return !(a == b); } comp_device_inline float dot(const float3& a, const float3& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); #else return a.x*b.x + a.y*b.y + a.z*b.z; #endif } comp_device_inline float dot_xy(const float3& a, const float3& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b)); #else return a.x*b.x + a.y*b.y; #endif } comp_device_inline float3 cross(const float3& a, const float3& b) { float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - 
a.y*b.x); return r; } comp_device_inline float3 normalize(const float3& a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); return float3(_mm_div_ps(a.m128, norm)); #else return a/len(a); #endif } comp_device_inline float3 min(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_min_ps(a.m128, b.m128)); #else return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); #endif } comp_device_inline float3 max(const float3& a, const float3& b) { #ifdef __KERNEL_SSE__ return float3(_mm_max_ps(a.m128, b.m128)); #else return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); #endif } comp_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx) { return min(max(a, mn), mx); } comp_device_inline float3 clamp(const float3& a, float mn, float mx) { return clamp(a, make_float3(mn,mn,mn), make_float3(mx,mx,mx)); } comp_device_inline float3 fabs(const float3& a) { #ifdef __KERNEL_SSE__ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); return float3(_mm_and_ps(a.m128, mask)); #else return make_float3(fabs(a.x), fabs(a.y), fabs(a.z)); #endif } comp_device_inline float3 mix(const float3& a, const float3& b, float t) { return a + t*(b - a); } comp_device_inline float3 rcp(const float3& a) { #ifdef __KERNEL_SSE__ const float4 r(_mm_rcp_ps(a.m128)); return float3(_mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a))); #else return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z); #endif } #endif /* _KERNEL_CPU_ */ comp_device_inline float max3(float3 a) { return max(max(a.x, a.y), a.z); } comp_device_inline float len(const float3 a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); #else return sqrt(dot(a, a)); #endif } comp_device_inline float len_squared(const float3 a) { return dot(a, a); } comp_device_inline float3 saturate3(float3 a) { return 
make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); } comp_device_inline float3 normalize_len(const float3 a, comp_inout(float, t)) { comp_get_ref(t) = len(a); float x = 1.0f / comp_get_ref(t); return a*x; } comp_device_inline float3 safe_normalize(const float3 a) { float t = len(a); return (t != 0.0f)? a * (1.0f/t) : a; } comp_device_inline float3 safe_normalize_len(const float3 a, comp_inout(float, t)) { comp_get_ref(t) = len(a); return (comp_get_ref(t) != 0.0f)? a/(comp_get_ref(t)): a; } comp_device_inline bool is_zero(const float3 a) { #ifdef __KERNEL_SSE__ return a == make_float3(0.0f); #else return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); #endif } comp_device_inline float reduce_add(const float3 a) { return (a.x + a.y + a.z); } comp_device_inline float average(const float3 a) { return reduce_add(a)*(1.0f/3.0f); } comp_device_inline bool isequal_float3(const float3 a, const float3 b) { #ifdef _KERNEL_OPENCL_ return all(a == b); #else return a == b; #endif } COMP_NAMESPACE_END #endif /* _UTIL_MATH_FLOAT3_H_ */ // END #include "util/util_math_float3.h" // -------------------------------------------------------- // START #include "util/util_math_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_MATH_FLOAT4_H_ #define _UTIL_MATH_FLOAT4_H_ #ifndef _UTIL_MATH_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN /******************************************************************************* * Declaration. */ #ifdef _KERNEL_CPU_ comp_device_inline float4 operator-(const float4& a); comp_device_inline float4 operator*(const float4& a, const float4& b); comp_device_inline float4 operator*(const float4& a, float f); comp_device_inline float4 operator*(float f, const float4& a); comp_device_inline float4 operator/(const float4& a, float f); comp_device_inline float4 operator/(const float4& a, const float4& b); comp_device_inline float4 operator+(const float4& a, const float4& b); comp_device_inline float4 operator+(const float4& a, float f); comp_device_inline float4 operator-(const float4& a, const float4& b); comp_device_inline float4 operator-(const float4& a, const float b); comp_device_inline float4 operator+=(float4& a, const float4& b); comp_device_inline float4 operator*=(float4& a, const float4& b); comp_device_inline float4 operator/=(float4& a, float f); comp_device_inline int4 operator<(const float4& a, const float4& b); comp_device_inline int4 operator>=(const float4& a, const float4& b); comp_device_inline int4 operator<=(const float4& a, const float4& b); comp_device_inline bool operator==(const float4& a, const float4& b); comp_device_inline float dot(const float4& a, const float4& b); comp_device_inline float4 fabs(const float4& a); comp_device_inline float len_squared(const float4& a); comp_device_inline float4 rcp(const float4& a); comp_device_inline float4 cross(const float4& a, const float4& b); comp_device_inline bool is_zero(const float4& a); comp_device_inline float reduce_add(const float4& a); comp_device_inline float average(const float4& a); comp_device_inline float len(const float4& a); comp_device_inline float4 normalize(const float4& a); comp_device_inline float4 safe_normalize(const float4& a); comp_device_inline float4 min(const float4& a, const float4& b); comp_device_inline float4 max(const float4& a, const float4& 
b); comp_device_inline float4 mix(const float4& a, const float4& b, float t); #endif /* _KERNEL_CPU_ */ #ifdef __KERNEL_SSE__ template __forceinline const float4 shuffle(const float4& b); template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b); # ifdef __KERNEL_SSE3__ template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b); template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b); # endif #endif /* __KERNEL_SSE__ */ #ifdef _KERNEL_CPU_ comp_device_inline float4 select(const int4& mask, const float4& a, const float4& b); comp_device_inline float4 reduce_min(const float4& a); comp_device_inline float4 reduce_max(const float4& a); # if 0 comp_device_inline float4 reduce_add(const float4& a); # endif #endif /* !_KERNEL_GPU_ */ #define length_v4(a) sqrt(dot(a, a)) /******************************************************************************* * Definition. */ #ifdef _KERNEL_CPU_ comp_device_inline float4 operator-(const float4& a) { #ifdef __KERNEL_SSE__ __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); return float4(_mm_xor_ps(a.m128, mask)); #else return make_float4(-a.x, -a.y, -a.z, -a.w); #endif } comp_device_inline float4 operator*(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_mul_ps(a.m128, b.m128)); #else return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); #endif } comp_device_inline float4 operator*(const float4& a, float f) { #if defined(__KERNEL_SSE__) return a * make_float4(f); #else return make_float4(a.x*f, a.y*f, a.z*f, a.w*f); #endif } comp_device_inline float4 operator*(float f, const float4& a) { return a * f; } comp_device_inline float4 operator/(const float4& a, float f) { return a * (1.0f/f); } comp_device_inline float4 operator/(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return a * rcp(b); #else return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); #endif } comp_device_inline float4 operator+(const float4& a, const float4& b) { 
#ifdef __KERNEL_SSE__ return float4(_mm_add_ps(a.m128, b.m128)); #else return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); #endif } comp_device_inline float4 operator+(const float4& a, const float b) { return make_float4(a.x+b, a.y+b, a.z+b, a.w+b); } comp_device_inline float4 operator-(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_sub_ps(a.m128, b.m128)); #else return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); #endif } comp_device_inline float4 operator-(const float4& a, const float b) { return make_float4(a.x-b, a.y-b, a.z-b, a.w-b); } comp_device_inline float4 operator+=(float4& a, const float4& b) { return a = a + b; } comp_device_inline float4 operator*=(float4& a, const float4& b) { return a = a * b; } comp_device_inline float4 operator/=(float4& a, float f) { return a = a / f; } comp_device_inline int4 operator<(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return int4(_mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128))); #else return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); #endif } comp_device_inline int4 operator>=(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return int4(_mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128))); #else return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); #endif } comp_device_inline int4 operator<=(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. 
*/ return int4(_mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128))); #else return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); #endif } comp_device_inline bool operator==(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; #else return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); #endif } comp_device_inline float dot(const float4& a, const float4& b) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); #else return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w); #endif } comp_device_inline float4 fabs(const float4& a) { return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w)); } comp_device_inline float len_squared(const float4& a) { return dot(a, a); } comp_device_inline float4 rcp(const float4& a) { #ifdef __KERNEL_SSE__ float4 r(_mm_rcp_ps(a.m128)); return float4(_mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a))); #else return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w); #endif } comp_device_inline float4 cross(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b)); #else return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f); #endif } comp_device_inline bool is_zero(const float4& a) { #ifdef __KERNEL_SSE__ return a == make_float4(0.0f); #else return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); #endif } comp_device_inline float reduce_add(const float4& a) { #ifdef __KERNEL_SSE__ float4 h(shuffle<1,0,3,2>(a) + a); /* TODO(sergey): Investigate efficiency. 
*/ return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); #else return ((a.x + a.y) + (a.z + a.w)); #endif } comp_device_inline float average(const float4& a) { return reduce_add(a) * 0.25f; } comp_device_inline float len(const float4& a) { return sqrt(dot(a, a)); } comp_device_inline float4 normalize(const float4& a) { return a/len(a); } comp_device_inline float4 safe_normalize(const float4& a) { float t = len(a); return (t != 0.0f)? a/t: a; } comp_device_inline float4 min(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_min_ps(a.m128, b.m128)); #else return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); #endif } comp_device_inline float4 max(const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ return float4(_mm_max_ps(a.m128, b.m128)); #else return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); #endif } #endif /* !__KERNEL_OPENCL__*/ #ifdef __KERNEL_SSE__ template __forceinline const float4 shuffle(const float4& b) { return float4(_mm_castsi128_ps( _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); } template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b) { return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); } # ifdef __KERNEL_SSE3__ template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b) { return float4(_mm_moveldup_ps(b)); } template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b) { return float4(_mm_movehdup_ps(b)); } # endif /* __KERNEL_SSE3__ */ #endif /* __KERNEL_SSE__ */ #ifdef _KERNEL_CPU_ comp_device_inline float4 select(const int4& mask, const float4& a, const float4& b) { #ifdef __KERNEL_SSE__ /* TODO(sergey): avoid cvt. */ return float4(_mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a), _mm_andnot_ps(_mm_cvtepi32_ps(mask), b))); #else return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? 
a.w: b.w); #endif } comp_device_inline float4 reduce_min(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = min(shuffle<1,0,3,2>(a), a); return min(shuffle<2,3,0,1>(h), h); #else return make_float4(min(min(a.x, a.y), min(a.z, a.w))); #endif } comp_device_inline float4 reduce_max(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = max(shuffle<1,0,3,2>(a), a); return max(shuffle<2,3,0,1>(h), h); #else return make_float4(max(max(a.x, a.y), max(a.z, a.w))); #endif } comp_device_inline float4 mix(const float4& a, const float4& b, float t) { return a + t*(b - a); } #if 0 comp_device_inline float4 reduce_add(const float4& a) { #ifdef __KERNEL_SSE__ float4 h = shuffle<1,0,3,2>(a) + a; return shuffle<2,3,0,1>(h) + h; #else return make_float4((a.x + a.y) + (a.z + a.w)); #endif } #endif #endif /* !_KERNEL_GPU_ */ COMP_NAMESPACE_END #endif /* _UTIL_MATH_FLOAT4_H_ */ // END #include "util/util_math_float4.h" COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ /* Interpolation */ template A lerp(const A& a, const A& b, const B& t) { return (A)(a * ((B)1 - t) + b * t); } /* Triangle */ comp_device_inline float triangle_area(const float3& v1, const float3& v2, const float3& v3) { return len(cross(v3 - v2, v1 - v2))*0.5f; } #endif /* _KERNEL_CPU_ */ /* Orthonormal vectors */ comp_device_inline void make_orthonormals(const float3 N, comp_inout(float3, a), comp_inout(float3, b)) { if(N.x != N.y || N.x != N.z) comp_get_ref(a) = make_float3(N.z-N.y, N.x-N.z, N.y-N.x); //(1,1,1)x N else comp_get_ref(a) = make_float3(N.z-N.y, N.x+N.z, -N.y-N.x); //(-1,1,1)x N comp_get_ref(a) = normalize(comp_get_ref(a)); comp_get_ref(b) = cross(N, comp_get_ref(a)); } /* Color division */ comp_device_inline float3 safe_invert_color(float3 a) { float x, y, z; x = (a.x != 0.0f)? 1.0f/a.x: 0.0f; y = (a.y != 0.0f)? 1.0f/a.y: 0.0f; z = (a.z != 0.0f)? 1.0f/a.z: 0.0f; return make_float3(x, y, z); } comp_device_inline float3 safe_divide_color(float3 a, float3 b) { float x, y, z; x = (b.x != 0.0f)? 
a.x/b.x: 0.0f; y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; return make_float3(x, y, z); } comp_device_inline float3 safe_divide_even_color(float3 a, float3 b) { float x, y, z; x = (b.x != 0.0f)? a.x/b.x: 0.0f; y = (b.y != 0.0f)? a.y/b.y: 0.0f; z = (b.z != 0.0f)? a.z/b.z: 0.0f; /* try to get gray even if b is zero */ if(b.x == 0.0f) { if(b.y == 0.0f) { x = z; y = z; } else if(b.z == 0.0f) { x = y; z = y; } else x = 0.5f*(y + z); } else if(b.y == 0.0f) { if(b.z == 0.0f) { y = x; z = x; } else y = 0.5f*(x + z); } else if(b.z == 0.0f) { z = 0.5f*(x + y); } return make_float3(x, y, z); } /* Rotation of point around axis and angle */ comp_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) { float costheta = cos(angle); float sintheta = sin(angle); float3 r; r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); return r; } /* NaN-safe math ops */ comp_device_inline float safe_sqrt(float f) { return sqrt(max(f, 0.0f)); } comp_device float safe_asin(float a) { return asin(clamp(a, -1.0f, 1.0f)); } comp_device float safe_acos(float a) { return acos(clamp(a, -1.0f, 1.0f)); } comp_device float compatible_pow(float x, float y) { #ifdef _KERNEL_GPU_ if(y == 0.0f) /* x^0 -> 1, including 0^0 */ return 1.0f; /* GPU pow doesn't accept negative x, do manual checks here */ if(x < 0.0f) { if(mod(-y, 2.0f) == 0.0f) return pow(-x, y); else return -pow(-x, y); } else if(x == 0.0f) return 0.0f; #endif return pow(x, y); } comp_device 
float safe_pow(float a, float b) { if(UNLIKELY(a < 0.0f && b != float_to_int(b))) return 0.0f; return compatible_pow(a, b); } comp_device float safe_divide(float a, float b) { return (b != 0.0f)? a/b: 0.0f; } comp_device float safe_log(float a, float b) { if(UNLIKELY(a <= 0.0f || b <= 0.0f)) return 0.0f; return safe_divide(log(a),log(b)); } comp_device float safe_modulo(float a, float b) { return (b != 0.0f)? mod(a, b): 0.0f; } comp_device_inline float xor_signmask(float x, int y) { return int_as_float(float_as_int(x) ^ y); } COMP_NAMESPACE_END #endif /* _UTIL_MATH_H_ */ // END #include "util/util_math.h" // -------------------------------------------------------- // START #include "util/util_types.h" // -------------------------------------------------------- /* * Copyright 2011-2013 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_H_ #define _UTIL_TYPES_H_ #ifdef _KERNEL_CPU_ #include #endif /* Bitness */ #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) # define __KERNEL_64_BIT__ #endif /* Qualifiers for kernel code shared by CPU and GPU */ #ifdef _KERNEL_CPU_ # define comp_device_inline static inline # define comp_device_noinline static # define comp_global # define comp_constant # define comp_local # define comp_local_param # define comp_private # define comp_restrict __restrict # define __KERNEL_WITH_SSE_ALIGN__ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define comp_device_inline static __forceinline # define comp_device_forceinline static __forceinline # define comp_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define comp_try_align(...) __declspec(align(__VA_ARGS__)) # else /* __KERNEL_64_BIT__ */ # undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ # define comp_try_align(...) # endif /* __KERNEL_64_BIT__ */ # define comp_may_alias # define comp_always_inline __forceinline # define comp_never_inline __declspec(noinline) # define comp_maybe_unused # else /* _WIN32 && !FREE_WINDOWS */ //# define comp_device_inline static inline __attribute__((always_inline)) # define comp_device_forceinline static inline __attribute__((always_inline)) # define comp_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) # endif # define comp_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) # define comp_may_alias __attribute__((__may_alias__)) # define comp_always_inline __attribute__((always_inline)) # define comp_never_inline __attribute__((noinline)) # define comp_maybe_unused __attribute__((used)) # endif /* _WIN32 && !FREE_WINDOWS */ #endif /* _KERNEL_GPU_ */ /* Standard Integer Types */ #ifdef _KERNEL_CPU_ /* int8_t, uint16_t, and friends */ # ifndef _WIN32 # include # endif /* SIMD Types */ // # include "util/util_optimization.h" #endif /* _KERNEL_GPU_ */ COMP_NAMESPACE_BEGIN /* Types * * Define simpler unsigned type names, and integer with defined number of bits. * Also vector types, named to be compatible with OpenCL builtin types, while * working for CUDA and C++ too. */ /* Shorter Unsigned Names */ #ifdef _KERNEL_CPU_ typedef unsigned char uchar; typedef unsigned int uint; typedef unsigned short ushort; #endif // #ifndef __KERNEL_OPENCL__ // typedef unsigned char uchar; // typedef unsigned int uint; // typedef unsigned short ushort; // #endif /* Fixed Bits Types */ // #ifdef __KERNEL_OPENCL__ // typedef ulong uint64_t; // #endif #ifdef _KERNEL_CPU_ # ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef long long int64_t; typedef unsigned long long uint64_t; # ifdef __KERNEL_64_BIT__ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif # endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; #endif /* _KERNEL_GPU_ */ // comp_device_inline size_t align_up(size_t offset, size_t alignment) // { // return (offset + alignment - 1) & ~(alignment - 1); // } // // comp_device_inline size_t divide_up(size_t x, size_t y) // { // return (x + y - 1) / y; // } // // comp_device_inline size_t round_up(size_t x, size_t multiple) // { // return ((x + multiple - 1) / multiple) * multiple; // } // // comp_device_inline size_t 
round_down(size_t x, size_t multiple) // { // return (x / multiple) * multiple; // } /* Interpolation types for textures * cuda also use texture space to store other objects */ #if defined(_KERNEL_CPU_) || defined(_KERNEL_OPENCL_) enum InterpolationType { INTERPOLATION_LINEAR = 0, INTERPOLATION_CLOSEST = 1, INTERPOLATION_CUBIC = 2, INTERPOLATION_SMART = 3 }; enum ExtensionType { /* Cause the image to repeat horizontally and vertically. */ EXTENSION_REPEAT = 0, /* Extend by repeating edge pixels of the image. */ EXTENSION_EXTEND = 1, /* Clip to image size and set exterior pixels as transparent. */ EXTENSION_CLIP = 2, }; #endif /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. */ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ #if defined(__GNUC__) && defined(_KERNEL_CPU_) # define LIKELY(x) __builtin_expect(!!(x), 1) # define UNLIKELY(x) __builtin_expect(!!(x), 0) #else # define LIKELY(x) (x) # define UNLIKELY(x) (x) #endif // #if defined(__GNUC__) || defined(__clang__) && defined(_KERNEL_CPU_) // /* Some magic to be sure we don't have reference in the type. */ // template static inline T decltype_helper(T x) { return x; } // # define TYPEOF(x) decltype(decltype_helper(x)) // #endif COMP_NAMESPACE_END #ifdef _KERNEL_CPU_ # include # define util_assert(statement) assert(statement) #else # define util_assert(statement) #endif /* Vectorized types declaration. */ // -------------------------------------------------------- // START #include "util/util_types_int2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT2_H_ #define _UTIL_TYPES_INT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int2 { int x, y; __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int2 make_int2(int x, int y); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int2(x, y) ((ivec2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT2_H_ */ // END #include "util/util_types_int2.h" // -------------------------------------------------------- // START #include "util/util_types_int3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT3_H_ #define _UTIL_TYPES_INT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) int3 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int3(); __forceinline int3(const int3& a); __forceinline explicit int3(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int3& operator =(const int3& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int3 make_int3(int i); comp_device_inline int3 make_int3(int x, int y, int z); comp_device_inline void print_int3(const char *label, const int3& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int3(x, y, z) ((int3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT3_H_ */ // END #include "util/util_types_int3.h" // -------------------------------------------------------- // START #include "util/util_types_int4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_INT4_H_ #define _UTIL_TYPES_INT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float3; struct comp_try_align(16) int4 { #ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; }; __forceinline int4(); __forceinline int4(const int4& a); __forceinline explicit int4(const __m128i& a); __forceinline operator const __m128i&(void) const; __forceinline operator __m128i&(void); __forceinline int4& operator=(const int4& a); #else /* __KERNEL_SSE__ */ int x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline int operator[](int i) const; __forceinline int& operator[](int i); }; comp_device_inline int4 make_int4(int i); comp_device_inline int4 make_int4(int x, int y, int z, int w); comp_device_inline int4 make_int4(const float3& f); comp_device_inline void print_int4(const char *label, const int4& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_int4(x, y, z, w) ((int4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_INT4_H_ */ // END #include "util/util_types_int4.h" // -------------------------------------------------------- // START #include "util/util_types_uint2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT2_H_ #define _UTIL_TYPES_UINT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint2 { uint x, y; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint2 make_uint2(uint x, uint y); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint2(x, y) ((uint2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT2_H_ */ // END #include "util/util_types_uint2.h" // -------------------------------------------------------- // START #include "util/util_types_uint3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT3_H_ #define _UTIL_TYPES_UINT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint3 { uint x, y, z; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint3 make_uint3(uint x, uint y, uint z); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint3(x, y, z) ((uint3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT3_H_ */ // END #include "util/util_types_uint3.h" // -------------------------------------------------------- // START #include "util/util_types_uint4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_UINT4_H_ #define _UTIL_TYPES_UINT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct uint4 { uint x, y, z, w; __forceinline uint operator[](uint i) const; __forceinline uint& operator[](uint i); }; comp_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_uint4(x, y, z, w) ((uint4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_UINT4_H_ */ // END #include "util/util_types_uint4.h" // -------------------------------------------------------- // START #include "util/util_types_float2.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT2_H_ #define _UTIL_TYPES_FLOAT2_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct float2 { float x, y; __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float2 make_float2(float x, float y); comp_device_inline void print_float2(const char *label, const float2& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float2(x, y) ((float2)(x, y)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT2_H_ */ // END #include "util/util_types_float2.h" // -------------------------------------------------------- // START #include "util/util_types_float3.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT3_H_ #define _UTIL_TYPES_FLOAT3_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct comp_try_align(16) float3 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float3(); __forceinline float3(const float3& a); __forceinline explicit float3(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float3& operator =(const float3& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float3 make_float3(float f); comp_device_inline float3 make_float3(float x, float y, float z); comp_device_inline void print_float3(const char *label, const float3& a); #endif /* _KERNEL_CPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float3(x, y, z) ((float3)(x, y, z)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT3_H_ */ // END #include "util/util_types_float3.h" // -------------------------------------------------------- // START #include "util/util_types_float4.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef _UTIL_TYPES_FLOAT4_H_ #define _UTIL_TYPES_FLOAT4_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." 
#endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ struct int4; struct comp_try_align(16) float4 { #ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; }; __forceinline float4(); __forceinline float4(const float4& a); __forceinline explicit float4(const __m128& a); __forceinline operator const __m128&(void) const; __forceinline operator __m128&(void); __forceinline float4& operator =(const float4& a); #else /* __KERNEL_SSE__ */ float x, y, z, w; #endif /* __KERNEL_SSE__ */ __forceinline float operator[](int i) const; __forceinline float& operator[](int i); }; comp_device_inline float4 make_float4(float f); comp_device_inline float4 make_float4(float x, float y, float z, float w); comp_device_inline float4 make_float4(const int4& i); comp_device_inline void print_float4(const char *label, const float4& a); #endif /* _KERNEL_GPU_ */ #ifdef _KERNEL_OPENCL_ #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #endif COMP_NAMESPACE_END #endif /* _UTIL_TYPES_FLOAT4_H_ */ // END #include "util/util_types_float4.h" /* Vectorized types implementation. */ // -------------------------------------------------------- // START #include "util/util_types_int2_impl.h" // -------------------------------------------------------- /* * Copyright 2011-2017 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef _UTIL_TYPES_INT2_IMPL_H_ #define _UTIL_TYPES_INT2_IMPL_H_ #ifndef _UTIL_TYPES_H_ # error "Do not include this file directly, include util_types.h instead." #endif COMP_NAMESPACE_BEGIN #ifdef _KERNEL_CPU_ int int2::operator[](int i) const { util_assert(i >= 0); util_assert(i < 2);