forked from sascha/godot
commit
497653ab53
@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import resource_to_cpp
|
||||
|
||||
Import("env")
|
||||
Import("env_modules")
|
||||
|
||||
env_oidn = env_modules.Clone()

# Thirdparty source files
# Every path below is relative to `thirdparty_dir`; the prefix is applied
# while the final lists are built.
thirdparty_dir = "#thirdparty/oidn/"
thirdparty_sources = [
    thirdparty_dir + rel_path
    for rel_path in (
        "core/api.cpp",
        "core/device.cpp",
        "core/filter.cpp",
        "core/network.cpp",
        "core/autoencoder.cpp",
        "core/transfer_function.cpp",
        "weights/rtlightmap_hdr.gen.cpp",
        "mkl-dnn/src/common/batch_normalization.cpp",
        "mkl-dnn/src/common/concat.cpp",
        "mkl-dnn/src/common/convolution.cpp",
        "mkl-dnn/src/common/convolution_pd.cpp",
        "mkl-dnn/src/common/deconvolution.cpp",
        "mkl-dnn/src/common/eltwise.cpp",
        "mkl-dnn/src/common/engine.cpp",
        "mkl-dnn/src/common/inner_product.cpp",
        "mkl-dnn/src/common/inner_product_pd.cpp",
        "mkl-dnn/src/common/lrn.cpp",
        "mkl-dnn/src/common/memory.cpp",
        "mkl-dnn/src/common/memory_desc_wrapper.cpp",
        "mkl-dnn/src/common/mkldnn_debug.cpp",
        "mkl-dnn/src/common/mkldnn_debug_autogenerated.cpp",
        "mkl-dnn/src/common/pooling.cpp",
        "mkl-dnn/src/common/primitive.cpp",
        "mkl-dnn/src/common/primitive_attr.cpp",
        "mkl-dnn/src/common/primitive_desc.cpp",
        "mkl-dnn/src/common/primitive_exec_types.cpp",
        "mkl-dnn/src/common/primitive_iterator.cpp",
        "mkl-dnn/src/common/query.cpp",
        "mkl-dnn/src/common/reorder.cpp",
        "mkl-dnn/src/common/rnn.cpp",
        "mkl-dnn/src/common/scratchpad.cpp",
        "mkl-dnn/src/common/shuffle.cpp",
        "mkl-dnn/src/common/softmax.cpp",
        "mkl-dnn/src/common/stream.cpp",
        "mkl-dnn/src/common/sum.cpp",
        "mkl-dnn/src/common/utils.cpp",
        "mkl-dnn/src/common/verbose.cpp",
        "mkl-dnn/src/cpu/cpu_barrier.cpp",
        "mkl-dnn/src/cpu/cpu_concat.cpp",
        "mkl-dnn/src/cpu/cpu_engine.cpp",
        "mkl-dnn/src/cpu/cpu_memory.cpp",
        "mkl-dnn/src/cpu/cpu_reducer.cpp",
        "mkl-dnn/src/cpu/cpu_reorder.cpp",
        "mkl-dnn/src/cpu/cpu_sum.cpp",
        "mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.cpp",
        "mkl-dnn/src/cpu/jit_avx2_convolution.cpp",
        "mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.cpp",
        "mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp",
        "mkl-dnn/src/cpu/jit_avx512_common_convolution.cpp",
        "mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.cpp",
        "mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.cpp",
        "mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.cpp",
        "mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.cpp",
        "mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.cpp",
        "mkl-dnn/src/cpu/jit_sse42_convolution.cpp",
        "mkl-dnn/src/cpu/jit_transpose_src_utils.cpp",
        "mkl-dnn/src/cpu/jit_uni_eltwise.cpp",
        "mkl-dnn/src/cpu/jit_uni_pool_kernel_f32.cpp",
        "mkl-dnn/src/cpu/jit_uni_pooling.cpp",
        "mkl-dnn/src/cpu/jit_uni_reorder.cpp",
        "mkl-dnn/src/cpu/jit_uni_reorder_utils.cpp",
        "mkl-dnn/src/cpu/jit_utils/jit_utils.cpp",
        "mkl-dnn/src/cpu/jit_utils/jitprofiling/jitprofiling.c",
        "common/platform.cpp",
        "common/thread.cpp",
        "common/tensor.cpp",
    )
]

# Include directories; the empty entry resolves to `thirdparty_dir` itself.
thirdparty_include_dirs = [
    thirdparty_dir + rel_dir
    for rel_dir in (
        "",
        "include",
        "mkl-dnn/include",
        "mkl-dnn/src",
        "mkl-dnn/src/common",
        "mkl-dnn/src/cpu/xbyak",
        "mkl-dnn/src/cpu",
    )
]

env_oidn.Prepend(CPPPATH=thirdparty_include_dirs)

# Preprocessor defines applied to `env_oidn` and to its clone below.
oidn_defines = [
    "MKLDNN_THR=MKLDNN_THR_SEQ",
    "OIDN_STATIC_LIB",
    "__STDC_CONSTANT_MACROS",
    "__STDC_LIMIT_MACROS",
    "DISABLE_VERBOSE",
    "MKLDNN_ENABLE_CONCURRENT_EXEC",
    "NDEBUG",
]
env_oidn.Append(CPPDEFINES=oidn_defines)

# The third-party sources are added through a clone that has warnings disabled.
env_thirdparty = env_oidn.Clone()
env_thirdparty.disable_warnings()
env_thirdparty.add_source_files(env.modules_sources, thirdparty_sources)

# The weights .gen.cpp is produced from the .tza file by resource_to_cpp.tza_to_cpp.
weights_in_path = thirdparty_dir + "weights/rtlightmap_hdr.tza"
weights_out_path = thirdparty_dir + "weights/rtlightmap_hdr.gen.cpp"

env_thirdparty.Depends(weights_out_path, weights_in_path)
env_thirdparty.CommandNoCache(weights_out_path, weights_in_path, resource_to_cpp.tza_to_cpp)

# Module sources: the wrapper uses the OIDN environment (include paths and
# defines above), the remaining files use the plain modules environment.
env_oidn.add_source_files(env.modules_sources, "denoise_wrapper.cpp")
env_modules.add_source_files(env.modules_sources, ["register_types.cpp", "lightmap_denoiser.cpp"])
|
||||
@ -0,0 +1,15 @@
|
||||
def can_build(env, platform):
    """Return whether the module can be built for ``platform`` with ``env``.

    The result is truthy only for a tools build on one of the listed desktop
    platforms, with ``env["bits"] == "64"`` and an ``env["arch"]`` other than
    ``"arm64"``.
    """
    # Thirdparty dependency OpenImage Denoise includes oneDNN library
    # which only supports 64-bit architectures.
    # It's also only relevant for tools build and desktop platforms,
    # as doing lightmap generation and denoising on Android or HTML5
    # would be a bit far-fetched.
    # Note: oneDNN doesn't support ARM64, OIDN needs updating to the latest version
    is_desktop_platform = platform in ("x11", "osx", "windows", "server")
    is_64_bit = env["bits"] == "64"
    is_not_arm64 = env["arch"] != "arm64"
    return env["tools"] and is_desktop_platform and is_64_bit and is_not_arm64
|
||||
|
||||
|
||||
def configure(env):
    """Module configuration hook; performs no work and returns ``None``."""
    return None
|
||||
@ -0,0 +1,67 @@
|
||||
/*************************************************************************/
|
||||
/* denoise_wrapper.cpp */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "denoise_wrapper.h"
|
||||
#include "core/os/copymem.h"
|
||||
#include "core/os/memory.h"
|
||||
#include "thirdparty/oidn/include/OpenImageDenoise/oidn.h"
|
||||
#include <stdio.h>
|
||||
|
||||
void *oidn_denoiser_init() {
|
||||
OIDNDeviceImpl *device = oidnNewDevice(OIDN_DEVICE_TYPE_CPU);
|
||||
oidnCommitDevice(device);
|
||||
return device;
|
||||
}
|
||||
|
||||
bool oidn_denoise(void *deviceptr, float *p_floats, int p_width, int p_height) {
|
||||
OIDNDeviceImpl *device = (OIDNDeviceImpl *)deviceptr;
|
||||
OIDNFilter filter = oidnNewFilter(device, "RTLightmap");
|
||||
void *input_buffer = memalloc(p_width * p_height * 3 * sizeof(float));
|
||||
copymem(input_buffer, p_floats, p_width * p_height * 3 * sizeof(float));
|
||||
oidnSetSharedFilterImage(filter, "color", input_buffer, OIDN_FORMAT_FLOAT3, p_width, p_height, 0, 0, 0);
|
||||
oidnSetSharedFilterImage(filter, "output", (void *)p_floats, OIDN_FORMAT_FLOAT3, p_width, p_height, 0, 0, 0);
|
||||
oidnSetFilter1b(filter, "hdr", true);
|
||||
oidnCommitFilter(filter);
|
||||
oidnExecuteFilter(filter);
|
||||
|
||||
const char *msg;
|
||||
bool success = true;
|
||||
if (oidnGetDeviceError(device, &msg) != OIDN_ERROR_NONE) {
|
||||
printf("LightmapDenoiser: %s\n", msg);
|
||||
success = false;
|
||||
}
|
||||
|
||||
oidnReleaseFilter(filter);
|
||||
return success;
|
||||
}
|
||||
|
||||
void oidn_denoiser_finish(void *device) {
|
||||
oidnReleaseDevice((OIDNDeviceImpl *)device);
|
||||
}
|
||||
@ -0,0 +1,38 @@
|
||||
/*************************************************************************/
|
||||
/* denoise_wrapper.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef DENOISE_WRAPPER_H
|
||||
#define DENOISE_WRAPPER_H
|
||||
|
||||
// Thin wrapper API around the denoiser; devices are exchanged as opaque
// `void *` handles.

// Creates a denoiser device and returns its handle.
void *oidn_denoiser_init();

// Denoises the `p_width` x `p_height` float buffer `p_floats` using `device`.
// Returns false if the denoiser reported an error.
bool oidn_denoise(void *device, float *p_floats, int p_width, int p_height);

// Releases a device handle.
void oidn_denoiser_finish(void *device);
|
||||
|
||||
#endif // DENOISE_WRAPPER_H
|
||||
@ -0,0 +1,66 @@
|
||||
/*************************************************************************/
|
||||
/* lightmap_denoiser.cpp */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "lightmap_denoiser.h"
|
||||
|
||||
#include "denoise_wrapper.h"
|
||||
|
||||
// Factory: allocates a new LightmapDenoiserOIDN with memnew() and returns it
// through the base-class pointer type.
LightmapDenoiser *LightmapDenoiserOIDN::create_oidn_denoiser() {
	LightmapDenoiserOIDN *denoiser = memnew(LightmapDenoiserOIDN);
	return denoiser;
}
|
||||
|
||||
// Points `create_function` at this class's factory.
void LightmapDenoiserOIDN::make_default_denoiser() {
	create_function = &LightmapDenoiserOIDN::create_oidn_denoiser;
}
|
||||
|
||||
// Returns a denoised copy of `p_image`.
//
// The image is duplicated and converted to Image::FORMAT_RGBF, its pixel data
// is passed to oidn_denoise(), and the copy is re-created from the processed
// data. If oidn_denoise() reports failure, `p_image` itself is returned.
Ref<Image> LightmapDenoiserOIDN::denoise_image(const Ref<Image> &p_image) {
	Ref<Image> denoised = p_image->duplicate();
	denoised->convert(Image::FORMAT_RGBF);

	const int width = denoised->get_width();
	const int height = denoised->get_height();

	PoolByteArray pixels = denoised->get_data();
	{
		// The write accessor is scoped so it is gone before `pixels` is
		// handed to create() below.
		PoolByteArray::Write pixels_write = pixels.write();
		float *rgb = (float *)pixels_write.ptr();
		if (!oidn_denoise(device, rgb, width, height)) {
			return p_image;
		}
	}

	denoised->create(width, height, false, denoised->get_format(), pixels);
	return denoised;
}
|
||||
|
||||
// Acquires the denoiser device handle for the lifetime of this object.
LightmapDenoiserOIDN::LightmapDenoiserOIDN() :
		device(oidn_denoiser_init()) {
}
|
||||
|
||||
// Hands the device handle stored in `device` back to the wrapper for release.
LightmapDenoiserOIDN::~LightmapDenoiserOIDN() {
	void *owned_device = device;
	oidn_denoiser_finish(owned_device);
}
|
||||
@ -0,0 +1,56 @@
|
||||
/*************************************************************************/
|
||||
/* lightmap_denoiser.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef LIGHTMAP_DENOISER_H
|
||||
#define LIGHTMAP_DENOISER_H
|
||||
|
||||
#include "core/class_db.h"
|
||||
#include "scene/3d/lightmapper.h"
|
||||
|
||||
struct OIDNDeviceImpl;
|
||||
|
||||
class LightmapDenoiserOIDN : public LightmapDenoiser {
|
||||
GDCLASS(LightmapDenoiserOIDN, LightmapDenoiser);
|
||||
|
||||
protected:
|
||||
void *device = nullptr;
|
||||
|
||||
public:
|
||||
static LightmapDenoiser *create_oidn_denoiser();
|
||||
|
||||
Ref<Image> denoise_image(const Ref<Image> &p_image);
|
||||
|
||||
static void make_default_denoiser();
|
||||
|
||||
LightmapDenoiserOIDN();
|
||||
~LightmapDenoiserOIDN();
|
||||
};
|
||||
|
||||
#endif // LIGHTMAP_DENOISER_H
|
||||
@ -0,0 +1,41 @@
|
||||
/*************************************************************************/
|
||||
/* register_types.cpp */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "register_types.h"
|
||||
|
||||
#include "core/engine.h"
|
||||
#include "lightmap_denoiser.h"
|
||||
|
||||
// Module registration entry point: makes LightmapDenoiserOIDN the default denoiser.
void register_denoise_types() {
	LightmapDenoiserOIDN::make_default_denoiser();
}
|
||||
|
||||
// Module unregistration entry point; there is nothing to undo.
void unregister_denoise_types() {
}
|
||||
@ -0,0 +1,37 @@
|
||||
/*************************************************************************/
|
||||
/* register_types.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef DENOISE_REGISTER_TYPES_H
|
||||
#define DENOISE_REGISTER_TYPES_H
|
||||
|
||||
// Registration and unregistration entry points of the denoise module.
void register_denoise_types();
void unregister_denoise_types();
|
||||
|
||||
#endif // DENOISE_REGISTER_TYPES_H
|
||||
@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
## ======================================================================== ##
|
||||
## Copyright 2009-2019 Intel Corporation ##
|
||||
## ##
|
||||
## Licensed under the Apache License, Version 2.0 (the "License"); ##
|
||||
## you may not use this file except in compliance with the License. ##
|
||||
## You may obtain a copy of the License at ##
|
||||
## ##
|
||||
## http://www.apache.org/licenses/LICENSE-2.0 ##
|
||||
## ##
|
||||
## Unless required by applicable law or agreed to in writing, software ##
|
||||
## distributed under the License is distributed on an "AS IS" BASIS, ##
|
||||
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ##
|
||||
## See the License for the specific language governing permissions and ##
|
||||
## limitations under the License. ##
|
||||
## ======================================================================== ##
|
||||
|
||||
import os
|
||||
from array import array
|
||||
|
||||
def generate(in_path, out_path):
    """Generate a C++ file from the specified binary resource file.

    The output declares ``unsigned char <var_name>[]`` inside the nested
    ``oidn::weights`` namespaces, where ``<var_name>`` is the base name of
    ``in_path`` without its extension. Bytes are written as decimal literals,
    20 per line.

    Args:
        in_path: Path of the binary resource to embed (opened in ``"rb"`` mode).
        out_path: Path of the C++ file to write (opened in ``"w"`` mode).
    """
    bytes_per_row = 20
    # Splitting a non-empty literal always yields at least one scope, so the
    # namespace blocks (and their blank separator lines) are always emitted.
    scopes = "oidn::weights".split("::")

    file_name = os.path.basename(in_path)
    var_name = os.path.splitext(file_name)[0]

    with open(in_path, "rb") as in_file, open(out_path, "w") as out_file:
        # Header
        out_file.write("// Generated from: %s\n" % file_name)
        out_file.write("#include <cstddef>\n\n")

        # Open the namespaces
        out_file.write("".join("namespace %s {\n" % scope for scope in scopes))
        out_file.write("\n")

        # Read the file; array("B", ...) yields integer byte values.
        in_data = array("B", in_file.read())

        # Write the size (emitted as a commented-out line in the C++ output)
        out_file.write("//const size_t %s_size = %d;\n\n" % (var_name, len(in_data)))

        # Write the data one row at a time instead of one byte (and one comma)
        # per write() call; the text produced is unchanged.
        out_file.write("unsigned char %s[] = {" % var_name)
        for offset in range(0, len(in_data), bytes_per_row):
            row = ",".join("%d" % value for value in in_data[offset : offset + bytes_per_row])
            # Every row but the first is preceded by the comma closing the previous row.
            out_file.write(("\n" if offset == 0 else ",\n") + row)
        out_file.write("\n};\n")

        # Close the namespaces
        out_file.write("\n")
        out_file.write("".join("} // namespace %s\n" % scope for scope in reversed(scopes)))
|
||||
|
||||
|
||||
def tza_to_cpp(target, source, env):
    """Run ``generate`` for each (source, target) pair, in lockstep.

    Both entries of a pair are converted with ``str()`` before being passed
    on. ``env`` is accepted but not used.
    """
    for src_node, dst_node in zip(source, target):
        generate(str(src_node), str(dst_node))
|
||||
@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
Import("env")
|
||||
Import("env_modules")
|
||||
|
||||
env_lightmapper_rd = env_modules.Clone()

# Godot source files
# The Embree include directory is put at the front of the include path for
# every .cpp file of this module.
embree_include_dir = "#thirdparty/embree/include"
env_lightmapper_rd.Prepend(CPPPATH=[embree_include_dir])
env_lightmapper_rd.add_source_files(env.modules_sources, "*.cpp")
|
||||
@ -0,0 +1,6 @@
|
||||
def can_build(env, platform):
    """Return ``env["tools"] and env["module_raycast_enabled"]``.

    ``platform`` is not consulted, and ``env["module_raycast_enabled"]`` is
    only read when ``env["tools"]`` is truthy.
    """
    tools_enabled = env["tools"]
    if not tools_enabled:
        return tools_enabled
    return env["module_raycast_enabled"]
|
||||
|
||||
|
||||
def configure(env):
    """Module configuration hook; performs no work and returns ``None``."""
    return None
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,183 @@
|
||||
/*************************************************************************/
|
||||
/* lightmapper_cpu.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef LIGHTMAPPER_CPU_H
|
||||
#define LIGHTMAPPER_CPU_H
|
||||
|
||||
#include "core/local_vector.h"
|
||||
#include "scene/3d/lightmapper.h"
|
||||
#include "scene/resources/mesh.h"
|
||||
#include "scene/resources/surface_tool.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
class LightmapperCPU : public Lightmapper {
|
||||
GDCLASS(LightmapperCPU, Lightmapper)
|
||||
|
||||
struct MeshInstance {
|
||||
MeshData data;
|
||||
int slice = 0;
|
||||
Vector2i offset;
|
||||
Vector2i size;
|
||||
bool cast_shadows;
|
||||
bool generate_lightmap;
|
||||
String node_name;
|
||||
};
|
||||
|
||||
struct Light {
|
||||
Vector3 position;
|
||||
uint32_t type = LIGHT_TYPE_DIRECTIONAL;
|
||||
Vector3 direction;
|
||||
float energy;
|
||||
float indirect_multiplier;
|
||||
Color color;
|
||||
float range;
|
||||
float attenuation;
|
||||
float spot_angle;
|
||||
float spot_attenuation;
|
||||
bool bake_direct;
|
||||
};
|
||||
|
||||
struct LightmapTexel {
|
||||
Vector3 albedo;
|
||||
float alpha;
|
||||
Vector3 emission;
|
||||
Vector3 pos;
|
||||
Vector3 normal;
|
||||
|
||||
Vector3 direct_light;
|
||||
Vector3 output_light;
|
||||
|
||||
float area_coverage;
|
||||
};
|
||||
|
||||
struct BakeParams {
|
||||
float bias;
|
||||
int bounces;
|
||||
int samples;
|
||||
bool use_denoiser = true;
|
||||
Ref<Image> environment_panorama;
|
||||
Basis environment_transform;
|
||||
};
|
||||
|
||||
struct UVSeam {
|
||||
Vector2 edge0[2];
|
||||
Vector2 edge1[2];
|
||||
};
|
||||
|
||||
struct SeamEdge {
|
||||
Vector3 pos[2];
|
||||
Vector3 normal[2];
|
||||
Vector2 uv[2];
|
||||
|
||||
_FORCE_INLINE_ bool operator<(const SeamEdge &p_edge) const {
|
||||
return pos[0].x < p_edge.pos[0].x;
|
||||
}
|
||||
};
|
||||
|
||||
struct AtlasOffset {
|
||||
int slice;
|
||||
int x;
|
||||
int y;
|
||||
};
|
||||
|
||||
struct ThreadData;
|
||||
|
||||
typedef void (LightmapperCPU::*BakeThreadFunc)(uint32_t, void *);
|
||||
|
||||
struct ThreadData {
|
||||
LightmapperCPU *instance;
|
||||
uint32_t count;
|
||||
BakeThreadFunc thread_func;
|
||||
void *userdata;
|
||||
};
|
||||
|
||||
BakeParams parameters;
|
||||
|
||||
LocalVector<Ref<Image> > bake_textures;
|
||||
Map<RID, Ref<Image> > albedo_textures;
|
||||
Map<RID, Ref<Image> > emission_textures;
|
||||
|
||||
LocalVector<MeshInstance> mesh_instances;
|
||||
LocalVector<Light> lights;
|
||||
|
||||
LocalVector<LocalVector<LightmapTexel> > scene_lightmaps;
|
||||
LocalVector<LocalVector<int> > scene_lightmap_indices;
|
||||
Set<int> no_shadow_meshes;
|
||||
|
||||
std::atomic<uint32_t> thread_progress;
|
||||
std::atomic<bool> thread_cancelled;
|
||||
|
||||
Ref<LightmapRaycaster> raycaster;
|
||||
|
||||
Error _layout_atlas(int p_max_size, Vector2i *r_atlas_size, int *r_atlas_slices);
|
||||
|
||||
static void _thread_func_callback(void *p_thread_data);
|
||||
void _thread_func_wrapper(uint32_t p_idx, ThreadData *p_thread_data);
|
||||
bool _parallel_run(int p_count, const String &p_description, BakeThreadFunc p_thread_func, void *p_userdata, BakeStepFunc p_substep_func = nullptr);
|
||||
|
||||
void _generate_buffer(uint32_t p_idx, void *p_unused);
|
||||
Ref<Image> _init_bake_texture(const MeshData::TextureDef &p_texture_def, const Map<RID, Ref<Image> > &p_tex_cache, Image::Format p_default_format);
|
||||
Color _bilinear_sample(const Ref<Image> &p_img, const Vector2 &p_uv, bool p_clamp_x = false, bool p_clamp_y = false);
|
||||
Vector3 _fix_sample_position(const Vector3 &p_position, const Vector3 &p_texel_center, const Vector3 &p_normal, const Vector3 &p_tangent, const Vector3 &p_bitangent, const Vector2 &p_texel_size);
|
||||
void _plot_triangle(const Vector2 *p_vertices, const Vector3 *p_positions, const Vector3 *p_normals, const Vector2 *p_uvs, const Ref<Image> &p_albedo_texture, const Ref<Image> &p_emission_texture, Vector2i p_size, LocalVector<LightmapTexel> &r_texels, LocalVector<int> &r_lightmap_indices);
|
||||
|
||||
void _compute_direct_light(uint32_t p_idx, void *r_lightmap);
|
||||
|
||||
void _compute_indirect_light(uint32_t p_idx, void *r_lightmap);
|
||||
|
||||
void _post_process(uint32_t p_idx, void *r_output);
|
||||
void _compute_seams(const MeshInstance &p_mesh, LocalVector<UVSeam> &r_seams);
|
||||
void _fix_seams(const LocalVector<UVSeam> &p_seams, Vector3 *r_lightmap, Vector2i p_size);
|
||||
void _fix_seam(const Vector2 &p_pos0, const Vector2 &p_pos1, const Vector2 &p_uv0, const Vector2 &p_uv1, const Vector3 *p_read_buffer, Vector3 *r_write_buffer, const Vector2i &p_size);
|
||||
void _dilate_lightmap(Vector3 *r_lightmap, const LocalVector<int> p_indices, Vector2i p_size, int margin);
|
||||
|
||||
void _blit_lightmap(const Vector<Vector3> &p_src, const Vector2i &p_size, Ref<Image> &p_dst, int p_x, int p_y, bool p_with_padding);
|
||||
|
||||
public:
|
||||
virtual void add_albedo_texture(Ref<Texture> p_texture);
|
||||
virtual void add_emission_texture(Ref<Texture> p_texture);
|
||||
virtual void add_mesh(const MeshData &p_mesh, Vector2i p_size);
|
||||
virtual void add_directional_light(bool p_bake_direct, const Vector3 &p_direction, const Color &p_color, float p_energy, float p_indirect_multiplier);
|
||||
virtual void add_omni_light(bool p_bake_direct, const Vector3 &p_position, const Color &p_color, float p_energy, float p_indirect_multiplier, float p_range, float p_attenuation);
|
||||
virtual void add_spot_light(bool p_bake_direct, const Vector3 &p_position, const Vector3 p_direction, const Color &p_color, float p_energy, float p_indirect_multiplier, float p_range, float p_attenuation, float p_spot_angle, float p_spot_attenuation);
|
||||
virtual BakeError bake(BakeQuality p_quality, bool p_use_denoiser, int p_bounces, float p_bias, bool p_generate_atlas, int p_max_texture_size, const Ref<Image> &p_environment_panorama, const Basis &p_environment_transform, BakeStepFunc p_step_function = nullptr, void *p_bake_userdata = nullptr, BakeStepFunc p_substep_function = nullptr);
|
||||
|
||||
int get_bake_texture_count() const;
|
||||
Ref<Image> get_bake_texture(int p_index) const;
|
||||
int get_bake_mesh_count() const;
|
||||
Variant get_bake_mesh_userdata(int p_index) const;
|
||||
Rect2 get_bake_mesh_uv_scale(int p_index) const;
|
||||
int get_bake_mesh_texture_slice(int p_index) const;
|
||||
|
||||
LightmapperCPU();
|
||||
};
|
||||
|
||||
#endif // LIGHTMAPPER_H
|
||||
@ -0,0 +1,54 @@
|
||||
/*************************************************************************/
|
||||
/* register_types.cpp */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "register_types.h"
|
||||
|
||||
#include "core/project_settings.h"
|
||||
#include "lightmapper_cpu.h"
|
||||
#include "scene/3d/lightmapper.h"
|
||||
|
||||
#ifndef _3D_DISABLED
|
||||
// Allocates a new LightmapperCPU and returns it through the Lightmapper base pointer.
static Lightmapper *create_lightmapper_cpu() {
	Lightmapper *cpu_lightmapper = memnew(LightmapperCPU);
	return cpu_lightmapper;
}
|
||||
#endif
|
||||
|
||||
// Module entry point: defines the per-quality ray count project settings and,
// unless 3D support is compiled out, installs the CPU lightmapper factory.
void register_lightmapper_cpu_types() {
	// Setting path / default value pairs, defined in this order.
	static const struct {
		const char *setting;
		int ray_count;
	} quality_defaults[] = {
		{ "rendering/cpu_lightmapper/quality/low_quality_ray_count", 64 },
		{ "rendering/cpu_lightmapper/quality/medium_quality_ray_count", 256 },
		{ "rendering/cpu_lightmapper/quality/high_quality_ray_count", 512 },
		{ "rendering/cpu_lightmapper/quality/ultra_quality_ray_count", 1024 },
	};
	const int default_count = (int)(sizeof(quality_defaults) / sizeof(quality_defaults[0]));

	for (int i = 0; i < default_count; i++) {
		GLOBAL_DEF(quality_defaults[i].setting, quality_defaults[i].ray_count);
	}

#ifndef _3D_DISABLED
	Lightmapper::create_cpu = create_lightmapper_cpu;
#endif
}
|
||||
|
||||
// Module teardown hook. Intentionally empty: nothing is released here.
void unregister_lightmapper_cpu_types() {
}
|
||||
@ -0,0 +1,37 @@
|
||||
/*************************************************************************/
|
||||
/* register_types.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef LIGHTMAPPER_CPU_REGISTER_TYPES_H
#define LIGHTMAPPER_CPU_REGISTER_TYPES_H

// Module initialization hook for the CPU lightmapper module.
void register_lightmapper_cpu_types();
// Module teardown hook for the CPU lightmapper module.
void unregister_lightmapper_cpu_types();

#endif // LIGHTMAPPER_CPU_REGISTER_TYPES_H
|
||||
@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
Import("env")
|
||||
Import("env_modules")
|
||||
|
||||
# Embree translation units built into the engine, as paths relative to the
# Embree thirdparty directory (each entry gets that directory prepended below).
embree_src = [
    # common/sys
    "common/sys/sysinfo.cpp",
    "common/sys/alloc.cpp",
    "common/sys/filename.cpp",
    "common/sys/library.cpp",
    "common/sys/thread.cpp",
    "common/sys/string.cpp",
    "common/sys/regression.cpp",
    "common/sys/mutex.cpp",
    "common/sys/condition.cpp",
    "common/sys/barrier.cpp",
    # common/math, common/simd, common/lexers, common/tasking
    "common/math/constants.cpp",
    "common/simd/sse.cpp",
    "common/lexers/stringstream.cpp",
    "common/lexers/tokenstream.cpp",
    "common/tasking/taskschedulerinternal.cpp",
    # common/algorithms
    "common/algorithms/parallel_for.cpp",
    "common/algorithms/parallel_reduce.cpp",
    "common/algorithms/parallel_prefix_sum.cpp",
    "common/algorithms/parallel_for_for.cpp",
    "common/algorithms/parallel_for_for_prefix_sum.cpp",
    "common/algorithms/parallel_partition.cpp",
    "common/algorithms/parallel_sort.cpp",
    "common/algorithms/parallel_set.cpp",
    "common/algorithms/parallel_map.cpp",
    "common/algorithms/parallel_filter.cpp",
    # kernels/common
    "kernels/common/device.cpp",
    "kernels/common/stat.cpp",
    "kernels/common/acceln.cpp",
    "kernels/common/accelset.cpp",
    "kernels/common/state.cpp",
    "kernels/common/rtcore.cpp",
    "kernels/common/rtcore_builder.cpp",
    "kernels/common/scene.cpp",
    "kernels/common/alloc.cpp",
    "kernels/common/geometry.cpp",
    "kernels/common/scene_triangle_mesh.cpp",
    # kernels/geometry, kernels/builders
    "kernels/geometry/primitive4.cpp",
    "kernels/builders/primrefgen.cpp",
    # kernels/bvh
    "kernels/bvh/bvh.cpp",
    "kernels/bvh/bvh_statistics.cpp",
    "kernels/bvh/bvh4_factory.cpp",
    "kernels/bvh/bvh8_factory.cpp",
    "kernels/bvh/bvh_collider.cpp",
    "kernels/bvh/bvh_rotate.cpp",
    "kernels/bvh/bvh_refit.cpp",
    "kernels/bvh/bvh_builder.cpp",
    "kernels/bvh/bvh_builder_morton.cpp",
    "kernels/bvh/bvh_builder_sah.cpp",
    "kernels/bvh/bvh_builder_sah_spatial.cpp",
    "kernels/bvh/bvh_builder_sah_mb.cpp",
    "kernels/bvh/bvh_builder_twolevel.cpp",
    "kernels/bvh/bvh_intersector1_bvh4.cpp",
]
|
||||
|
||||
embree_dir = "#thirdparty/embree/"

# Dedicated environment for the vendored Embree sources, so its include paths
# and compiler settings do not leak into the other modules.
env_embree = env_modules.Clone()
embree_sources = [embree_dir + file for file in embree_src]
env_embree.Prepend(CPPPATH=[embree_dir, embree_dir + "include"])

# Preprocessor symbols go through CPPDEFINES so that SCons emits them in the
# syntax of whichever compiler is active, instead of hard-coding GCC-style
# "-D" switches into CPPFLAGS.
env_embree.Append(
    CPPDEFINES=[
        "EMBREE_TARGET_SSE2",
        "EMBREE_LOWEST_ISA",
        "TASKING_INTERNAL",
        "NDEBUG",
        "__SSE2__",
        "__SSE__",
    ]
)

# "-msse2" and "-mxsave" are GCC/Clang switches; MSVC does not understand them,
# so they are only added for non-MSVC toolchains.
if not env_embree.msvc:
    env_embree.Append(CPPFLAGS=["-msse2", "-mxsave"])
|
||||
|
||||
# On Windows the psapi library is linked into the main environment; MSVC takes
# it as a link flag, other toolchains as a library name.
if env["platform"] == "windows":
    psapi_settings = {"LINKFLAGS": ["psapi.lib"]} if env.msvc else {"LIBS": ["psapi"]}
    env.Append(**psapi_settings)
|
||||
|
||||
# Third-party code: build it with warnings disabled.
env_embree.disable_warnings()
env_embree.add_source_files(env.modules_sources, embree_sources)

# The module's own sources are built in a separate environment that only adds
# the Embree include directories.
env_raycast = env_modules.Clone()
raycast_include_dirs = [embree_dir + subdir for subdir in ("", "include", "common")]
env_raycast.Prepend(CPPPATH=raycast_include_dirs)
env_raycast.add_source_files(env.modules_sources, "*.cpp")
|
||||
@ -0,0 +1,13 @@
|
||||
def can_build(env, platform):
    """Tell the build system whether this module can be built.

    Embree requires at least SSE2, so 32-bit and ARM64 builds are not
    supported. The module is also only relevant for tools builds on desktop
    platforms, as doing lightmap generation on Android or HTML5 would be a bit
    far-fetched.
    """
    is_desktop = platform in ["x11", "osx", "windows", "server"]
    is_64_bit = env["bits"] == "64"
    is_not_arm64 = env["arch"] != "arm64"
    return env["tools"] and is_desktop and is_64_bit and is_not_arm64
|
||||
|
||||
|
||||
def configure(env):
    """Module configuration hook; this module leaves *env* untouched."""
    return None
|
||||
@ -0,0 +1,259 @@
|
||||
import glob, os, shutil, subprocess, re
|
||||
|
||||
# Directories of the upstream checkout whose "*.h" files are copied into the
# vendored tree (non-recursive: only headers directly inside each directory).
include_dirs = [
    "common/tasking",
    "kernels/bvh",
    "kernels/builders",
    "common/sys",
    "kernels",
    "kernels/common",
    "common/math",
    "common/algorithms",
    "common/lexers",
    "common/simd",
    "include/embree3",
    "kernels/subdiv",
    "kernels/geometry",
]
|
||||
|
||||
# Upstream source files copied into the vendored tree, as paths relative to
# the root of the Embree checkout.
cpp_files = [
    # common/sys
    "common/sys/sysinfo.cpp",
    "common/sys/alloc.cpp",
    "common/sys/filename.cpp",
    "common/sys/library.cpp",
    "common/sys/thread.cpp",
    "common/sys/string.cpp",
    "common/sys/regression.cpp",
    "common/sys/mutex.cpp",
    "common/sys/condition.cpp",
    "common/sys/barrier.cpp",
    # common/math, common/simd, common/lexers, common/tasking
    "common/math/constants.cpp",
    "common/simd/sse.cpp",
    "common/lexers/stringstream.cpp",
    "common/lexers/tokenstream.cpp",
    "common/tasking/taskschedulerinternal.cpp",
    # common/algorithms
    "common/algorithms/parallel_for.cpp",
    "common/algorithms/parallel_reduce.cpp",
    "common/algorithms/parallel_prefix_sum.cpp",
    "common/algorithms/parallel_for_for.cpp",
    "common/algorithms/parallel_for_for_prefix_sum.cpp",
    "common/algorithms/parallel_partition.cpp",
    "common/algorithms/parallel_sort.cpp",
    "common/algorithms/parallel_set.cpp",
    "common/algorithms/parallel_map.cpp",
    "common/algorithms/parallel_filter.cpp",
    # kernels/common
    "kernels/common/device.cpp",
    "kernels/common/stat.cpp",
    "kernels/common/acceln.cpp",
    "kernels/common/accelset.cpp",
    "kernels/common/state.cpp",
    "kernels/common/rtcore.cpp",
    "kernels/common/rtcore_builder.cpp",
    "kernels/common/scene.cpp",
    "kernels/common/alloc.cpp",
    "kernels/common/geometry.cpp",
    "kernels/common/scene_triangle_mesh.cpp",
    # kernels/geometry, kernels/builders
    "kernels/geometry/primitive4.cpp",
    "kernels/builders/primrefgen.cpp",
    # kernels/bvh
    "kernels/bvh/bvh.cpp",
    "kernels/bvh/bvh_statistics.cpp",
    "kernels/bvh/bvh4_factory.cpp",
    "kernels/bvh/bvh8_factory.cpp",
    "kernels/bvh/bvh_collider.cpp",
    "kernels/bvh/bvh_rotate.cpp",
    "kernels/bvh/bvh_refit.cpp",
    "kernels/bvh/bvh_builder.cpp",
    "kernels/bvh/bvh_builder_morton.cpp",
    "kernels/bvh/bvh_builder_sah.cpp",
    "kernels/bvh/bvh_builder_sah_spatial.cpp",
    "kernels/bvh/bvh_builder_sah_mb.cpp",
    "kernels/bvh/bvh_builder_twolevel.cpp",
    "kernels/bvh/bvh_intersector1.cpp",
    "kernels/bvh/bvh_intersector1_bvh4.cpp",
]
|
||||
|
||||
# Resolve the thirdparty directory from this script's own location, so the
# destructive steps below never depend on the caller's working directory.
os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "thirdparty"))

# A previous run that failed midway may have left a stale checkout behind,
# which would make "git clone" refuse to run.
if os.path.exists("embree-tmp"):
    shutil.rmtree("embree-tmp")

# check=True aborts the script if the clone fails instead of carrying on with
# a missing checkout.
subprocess.run(["git", "clone", "https://github.com/embree/embree.git", "embree-tmp"], check=True)

# Only drop the currently vendored copy once the fresh checkout is available.
if os.path.exists("embree"):
    shutil.rmtree("embree")

os.chdir("embree-tmp")

# Hash of the checked-out commit; later written into the generated hash.h.
commit_hash = subprocess.check_output(["git", "rev-parse", "HEAD"], universal_newlines=True).strip()
|
||||
|
||||
dest_dir = "../embree"

# Everything to vendor: the listed sources plus every header found directly
# inside one of the include directories.
all_files = set(cpp_files)
for header_dir in include_dirs:
    all_files.update(glob.iglob(os.path.join(header_dir, "*.h")))

# Mirror each file into dest_dir, creating its directory on first use.
for relative_path in all_files:
    target_dir = os.path.join(dest_dir, os.path.dirname(relative_path))
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    shutil.copy2(relative_path, target_dir)
|
||||
|
||||
# Generate kernels/hash.h, recording the upstream commit that was vendored.
hash_header_path = os.path.join(dest_dir, "kernels/hash.h")
hash_header_text = f"""
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#define RTC_HASH "{commit_hash}"
"""
with open(hash_header_path, "w") as hash_file:
    hash_file.write(hash_header_text)
|
||||
|
||||
# Generate kernels/config.h with a fixed feature set: the filter function and
# triangle geometry are defined, every other option is left as a commented-out
# "#undef". The IF_ENABLED_* helper macros follow from those switches.
with open(os.path.join(dest_dir, "kernels/config.h"), "w") as config_file:
    config_file.write(
        """
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

/* #undef EMBREE_RAY_MASK */
/* #undef EMBREE_STAT_COUNTERS */
/* #undef EMBREE_BACKFACE_CULLING */
/* #undef EMBREE_BACKFACE_CULLING_CURVES */
#define EMBREE_FILTER_FUNCTION
/* #undef EMBREE_IGNORE_INVALID_RAYS */
#define EMBREE_GEOMETRY_TRIANGLE
/* #undef EMBREE_GEOMETRY_QUAD */
/* #undef EMBREE_GEOMETRY_CURVE */
/* #undef EMBREE_GEOMETRY_SUBDIVISION */
/* #undef EMBREE_GEOMETRY_USER */
/* #undef EMBREE_GEOMETRY_INSTANCE */
/* #undef EMBREE_GEOMETRY_GRID */
/* #undef EMBREE_GEOMETRY_POINT */
/* #undef EMBREE_RAY_PACKETS */
/* #undef EMBREE_COMPACT_POLYS */

#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0

#if defined(EMBREE_GEOMETRY_TRIANGLE)
#define IF_ENABLED_TRIS(x) x
#else
#define IF_ENABLED_TRIS(x)
#endif

#if defined(EMBREE_GEOMETRY_QUAD)
#define IF_ENABLED_QUADS(x) x
#else
#define IF_ENABLED_QUADS(x)
#endif

#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
#define IF_ENABLED_CURVES_OR_POINTS(x) x
#else
#define IF_ENABLED_CURVES_OR_POINTS(x)
#endif

#if defined(EMBREE_GEOMETRY_CURVE)
#define IF_ENABLED_CURVES(x) x
#else
#define IF_ENABLED_CURVES(x)
#endif

#if defined(EMBREE_GEOMETRY_POINT)
#define IF_ENABLED_POINTS(x) x
#else
#define IF_ENABLED_POINTS(x)
#endif

#if defined(EMBREE_GEOMETRY_SUBDIVISION)
#define IF_ENABLED_SUBDIV(x) x
#else
#define IF_ENABLED_SUBDIV(x)
#endif

#if defined(EMBREE_GEOMETRY_USER)
#define IF_ENABLED_USER(x) x
#else
#define IF_ENABLED_USER(x)
#endif

#if defined(EMBREE_GEOMETRY_INSTANCE)
#define IF_ENABLED_INSTANCE(x) x
#else
#define IF_ENABLED_INSTANCE(x)
#endif

#if defined(EMBREE_GEOMETRY_GRID)
#define IF_ENABLED_GRIDS(x) x
#else
#define IF_ENABLED_GRIDS(x)
#endif
"""
    )
|
||||
|
||||
|
||||
# Read the upstream version numbers out of the checkout's CMakeLists.txt.
with open("CMakeLists.txt", "r") as cmake_file:
    cmake_content = cmake_file.read()

# The first match of each pattern is used.
major_version = int(re.findall(r"EMBREE_VERSION_MAJOR\s(\d+)", cmake_content)[0])
minor_version = int(re.findall(r"EMBREE_VERSION_MINOR\s(\d+)", cmake_content)[0])
patch_version = int(re.findall(r"EMBREE_VERSION_PATCH\s(\d+)", cmake_content)[0])
|
||||
|
||||
# Generate include/embree3/rtcore_config.h with the version numbers parsed
# from CMakeLists.txt and EMBREE_STATIC_LIB defined. Doubled braces are literal
# braces in the f-string.
with open(os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w") as config_file:
    config_file.write(
        f"""
// Copyright 2009-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#define RTC_VERSION_MAJOR {major_version}
#define RTC_VERSION_MINOR {minor_version}
#define RTC_VERSION_PATCH {patch_version}
#define RTC_VERSION {major_version}{minor_version:02d}{patch_version:02d}
#define RTC_VERSION_STRING "{major_version}.{minor_version}.{patch_version}"

#define RTC_MAX_INSTANCE_LEVEL_COUNT 1

#define EMBREE_MIN_WIDTH 0
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH

#define EMBREE_STATIC_LIB
/* #undef EMBREE_API_NAMESPACE */

#if defined(EMBREE_API_NAMESPACE)
# define RTC_NAMESPACE
# define RTC_NAMESPACE_BEGIN namespace {{
# define RTC_NAMESPACE_END }}
# define RTC_NAMESPACE_USE using namespace ;
# define RTC_API_EXTERN_C
# undef EMBREE_API_NAMESPACE
#else
# define RTC_NAMESPACE_BEGIN
# define RTC_NAMESPACE_END
# define RTC_NAMESPACE_USE
# if defined(__cplusplus)
# define RTC_API_EXTERN_C extern "C"
# else
# define RTC_API_EXTERN_C
# endif
#endif

#if defined(ISPC)
# define RTC_API_IMPORT extern "C" unmasked
# define RTC_API_EXPORT extern "C" unmasked
#elif defined(EMBREE_STATIC_LIB)
# define RTC_API_IMPORT RTC_API_EXTERN_C
# define RTC_API_EXPORT RTC_API_EXTERN_C
#elif defined(_WIN32)
# define RTC_API_IMPORT RTC_API_EXTERN_C __declspec(dllimport)
# define RTC_API_EXPORT RTC_API_EXTERN_C __declspec(dllexport)
#else
# define RTC_API_IMPORT RTC_API_EXTERN_C
# define RTC_API_EXPORT RTC_API_EXTERN_C __attribute__ ((visibility ("default")))
#endif

#if defined(RTC_EXPORT_API)
# define RTC_API RTC_API_EXPORT
#else
# define RTC_API RTC_API_IMPORT
#endif
"""
    )
|
||||
|
||||
# Step out of the temporary checkout and delete it.
os.chdir("..")
shutil.rmtree("embree-tmp")
|
||||
@ -0,0 +1,198 @@
|
||||
/*************************************************************************/
|
||||
/* lightmap_raycaster.cpp */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "lightmap_raycaster.h"
|
||||
|
||||
// From Embree.
|
||||
#include <math/vec2.h>
|
||||
#include <math/vec3.h>
|
||||
|
||||
using namespace embree;
|
||||
|
||||
// Allocates a new Embree-backed raycaster and returns it through the base class pointer.
LightmapRaycaster *LightmapRaycasterEmbree::create_embree_raycaster() {
	LightmapRaycaster *raycaster = memnew(LightmapRaycasterEmbree);
	return raycaster;
}
|
||||
|
||||
// Installs create_embree_raycaster() as the raycaster creation function.
void LightmapRaycasterEmbree::make_default_raycaster() {
	create_function = &LightmapRaycasterEmbree::create_embree_raycaster;
}
|
||||
|
||||
void LightmapRaycasterEmbree::filter_function(const struct RTCFilterFunctionNArguments *p_args) {
|
||||
|
||||
RTCHit *hit = (RTCHit *)p_args->hit;
|
||||
|
||||
unsigned int geomID = hit->geomID;
|
||||
float u = hit->u;
|
||||
float v = hit->v;
|
||||
|
||||
LightmapRaycasterEmbree *scene = (LightmapRaycasterEmbree *)p_args->geometryUserPtr;
|
||||
RTCGeometry geom = rtcGetGeometry(scene->embree_scene, geomID);
|
||||
|
||||
rtcInterpolate0(geom, hit->primID, hit->u, hit->v, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 0, &hit->u, 2);
|
||||
|
||||
if (scene->alpha_textures.has(geomID)) {
|
||||
const AlphaTextureData &alpha_texture = scene->alpha_textures[geomID];
|
||||
|
||||
if (alpha_texture.sample(hit->u, hit->v) < 128) {
|
||||
p_args->valid[0] = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
rtcInterpolate0(geom, hit->primID, u, v, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 1, &hit->Ng_x, 3);
|
||||
}
|
||||
|
||||
// Traces a single ray against the committed scene.
// r_ray is updated in place; returns true when it ended up with a valid geometry ID.
bool LightmapRaycasterEmbree::intersect(Ray &r_ray) {
	RTCIntersectContext intersect_context;
	rtcInitIntersectContext(&intersect_context);

	// The Ray is handed to Embree reinterpreted as an RTCRayHit.
	RTCRayHit *ray_hit = (RTCRayHit *)&r_ray;
	rtcIntersect1(embree_scene, &intersect_context, ray_hit);

	const bool hit_geometry = r_ray.geomID != RTC_INVALID_GEOMETRY_ID;
	return hit_geometry;
}
|
||||
|
||||
// Traces every ray of r_rays, one after the other, updating each in place.
void LightmapRaycasterEmbree::intersect(Vector<Ray> &r_rays) {
	Ray *ray_data = r_rays.ptrw();
	const int ray_count = r_rays.size();

	for (int ray_idx = 0; ray_idx < ray_count; ray_idx++) {
		intersect(ray_data[ray_idx]);
	}
}
|
||||
|
||||
// Stores a copy of p_alpha_texture for mesh p_id, later sampled by filter_function().
// Invalid references and images whose size equals Vector2i() are ignored.
void LightmapRaycasterEmbree::set_mesh_alpha_texture(Ref<Image> p_alpha_texture, unsigned int p_id) {
	const bool has_pixels = p_alpha_texture.is_valid() && p_alpha_texture->get_size() != Vector2i();
	if (!has_pixels) {
		return;
	}

	AlphaTextureData texture;
	texture.size = p_alpha_texture->get_size();
	texture.data.resize(texture.size.x * texture.size.y);

	{
		// Copies the first width * height bytes of the image data.
		// NOTE(review): this presumes one byte per pixel - confirm against the caller.
		PoolVector<uint8_t>::Read source = p_alpha_texture->get_data().read();
		uint8_t *destination = texture.data.ptrw();
		for (int texel = 0; texel < texture.size.x * texture.size.y; ++texel) {
			destination[texel] = source[texel];
		}
	}

	alpha_textures.insert(p_id, texture);
}
|
||||
|
||||
float blerp(float c00, float c10, float c01, float c11, float tx, float ty) {
|
||||
return Math::lerp(Math::lerp(c00, c10, tx), Math::lerp(c01, c11, tx), ty);
|
||||
}
|
||||
|
||||
// Returns the bilinearly filtered texel value at (u, v).
// u and v are scaled by the texture size; texel indices outside the texture
// are clamped to its border.
uint8_t LightmapRaycasterEmbree::AlphaTextureData::sample(float u, float v) const {
	float x = u * size.x;
	float y = v * size.y;

	// Use floor() rather than a truncating cast: truncation rounds toward zero,
	// so coordinates in (-1, 0) would get a negative interpolation weight and
	// extrapolate outside the texel range before the conversion to uint8_t.
	// With floor() the weights always stay within [0, 1).
	int xi = (int)Math::floor(x);
	int yi = (int)Math::floor(y);

	uint8_t texels[4];

	// Gather the 2x2 neighbourhood: bit 0 of i selects the column, bit 1 the row.
	for (int i = 0; i < 4; ++i) {
		int sample_x = CLAMP(xi + i % 2, 0, size.x - 1);
		int sample_y = CLAMP(yi + i / 2, 0, size.y - 1);
		texels[i] = data[sample_y * size.x + sample_x];
	}

	return Math::round(blerp(texels[0], texels[1], texels[2], texels[3], x - xi, y - yi));
}
|
||||
|
||||
void LightmapRaycasterEmbree::add_mesh(const Vector<Vector3> &p_vertices, const Vector<Vector3> &p_normals, const Vector<Vector2> &p_uv2s, unsigned int p_id) {
|
||||
|
||||
RTCGeometry embree_mesh = rtcNewGeometry(embree_device, RTC_GEOMETRY_TYPE_TRIANGLE);
|
||||
|
||||
rtcSetGeometryVertexAttributeCount(embree_mesh, 2);
|
||||
|
||||
int vertex_count = p_vertices.size();
|
||||
|
||||
ERR_FAIL_COND(vertex_count % 3 != 0);
|
||||
ERR_FAIL_COND(vertex_count != p_uv2s.size());
|
||||
|
||||
Vec3fa *embree_vertices = (Vec3fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, sizeof(Vec3fa), vertex_count);
|
||||
Vec2fa *embree_light_uvs = (Vec2fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 0, RTC_FORMAT_FLOAT2, sizeof(Vec2fa), vertex_count);
|
||||
uint32_t *embree_triangles = (uint32_t *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(uint32_t) * 3, vertex_count / 3);
|
||||
|
||||
Vec3fa *embree_normals = nullptr;
|
||||
if (!p_normals.empty()) {
|
||||
embree_normals = (Vec3fa *)rtcSetNewGeometryBuffer(embree_mesh, RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE, 1, RTC_FORMAT_FLOAT3, sizeof(Vec3fa), vertex_count);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < vertex_count; i++) {
|
||||
embree_vertices[i] = Vec3fa(p_vertices[i].x, p_vertices[i].y, p_vertices[i].z);
|
||||
embree_light_uvs[i] = Vec2fa(p_uv2s[i].x, p_uv2s[i].y);
|
||||
if (embree_normals != nullptr) {
|
||||
embree_normals[i] = Vec3fa(p_normals[i].x, p_normals[i].y, p_normals[i].z);
|
||||
}
|
||||
embree_triangles[i] = i;
|
||||
}
|
||||
|
||||
rtcCommitGeometry(embree_mesh);
|
||||
rtcSetGeometryIntersectFilterFunction(embree_mesh, filter_function);
|
||||
rtcSetGeometryUserData(embree_mesh, this);
|
||||
rtcAttachGeometryByID(embree_scene, embree_mesh, p_id);
|
||||
rtcReleaseGeometry(embree_mesh);
|
||||
}
|
||||
|
||||
// Commits the Embree scene.
void LightmapRaycasterEmbree::commit() {
	rtcCommitScene(embree_scene);
}
|
||||
|
||||
void LightmapRaycasterEmbree::set_mesh_filter(const Set<int> &p_mesh_ids) {
|
||||
for (Set<int>::Element *E = p_mesh_ids.front(); E; E = E->next()) {
|
||||
rtcDisableGeometry(rtcGetGeometry(embree_scene, E->get()));
|
||||
}
|
||||
rtcCommitScene(embree_scene);
|
||||
filter_meshes = p_mesh_ids;
|
||||
}
|
||||
|
||||
void LightmapRaycasterEmbree::clear_mesh_filter() {
|
||||
for (Set<int>::Element *E = filter_meshes.front(); E; E = E->next()) {
|
||||
rtcEnableGeometry(rtcGetGeometry(embree_scene, E->get()));
|
||||
}
|
||||
rtcCommitScene(embree_scene);
|
||||
filter_meshes.clear();
|
||||
}
|
||||
|
||||
void embree_error_handler(void *p_user_data, RTCError p_code, const char *p_str) {
|
||||
print_error("Embree error: " + String(p_str));
|
||||
}
|
||||
|
||||
// Creates the Embree device, hooks up the error callback, then creates the scene.
LightmapRaycasterEmbree::LightmapRaycasterEmbree() :
		embree_device(rtcNewDevice(nullptr)) {
	rtcSetDeviceErrorFunction(embree_device, &embree_error_handler, nullptr);
	embree_scene = rtcNewScene(embree_device);
}
|
||||
|
||||
// Releases the scene first and the device second; null handles are skipped.
LightmapRaycasterEmbree::~LightmapRaycasterEmbree() {
	if (embree_scene != nullptr) {
		rtcReleaseScene(embree_scene);
	}

	if (embree_device != nullptr) {
		rtcReleaseDevice(embree_device);
	}
}
|
||||
@ -0,0 +1,73 @@
|
||||
/*************************************************************************/
|
||||
/* lightmap_raycaster.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
// Include guard: without it, including this header twice in one translation
// unit redefines the class.
#ifndef LIGHTMAP_RAYCASTER_H
#define LIGHTMAP_RAYCASTER_H

#include "core/object.h"
#include "scene/3d/lightmapper.h"
#include "scene/resources/mesh.h"

#include <embree3/rtcore.h>

// LightmapRaycaster implementation backed by an Embree device and scene.
class LightmapRaycasterEmbree : public LightmapRaycaster {
	GDCLASS(LightmapRaycasterEmbree, LightmapRaycaster);

private:
	// Per-mesh copy of an alpha texture, stored as one byte per texel.
	struct AlphaTextureData {
		Vector<uint8_t> data;
		Vector2i size;

		// Returns the bilinearly filtered texel value at (u, v).
		uint8_t sample(float u, float v) const;
	};

	RTCDevice embree_device;
	RTCScene embree_scene;

	// Intersection filter installed on every mesh added through add_mesh().
	static void filter_function(const struct RTCFilterFunctionNArguments *p_args);

	// Alpha textures keyed by the geometry ID given to set_mesh_alpha_texture().
	Map<unsigned int, AlphaTextureData> alpha_textures;
	// Geometry IDs currently disabled through set_mesh_filter().
	Set<int> filter_meshes;

public:
	// Traces one ray, updating it in place; returns true when it hit a geometry.
	virtual bool intersect(Ray &r_ray);

	// Traces every ray of r_rays, updating each in place.
	virtual void intersect(Vector<Ray> &r_rays);

	virtual void add_mesh(const Vector<Vector3> &p_vertices, const Vector<Vector3> &p_normals, const Vector<Vector2> &p_uv2s, unsigned int p_id);
	virtual void set_mesh_alpha_texture(Ref<Image> p_alpha_texture, unsigned int p_id);
	virtual void commit();

	virtual void set_mesh_filter(const Set<int> &p_mesh_ids);
	virtual void clear_mesh_filter();

	static LightmapRaycaster *create_embree_raycaster();
	static void make_default_raycaster();

	LightmapRaycasterEmbree();
	~LightmapRaycasterEmbree();
};

#endif // LIGHTMAP_RAYCASTER_H
|
||||
@ -0,0 +1,40 @@
|
||||
/*************************************************************************/
|
||||
/* register_types.cpp */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "register_types.h"
|
||||
|
||||
#include "lightmap_raycaster.h"
|
||||
|
||||
// Module initialization hook: makes the Embree raycaster the default one.
void register_raycast_types() {
	LightmapRaycasterEmbree::make_default_raycaster();
}
|
||||
|
||||
// Module teardown hook. Intentionally empty: nothing is released here.
void unregister_raycast_types() {
}
|
||||
@ -0,0 +1,32 @@
|
||||
/*************************************************************************/
|
||||
/* register_types.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef RAYCAST_REGISTER_TYPES_H
#define RAYCAST_REGISTER_TYPES_H

// Module registration hooks; the include guard keeps repeated inclusion of
// this header harmless.
void register_raycast_types();
void unregister_raycast_types();

#endif // RAYCAST_REGISTER_TYPES_H
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,76 @@
|
||||
/*************************************************************************/
|
||||
/* lightmapper.cpp */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "lightmapper.h"
|
||||
|
||||
LightmapDenoiser *(*LightmapDenoiser::create_function)() = nullptr;
|
||||
|
||||
Ref<LightmapDenoiser> LightmapDenoiser::create() {
|
||||
if (create_function) {
|
||||
return Ref<LightmapDenoiser>(create_function());
|
||||
}
|
||||
return Ref<LightmapDenoiser>();
|
||||
}
|
||||
|
||||
LightmapRaycaster *(*LightmapRaycaster::create_function)() = nullptr;
|
||||
|
||||
Ref<LightmapRaycaster> LightmapRaycaster::create() {
|
||||
if (create_function) {
|
||||
return Ref<LightmapRaycaster>(create_function());
|
||||
}
|
||||
return Ref<LightmapRaycaster>();
|
||||
}
|
||||
|
||||
Lightmapper::CreateFunc Lightmapper::create_custom = nullptr;
|
||||
Lightmapper::CreateFunc Lightmapper::create_gpu = nullptr;
|
||||
Lightmapper::CreateFunc Lightmapper::create_cpu = nullptr;
|
||||
|
||||
Ref<Lightmapper> Lightmapper::create() {
|
||||
Lightmapper *lm = nullptr;
|
||||
if (create_custom) {
|
||||
lm = create_custom();
|
||||
}
|
||||
|
||||
if (!lm && create_gpu) {
|
||||
lm = create_gpu();
|
||||
}
|
||||
|
||||
if (!lm && create_cpu) {
|
||||
lm = create_cpu();
|
||||
}
|
||||
if (!lm) {
|
||||
return Ref<Lightmapper>();
|
||||
} else {
|
||||
return Ref<Lightmapper>(lm);
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing to set up: the class declares no instance data of its own.
Lightmapper::Lightmapper() {}
|
||||
@ -0,0 +1,196 @@
|
||||
/*************************************************************************/
|
||||
/* lightmapper.h */
|
||||
/*************************************************************************/
|
||||
/* This file is part of: */
|
||||
/* GODOT ENGINE */
|
||||
/* https://godotengine.org */
|
||||
/*************************************************************************/
|
||||
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
|
||||
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
|
||||
/* */
|
||||
/* Permission is hereby granted, free of charge, to any person obtaining */
|
||||
/* a copy of this software and associated documentation files (the */
|
||||
/* "Software"), to deal in the Software without restriction, including */
|
||||
/* without limitation the rights to use, copy, modify, merge, publish, */
|
||||
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
||||
/* permit persons to whom the Software is furnished to do so, subject to */
|
||||
/* the following conditions: */
|
||||
/* */
|
||||
/* The above copyright notice and this permission notice shall be */
|
||||
/* included in all copies or substantial portions of the Software. */
|
||||
/* */
|
||||
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
||||
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
||||
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
||||
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
||||
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
||||
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
||||
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef LIGHTMAPPER_H
|
||||
#define LIGHTMAPPER_H
|
||||
|
||||
#include "scene/resources/mesh.h"
|
||||
|
||||
// Portable alignment specifier; only defined when the toolchain has not
// already provided a macro of the same name.
#ifndef __aligned

#if !defined(__CYGWIN__) && (defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__))
// Native Windows targets (Cygwin excluded): __declspec spelling.
#define __aligned(...) __declspec(align(__VA_ARGS__))
#else
// Everything else: attribute spelling.
#define __aligned(...) __attribute__((aligned(__VA_ARGS__)))
#endif

#endif // __aligned
|
||||
|
||||
class LightmapDenoiser : public Reference {
|
||||
GDCLASS(LightmapDenoiser, Reference)
|
||||
protected:
|
||||
static LightmapDenoiser *(*create_function)();
|
||||
|
||||
public:
|
||||
virtual Ref<Image> denoise_image(const Ref<Image> &p_image) = 0;
|
||||
static Ref<LightmapDenoiser> create();
|
||||
};
|
||||
|
||||
class LightmapRaycaster : public Reference {
|
||||
GDCLASS(LightmapRaycaster, Reference)
|
||||
protected:
|
||||
static LightmapRaycaster *(*create_function)();
|
||||
|
||||
public:
|
||||
// compatible with embree3 rays
|
||||
struct __aligned(16) Ray {
|
||||
const static unsigned int INVALID_GEOMETRY_ID = ((unsigned int)-1); // from rtcore_common.h
|
||||
|
||||
/*! Default construction does nothing. */
|
||||
_FORCE_INLINE_ Ray() :
|
||||
geomID(INVALID_GEOMETRY_ID) {}
|
||||
|
||||
/*! Constructs a ray from origin, direction, and ray segment. Near
|
||||
* has to be smaller than far. */
|
||||
_FORCE_INLINE_ Ray(const Vector3 &org,
|
||||
const Vector3 &dir,
|
||||
float tnear = 0.0f,
|
||||
float tfar = INFINITY) :
|
||||
org(org),
|
||||
tnear(tnear),
|
||||
dir(dir),
|
||||
time(0.0f),
|
||||
tfar(tfar),
|
||||
mask(-1),
|
||||
u(0.0),
|
||||
v(0.0),
|
||||
primID(INVALID_GEOMETRY_ID),
|
||||
geomID(INVALID_GEOMETRY_ID),
|
||||
instID(INVALID_GEOMETRY_ID) {}
|
||||
|
||||
/*! Tests if we hit something. */
|
||||
_FORCE_INLINE_ explicit operator bool() const { return geomID != INVALID_GEOMETRY_ID; }
|
||||
|
||||
public:
|
||||
Vector3 org; //!< Ray origin + tnear
|
||||
float tnear; //!< Start of ray segment
|
||||
Vector3 dir; //!< Ray direction + tfar
|
||||
float time; //!< Time of this ray for motion blur.
|
||||
float tfar; //!< End of ray segment
|
||||
unsigned int mask; //!< used to mask out objects during traversal
|
||||
unsigned int id; //!< ray ID
|
||||
unsigned int flags; //!< ray flags
|
||||
|
||||
Vector3 normal; //!< Not normalized geometry normal
|
||||
float u; //!< Barycentric u coordinate of hit
|
||||
float v; //!< Barycentric v coordinate of hit
|
||||
unsigned int primID; //!< primitive ID
|
||||
unsigned int geomID; //!< geometry ID
|
||||
unsigned int instID; //!< instance ID
|
||||
};
|
||||
|
||||
virtual bool intersect(Ray &p_ray) = 0;
|
||||
|
||||
virtual void intersect(Vector<Ray> &r_rays) = 0;
|
||||
|
||||
virtual void add_mesh(const Vector<Vector3> &p_vertices, const Vector<Vector3> &p_normals, const Vector<Vector2> &p_uv2s, unsigned int p_id) = 0;
|
||||
virtual void set_mesh_alpha_texture(Ref<Image> p_alpha_texture, unsigned int p_id) = 0;
|
||||
virtual void commit() = 0;
|
||||
|
||||
virtual void set_mesh_filter(const Set<int> &p_mesh_ids) = 0;
|
||||
virtual void clear_mesh_filter() = 0;
|
||||
|
||||
static Ref<LightmapRaycaster> create();
|
||||
};
|
||||
|
||||
class Lightmapper : public Reference {
|
||||
GDCLASS(Lightmapper, Reference)
|
||||
public:
|
||||
enum LightType {
|
||||
LIGHT_TYPE_DIRECTIONAL,
|
||||
LIGHT_TYPE_OMNI,
|
||||
LIGHT_TYPE_SPOT
|
||||
};
|
||||
|
||||
enum BakeError {
|
||||
BAKE_ERROR_LIGHTMAP_TOO_SMALL,
|
||||
BAKE_ERROR_LIGHTMAP_CANT_PRE_BAKE_MESHES,
|
||||
BAKE_ERROR_NO_MESHES,
|
||||
BAKE_ERROR_USER_ABORTED,
|
||||
BAKE_ERROR_NO_RAYCASTER,
|
||||
BAKE_OK
|
||||
};
|
||||
|
||||
enum BakeQuality {
|
||||
BAKE_QUALITY_LOW,
|
||||
BAKE_QUALITY_MEDIUM,
|
||||
BAKE_QUALITY_HIGH,
|
||||
BAKE_QUALITY_ULTRA,
|
||||
};
|
||||
|
||||
typedef Lightmapper *(*CreateFunc)();
|
||||
|
||||
static CreateFunc create_custom;
|
||||
static CreateFunc create_gpu;
|
||||
static CreateFunc create_cpu;
|
||||
|
||||
protected:
|
||||
public:
|
||||
typedef bool (*BakeStepFunc)(float, const String &, void *, bool); //progress, step description, userdata, force refresh
|
||||
|
||||
struct MeshData {
|
||||
struct TextureDef {
|
||||
RID tex_rid;
|
||||
Color mul;
|
||||
Color add;
|
||||
};
|
||||
|
||||
//triangle data
|
||||
Vector<Vector3> points;
|
||||
Vector<Vector2> uv;
|
||||
Vector<Vector2> uv2;
|
||||
Vector<Vector3> normal;
|
||||
Vector<TextureDef> albedo;
|
||||
Vector<TextureDef> emission;
|
||||
Vector<int> surface_facecounts;
|
||||
Variant userdata;
|
||||
};
|
||||
|
||||
virtual void add_albedo_texture(Ref<Texture> p_texture) = 0;
|
||||
virtual void add_emission_texture(Ref<Texture> p_texture) = 0;
|
||||
virtual void add_mesh(const MeshData &p_mesh, Vector2i p_size) = 0;
|
||||
virtual void add_directional_light(bool p_bake_direct, const Vector3 &p_direction, const Color &p_color, float p_energy, float p_indirect_multiplier) = 0;
|
||||
virtual void add_omni_light(bool p_bake_direct, const Vector3 &p_position, const Color &p_color, float p_energy, float p_indirect_multiplier, float p_range, float p_attenuation) = 0;
|
||||
virtual void add_spot_light(bool p_bake_direct, const Vector3 &p_position, const Vector3 p_direction, const Color &p_color, float p_energy, float p_indirect_multiplier, float p_range, float p_attenuation, float p_spot_angle, float p_spot_attenuation) = 0;
|
||||
virtual BakeError bake(BakeQuality p_quality, bool p_use_denoiser, int p_bounces, float p_bias, bool p_generate_atlas, int p_max_texture_size, const Ref<Image> &p_environment_panorama, const Basis &p_environment_transform, BakeStepFunc p_step_function = nullptr, void *p_step_userdata = nullptr, BakeStepFunc p_substep_function = nullptr) = 0;
|
||||
|
||||
virtual int get_bake_texture_count() const = 0;
|
||||
virtual Ref<Image> get_bake_texture(int p_index) const = 0;
|
||||
virtual int get_bake_mesh_count() const = 0;
|
||||
virtual Variant get_bake_mesh_userdata(int p_index) const = 0;
|
||||
virtual Rect2 get_bake_mesh_uv_scale(int p_index) const = 0;
|
||||
virtual int get_bake_mesh_texture_slice(int p_index) const = 0;
|
||||
|
||||
static Ref<Lightmapper> create();
|
||||
|
||||
Lightmapper();
|
||||
};
|
||||
|
||||
#endif // LIGHTMAPPER_H
|
||||
@ -0,0 +1,55 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include "parallel_reduce.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
|
||||
template<typename Index, class UnaryPredicate>
|
||||
__forceinline bool parallel_any_of (Index first, Index last, UnaryPredicate pred)
|
||||
{
|
||||
bool ret = false;
|
||||
|
||||
#if defined(TASKING_TBB)
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
tbb::task_group_context context;
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>{first, last}, [&ret,pred,&context](const tbb::blocked_range<size_t>& r) {
|
||||
if (context.is_group_execution_cancelled()) return;
|
||||
for (size_t i = r.begin(); i != r.end(); ++i) {
|
||||
if (pred(i)) {
|
||||
ret = true;
|
||||
context.cancel_group_execution();
|
||||
}
|
||||
}
|
||||
});
|
||||
#else
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>{first, last}, [&ret,pred](const tbb::blocked_range<size_t>& r) {
|
||||
if (tbb::task::self().is_cancelled()) return;
|
||||
for (size_t i = r.begin(); i != r.end(); ++i) {
|
||||
if (pred(i)) {
|
||||
ret = true;
|
||||
tbb::task::self().cancel_group_execution();
|
||||
}
|
||||
}
|
||||
});
|
||||
#endif
|
||||
#else
|
||||
ret = parallel_reduce (first, last, false, [pred](const range<size_t>& r)->bool {
|
||||
bool localret = false;
|
||||
for (auto i=r.begin(); i<r.end(); ++i) {
|
||||
localret |= pred(i);
|
||||
}
|
||||
return localret;
|
||||
},
|
||||
std::bit_or<bool>()
|
||||
);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
@ -0,0 +1,56 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_filter.h"
|
||||
#include "../sys/regression.h"
|
||||
#include <map>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_filter_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_filter_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
auto pred = [&]( uint32_t v ) { return (v & 0x3) == 0; };
|
||||
|
||||
for (size_t N=10; N<1000000; N=size_t(2.1*N))
|
||||
{
|
||||
size_t N0 = rand() % N;
|
||||
|
||||
/* initialize array with random numbers */
|
||||
std::vector<uint32_t> src(N);
|
||||
std::map<uint32_t,int> m;
|
||||
for (size_t i=0; i<N; i++) src[i] = rand();
|
||||
|
||||
/* count elements up */
|
||||
for (size_t i=N0; i<N; i++)
|
||||
if (pred(src[i]))
|
||||
m[src[i]] = 0;
|
||||
for (size_t i=N0; i<N; i++)
|
||||
if (pred(src[i]))
|
||||
m[src[i]]++;
|
||||
|
||||
/* filter array */
|
||||
//size_t M = sequential_filter(src.data(),N0,N,pred);
|
||||
size_t M = parallel_filter(src.data(),N0,N,size_t(1024),pred);
|
||||
|
||||
/* check if filtered data is correct */
|
||||
for (size_t i=N0; i<M; i++) {
|
||||
passed &= pred(src[i]);
|
||||
m[src[i]]--;
|
||||
}
|
||||
for (size_t i=N0; i<M; i++)
|
||||
passed &= (m[src[i]] == 0);
|
||||
}
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
parallel_filter_regression_test parallel_filter_regression("parallel_filter_regression");
|
||||
}
|
||||
@ -0,0 +1,93 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* Compacts data[first,last) in place: elements accepted by the predicate are
 * moved to the front of the range in their original order. Returns the index
 * one past the last kept element. */
template<typename Ty, typename Index, typename Predicate>
inline Index sequential_filter( Ty* data, const Index first, const Index last, const Predicate& predicate)
{
  Index writePos = first;
  for (Index readPos = first; readPos < last; readPos++)
  {
    if (!predicate(data[readPos]))
      continue;

    data[writePos] = data[readPos];
    writePos++;
  }
  return writePos;
}
|
||||
|
||||
template<typename Ty, typename Index, typename Predicate>
|
||||
inline Index parallel_filter( Ty* data, const Index begin, const Index end, const Index minStepSize, const Predicate& predicate)
|
||||
{
|
||||
/* sequential fallback */
|
||||
if (end-begin <= minStepSize)
|
||||
return sequential_filter(data,begin,end,predicate);
|
||||
|
||||
/* calculate number of tasks to use */
|
||||
enum { MAX_TASKS = 64 };
|
||||
const Index numThreads = TaskScheduler::threadCount();
|
||||
const Index numBlocks = (end-begin+minStepSize-1)/minStepSize;
|
||||
const Index taskCount = min(numThreads,numBlocks,(Index)MAX_TASKS);
|
||||
|
||||
/* filter blocks */
|
||||
Index nused[MAX_TASKS];
|
||||
Index nfree[MAX_TASKS];
|
||||
parallel_for(taskCount, [&](const Index taskIndex)
|
||||
{
|
||||
const Index i0 = begin+(taskIndex+0)*(end-begin)/taskCount;
|
||||
const Index i1 = begin+(taskIndex+1)*(end-begin)/taskCount;
|
||||
const Index i2 = sequential_filter(data,i0,i1,predicate);
|
||||
nused[taskIndex] = i2-i0;
|
||||
nfree[taskIndex] = i1-i2;
|
||||
});
|
||||
|
||||
/* calculate offsets */
|
||||
Index sused=0;
|
||||
Index sfree=0;
|
||||
Index pfree[MAX_TASKS];
|
||||
for (Index i=0; i<taskCount; i++)
|
||||
{
|
||||
sused+=nused[i];
|
||||
Index cfree = nfree[i]; pfree[i] = sfree; sfree+=cfree;
|
||||
}
|
||||
|
||||
/* return if we did not filter out any element */
|
||||
assert(sfree <= end-begin);
|
||||
assert(sused <= end-begin);
|
||||
if (sused == end-begin)
|
||||
return end;
|
||||
|
||||
/* otherwise we have to copy misplaced elements around */
|
||||
parallel_for(taskCount, [&](const Index taskIndex)
|
||||
{
|
||||
/* destination to write elements to */
|
||||
Index dst = begin+(taskIndex+0)*(end-begin)/taskCount+nused[taskIndex];
|
||||
Index dst_end = min(dst+nfree[taskIndex],begin+sused);
|
||||
if (dst_end <= dst) return;
|
||||
|
||||
/* range of misplaced elements to copy to destination */
|
||||
Index r0 = pfree[taskIndex];
|
||||
Index r1 = r0+dst_end-dst;
|
||||
|
||||
/* find range in misplaced elements in back to front order */
|
||||
Index k0=0;
|
||||
for (Index i=taskCount-1; i>0; i--)
|
||||
{
|
||||
if (k0 > r1) break;
|
||||
Index k1 = k0+nused[i];
|
||||
Index src = begin+(i+0)*(end-begin)/taskCount+nused[i];
|
||||
for (Index i=max(r0,k0); i<min(r1,k1); i++) {
|
||||
Index isrc = src-i+k0-1;
|
||||
assert(dst >= begin && dst < end);
|
||||
assert(isrc >= begin && isrc < end);
|
||||
data[dst++] = data[isrc];
|
||||
}
|
||||
k0 = k1;
|
||||
}
|
||||
});
|
||||
|
||||
return begin+sused;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,48 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_for.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_for_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_for_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
|
||||
const size_t M = 10;
|
||||
for (size_t N=10; N<10000000; N=size_t(2.1*N))
|
||||
{
|
||||
/* sequentially calculate sum of squares */
|
||||
size_t sum0 = 0;
|
||||
for (size_t i=0; i<N; i++) {
|
||||
sum0 += i*i;
|
||||
}
|
||||
|
||||
/* parallel calculation of sum of squares */
|
||||
for (size_t m=0; m<M; m++)
|
||||
{
|
||||
std::atomic<size_t> sum1(0);
|
||||
parallel_for( size_t(0), size_t(N), size_t(1024), [&](const range<size_t>& r)
|
||||
{
|
||||
size_t s = 0;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
s += i*i;
|
||||
sum1 += s;
|
||||
});
|
||||
passed = sum0 == sum1;
|
||||
}
|
||||
}
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
parallel_for_regression_test parallel_for_regression("parallel_for_regression_test");
|
||||
}
|
||||
@ -0,0 +1,156 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../tasking/taskscheduler.h"
|
||||
#include "../sys/array.h"
|
||||
#include "../math/math.h"
|
||||
#include "../math/range.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* parallel_for without range */
|
||||
template<typename Index, typename Func>
|
||||
__forceinline void parallel_for( const Index N, const Func& func)
|
||||
{
|
||||
#if defined(TASKING_INTERNAL)
|
||||
if (N) {
|
||||
TaskScheduler::spawn(Index(0),N,Index(1),[&] (const range<Index>& r) {
|
||||
assert(r.size() == 1);
|
||||
func(r.begin());
|
||||
});
|
||||
if (!TaskScheduler::wait())
|
||||
throw std::runtime_error("task cancelled");
|
||||
}
|
||||
|
||||
#elif defined(TASKING_TBB)
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
tbb::task_group_context context;
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
});
|
||||
if (tbb::task::self().is_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
#endif
|
||||
|
||||
#elif defined(TASKING_PPL)
|
||||
concurrency::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
});
|
||||
#else
|
||||
# error "no tasking system enabled"
|
||||
#endif
|
||||
}
|
||||
|
||||
/* parallel for with range and granulatity */
|
||||
template<typename Index, typename Func>
|
||||
__forceinline void parallel_for( const Index first, const Index last, const Index minStepSize, const Func& func)
|
||||
{
|
||||
assert(first <= last);
|
||||
#if defined(TASKING_INTERNAL)
|
||||
TaskScheduler::spawn(first,last,minStepSize,func);
|
||||
if (!TaskScheduler::wait())
|
||||
throw std::runtime_error("task cancelled");
|
||||
|
||||
#elif defined(TASKING_TBB)
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
tbb::task_group_context context;
|
||||
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
|
||||
func(range<Index>(r.begin(),r.end()));
|
||||
},context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
#else
|
||||
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
|
||||
func(range<Index>(r.begin(),r.end()));
|
||||
});
|
||||
if (tbb::task::self().is_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
#endif
|
||||
|
||||
#elif defined(TASKING_PPL)
|
||||
concurrency::parallel_for(first, last, Index(1) /*minStepSize*/, [&](Index i) {
|
||||
func(range<Index>(i,i+1));
|
||||
});
|
||||
|
||||
#else
|
||||
# error "no tasking system enabled"
|
||||
#endif
|
||||
}
|
||||
|
||||
/* parallel for with range */
|
||||
template<typename Index, typename Func>
|
||||
__forceinline void parallel_for( const Index first, const Index last, const Func& func)
|
||||
{
|
||||
assert(first <= last);
|
||||
parallel_for(first,last,(Index)1,func);
|
||||
}
|
||||
|
||||
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION > 4001)
|
||||
|
||||
template<typename Index, typename Func>
|
||||
__forceinline void parallel_for_static( const Index N, const Func& func)
|
||||
{
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
tbb::task_group_context context;
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},tbb::simple_partitioner(),context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},tbb::simple_partitioner());
|
||||
if (tbb::task::self().is_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef tbb::affinity_partitioner affinity_partitioner;
|
||||
|
||||
template<typename Index, typename Func>
|
||||
__forceinline void parallel_for_affinity( const Index N, const Func& func, tbb::affinity_partitioner& ap)
|
||||
{
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
tbb::task_group_context context;
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},ap,context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
#else
|
||||
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
|
||||
func(i);
|
||||
},ap);
|
||||
if (tbb::task::self().is_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<typename Index, typename Func>
|
||||
__forceinline void parallel_for_static( const Index N, const Func& func)
|
||||
{
|
||||
parallel_for(N,func);
|
||||
}
|
||||
|
||||
struct affinity_partitioner {
|
||||
};
|
||||
|
||||
template<typename Index, typename Func>
|
||||
__forceinline void parallel_for_affinity( const Index N, const Func& func, affinity_partitioner& ap)
|
||||
{
|
||||
parallel_for(N,func);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -0,0 +1,63 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_for_for.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_for_for_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_for_for_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
|
||||
/* create vector with random numbers */
|
||||
size_t sum0 = 0;
|
||||
size_t K = 0;
|
||||
const size_t M = 1000;
|
||||
std::vector<std::vector<size_t>* > array2(M);
|
||||
for (size_t i=0; i<M; i++) {
|
||||
const size_t N = rand() % 1024;
|
||||
K+=N;
|
||||
array2[i] = new std::vector<size_t>(N);
|
||||
for (size_t j=0; j<N; j++)
|
||||
sum0 += (*array2[i])[j] = rand();
|
||||
}
|
||||
|
||||
/* array to test global index */
|
||||
std::vector<atomic<size_t>> verify_k(K);
|
||||
for (size_t i=0; i<K; i++) verify_k[i].store(0);
|
||||
|
||||
/* add all numbers using parallel_for_for */
|
||||
std::atomic<size_t> sum1(0);
|
||||
parallel_for_for( array2, size_t(1), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k) -> size_t
|
||||
{
|
||||
size_t s = 0;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
s += (*v)[i];
|
||||
verify_k[k++]++;
|
||||
}
|
||||
sum1 += s;
|
||||
return sum1;
|
||||
});
|
||||
passed &= (sum0 == sum1);
|
||||
|
||||
/* check global index */
|
||||
for (size_t i=0; i<K; i++)
|
||||
passed &= (verify_k[i] == 1);
|
||||
|
||||
/* delete vectors again */
|
||||
for (size_t i=0; i<array2.size(); i++)
|
||||
delete array2[i];
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
parallel_for_for_regression_test parallel_for_for_regression("parallel_for_for_regression_test");
|
||||
}
|
||||
@ -0,0 +1,149 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename ArrayArray, typename Func>
|
||||
__forceinline void sequential_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
|
||||
{
|
||||
size_t k=0;
|
||||
for (size_t i=0; i!=array2.size(); ++i) {
|
||||
const size_t N = array2[i]->size();
|
||||
if (N) func(array2[i],range<size_t>(0,N),k);
|
||||
k+=N;
|
||||
}
|
||||
}
|
||||
|
||||
class ParallelForForState
|
||||
{
|
||||
public:
|
||||
|
||||
enum { MAX_TASKS = 64 };
|
||||
|
||||
__forceinline ParallelForForState ()
|
||||
: taskCount(0) {}
|
||||
|
||||
template<typename ArrayArray>
|
||||
__forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) {
|
||||
init(array2,minStepSize);
|
||||
}
|
||||
|
||||
template<typename ArrayArray>
|
||||
__forceinline void init ( ArrayArray& array2, const size_t minStepSize )
|
||||
{
|
||||
/* first calculate total number of elements */
|
||||
size_t N = 0;
|
||||
for (size_t i=0; i<array2.size(); i++) {
|
||||
N += array2[i] ? array2[i]->size() : 0;
|
||||
}
|
||||
this->N = N;
|
||||
|
||||
/* calculate number of tasks to use */
|
||||
const size_t numThreads = TaskScheduler::threadCount();
|
||||
const size_t numBlocks = (N+minStepSize-1)/minStepSize;
|
||||
taskCount = max(size_t(1),min(numThreads,numBlocks,size_t(ParallelForForState::MAX_TASKS)));
|
||||
|
||||
/* calculate start (i,j) for each task */
|
||||
size_t taskIndex = 0;
|
||||
i0[taskIndex] = 0;
|
||||
j0[taskIndex] = 0;
|
||||
size_t k0 = (++taskIndex)*N/taskCount;
|
||||
for (size_t i=0, k=0; taskIndex < taskCount; i++)
|
||||
{
|
||||
assert(i<array2.size());
|
||||
size_t j=0, M = array2[i] ? array2[i]->size() : 0;
|
||||
while (j<M && k+M-j >= k0 && taskIndex < taskCount) {
|
||||
assert(taskIndex<taskCount);
|
||||
i0[taskIndex] = i;
|
||||
j0[taskIndex] = j += k0-k;
|
||||
k=k0;
|
||||
k0 = (++taskIndex)*N/taskCount;
|
||||
}
|
||||
k+=M-j;
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline size_t size() const {
|
||||
return N;
|
||||
}
|
||||
|
||||
public:
|
||||
size_t i0[MAX_TASKS];
|
||||
size_t j0[MAX_TASKS];
|
||||
size_t taskCount;
|
||||
size_t N;
|
||||
};
|
||||
|
||||
template<typename ArrayArray, typename Func>
|
||||
__forceinline void parallel_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
|
||||
{
|
||||
ParallelForForState state(array2,minStepSize);
|
||||
|
||||
parallel_for(state.taskCount, [&](const size_t taskIndex)
|
||||
{
|
||||
/* calculate range */
|
||||
const size_t k0 = (taskIndex+0)*state.size()/state.taskCount;
|
||||
const size_t k1 = (taskIndex+1)*state.size()/state.taskCount;
|
||||
size_t i0 = state.i0[taskIndex];
|
||||
size_t j0 = state.j0[taskIndex];
|
||||
|
||||
/* iterate over arrays */
|
||||
size_t k=k0;
|
||||
for (size_t i=i0; k<k1; i++) {
|
||||
const size_t N = array2[i] ? array2[i]->size() : 0;
|
||||
const size_t r0 = j0, r1 = min(N,r0+k1-k);
|
||||
if (r1 > r0) func(array2[i],range<size_t>(r0,r1),k);
|
||||
k+=r1-r0; j0 = 0;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<typename ArrayArray, typename Func>
|
||||
__forceinline void parallel_for_for( ArrayArray& array2, const Func& func )
|
||||
{
|
||||
parallel_for_for(array2,1,func);
|
||||
}
|
||||
|
||||
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const size_t minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
ParallelForForState state(array2,minStepSize);
|
||||
Value temp[ParallelForForState::MAX_TASKS];
|
||||
|
||||
for (size_t i=0; i<state.taskCount; i++)
|
||||
temp[i] = identity;
|
||||
|
||||
parallel_for(state.taskCount, [&](const size_t taskIndex)
|
||||
{
|
||||
/* calculate range */
|
||||
const size_t k0 = (taskIndex+0)*state.size()/state.taskCount;
|
||||
const size_t k1 = (taskIndex+1)*state.size()/state.taskCount;
|
||||
size_t i0 = state.i0[taskIndex];
|
||||
size_t j0 = state.j0[taskIndex];
|
||||
|
||||
/* iterate over arrays */
|
||||
size_t k=k0;
|
||||
for (size_t i=i0; k<k1; i++) {
|
||||
const size_t N = array2[i] ? array2[i]->size() : 0;
|
||||
const size_t r0 = j0, r1 = min(N,r0+k1-k);
|
||||
if (r1 > r0) temp[taskIndex] = reduction(temp[taskIndex],func(array2[i],range<size_t>(r0,r1),k));
|
||||
k+=r1-r0; j0 = 0;
|
||||
}
|
||||
});
|
||||
|
||||
Value ret = identity;
|
||||
for (size_t i=0; i<state.taskCount; i++)
|
||||
ret = reduction(ret,temp[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction)
|
||||
{
|
||||
return parallel_for_for_reduce(array2,1,identity,func,reduction);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,85 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_for_for_prefix_sum.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_for_for_prefix_sum_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_for_for_prefix_sum_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
|
||||
/* create vector with random numbers */
|
||||
const size_t M = 10;
|
||||
std::vector<atomic<size_t>> flattened;
|
||||
typedef std::vector<std::vector<size_t>* > ArrayArray;
|
||||
ArrayArray array2(M);
|
||||
size_t K = 0;
|
||||
for (size_t i=0; i<M; i++) {
|
||||
const size_t N = rand() % 10;
|
||||
K += N;
|
||||
array2[i] = new std::vector<size_t>(N);
|
||||
for (size_t j=0; j<N; j++)
|
||||
(*array2[i])[j] = rand() % 10;
|
||||
}
|
||||
|
||||
/* array to test global index */
|
||||
std::vector<atomic<size_t>> verify_k(K);
|
||||
for (size_t i=0; i<K; i++) verify_k[i].store(0);
|
||||
|
||||
ParallelForForPrefixSumState<size_t> state(array2,size_t(1));
|
||||
|
||||
/* dry run only counts */
|
||||
size_t S = parallel_for_for_prefix_sum0( state, array2, size_t(0), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k, size_t i) -> size_t
|
||||
{
|
||||
size_t s = 0;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
s += (*v)[i];
|
||||
verify_k[k++]++;
|
||||
}
|
||||
return s;
|
||||
}, [](size_t v0, size_t v1) { return v0+v1; });
|
||||
|
||||
/* create properly sized output array */
|
||||
flattened.resize(S);
|
||||
for (auto& a : flattened) a.store(0);
|
||||
|
||||
/* now we actually fill the flattened array */
|
||||
parallel_for_for_prefix_sum1( state, array2, size_t(0), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k, size_t i, const size_t base) -> size_t
|
||||
{
|
||||
size_t s = 0;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
for (size_t j=0; j<(*v)[i]; j++) {
|
||||
flattened[base+s+j]++;
|
||||
}
|
||||
s += (*v)[i];
|
||||
verify_k[k++]++;
|
||||
}
|
||||
return s;
|
||||
}, [](size_t v0, size_t v1) { return v0+v1; });
|
||||
|
||||
/* check global index */
|
||||
for (size_t i=0; i<K; i++)
|
||||
passed &= (verify_k[i] == 2);
|
||||
|
||||
/* check if each element was assigned exactly once */
|
||||
for (size_t i=0; i<flattened.size(); i++)
|
||||
passed &= (flattened[i] == 1);
|
||||
|
||||
/* delete arrays again */
|
||||
for (size_t i=0; i<array2.size(); i++)
|
||||
delete array2[i];
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
parallel_for_for_prefix_sum_regression_test parallel_for_for_prefix_sum_regression("parallel_for_for_prefix_sum_regression_test");
|
||||
}
|
||||
@ -0,0 +1,112 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_for_for.h"
|
||||
#include "parallel_prefix_sum.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Value>
|
||||
struct ParallelForForPrefixSumState : public ParallelForForState
|
||||
{
|
||||
__forceinline ParallelForForPrefixSumState () {}
|
||||
|
||||
template<typename ArrayArray>
|
||||
__forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize)
|
||||
: ParallelForForState(array2,minStepSize) {}
|
||||
|
||||
ParallelPrefixSumState<Value> prefix_state;
|
||||
};
|
||||
|
||||
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, Index minStepSize,
|
||||
const Value& identity, const Func& func, const Reduction& reduction)
|
||||
{
|
||||
/* calculate number of tasks to use */
|
||||
const size_t taskCount = state.taskCount;
|
||||
/* perform parallel prefix sum */
|
||||
parallel_for(taskCount, [&](const size_t taskIndex)
|
||||
{
|
||||
const size_t k0 = (taskIndex+0)*state.size()/taskCount;
|
||||
const size_t k1 = (taskIndex+1)*state.size()/taskCount;
|
||||
size_t i0 = state.i0[taskIndex];
|
||||
size_t j0 = state.j0[taskIndex];
|
||||
|
||||
/* iterate over arrays */
|
||||
size_t k=k0;
|
||||
Value N=identity;
|
||||
for (size_t i=i0; k<k1; i++) {
|
||||
const size_t size = array2[i] ? array2[i]->size() : 0;
|
||||
const size_t r0 = j0, r1 = min(size,r0+k1-k);
|
||||
if (r1 > r0) N = reduction(N, func(array2[i],range<Index>((Index)r0,(Index)r1),(Index)k,(Index)i));
|
||||
k+=r1-r0; j0 = 0;
|
||||
}
|
||||
state.prefix_state.counts[taskIndex] = N;
|
||||
});
|
||||
|
||||
/* calculate prefix sum */
|
||||
Value sum=identity;
|
||||
for (size_t i=0; i<taskCount; i++)
|
||||
{
|
||||
const Value c = state.prefix_state.counts[i];
|
||||
state.prefix_state.sums[i] = sum;
|
||||
sum=reduction(sum,c);
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, Index minStepSize,
|
||||
const Value& identity, const Func& func, const Reduction& reduction)
|
||||
{
|
||||
/* calculate number of tasks to use */
|
||||
const size_t taskCount = state.taskCount;
|
||||
/* perform parallel prefix sum */
|
||||
parallel_for(taskCount, [&](const size_t taskIndex)
|
||||
{
|
||||
const size_t k0 = (taskIndex+0)*state.size()/taskCount;
|
||||
const size_t k1 = (taskIndex+1)*state.size()/taskCount;
|
||||
size_t i0 = state.i0[taskIndex];
|
||||
size_t j0 = state.j0[taskIndex];
|
||||
|
||||
/* iterate over arrays */
|
||||
size_t k=k0;
|
||||
Value N=identity;
|
||||
for (size_t i=i0; k<k1; i++) {
|
||||
const size_t size = array2[i] ? array2[i]->size() : 0;
|
||||
const size_t r0 = j0, r1 = min(size,r0+k1-k);
|
||||
if (r1 > r0) N = reduction(N, func(array2[i],range<Index>((Index)r0,(Index)r1),(Index)k,(Index)i,reduction(state.prefix_state.sums[taskIndex],N)));
|
||||
k+=r1-r0; j0 = 0;
|
||||
}
|
||||
state.prefix_state.counts[taskIndex] = N;
|
||||
});
|
||||
|
||||
/* calculate prefix sum */
|
||||
Value sum=identity;
|
||||
for (size_t i=0; i<taskCount; i++)
|
||||
{
|
||||
const Value c = state.prefix_state.counts[i];
|
||||
state.prefix_state.sums[i] = sum;
|
||||
sum=reduction(sum,c);
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
|
||||
const Value& identity, const Func& func, const Reduction& reduction)
|
||||
{
|
||||
return parallel_for_for_prefix_sum0(state,array2,size_t(1),identity,func,reduction);
|
||||
}
|
||||
|
||||
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
|
||||
const Value& identity, const Func& func, const Reduction& reduction)
|
||||
{
|
||||
return parallel_for_for_prefix_sum1(state,array2,size_t(1),identity,func,reduction);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,47 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_map.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_map_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_map_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
|
||||
/* create key/value vectors with random numbers */
|
||||
const size_t N = 10000;
|
||||
std::vector<uint32_t> keys(N);
|
||||
std::vector<uint32_t> vals(N);
|
||||
for (size_t i=0; i<N; i++) keys[i] = 2*unsigned(i)*647382649;
|
||||
for (size_t i=0; i<N; i++) std::swap(keys[i],keys[rand()%N]);
|
||||
for (size_t i=0; i<N; i++) vals[i] = 2*rand();
|
||||
|
||||
/* create map */
|
||||
parallel_map<uint32_t,uint32_t> map;
|
||||
map.init(keys,vals);
|
||||
|
||||
/* check that all keys are properly mapped */
|
||||
for (size_t i=0; i<N; i++) {
|
||||
const uint32_t* val = map.lookup(keys[i]);
|
||||
passed &= val && (*val == vals[i]);
|
||||
}
|
||||
|
||||
/* check that these keys are not in the map */
|
||||
for (size_t i=0; i<N; i++) {
|
||||
passed &= !map.lookup(keys[i]+1);
|
||||
}
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
parallel_map_regression_test parallel_map_regression("parallel_map_regression_test");
|
||||
}
|
||||
@ -0,0 +1,85 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_sort.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! implementation of a key/value map with parallel construction */
|
||||
template<typename Key, typename Val>
|
||||
class parallel_map
|
||||
{
|
||||
/* key/value pair to build the map */
|
||||
struct KeyValue
|
||||
{
|
||||
__forceinline KeyValue () {}
|
||||
|
||||
__forceinline KeyValue (const Key key, const Val val)
|
||||
: key(key), val(val) {}
|
||||
|
||||
__forceinline operator Key() const {
|
||||
return key;
|
||||
}
|
||||
|
||||
public:
|
||||
Key key;
|
||||
Val val;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/*! parallel map constructors */
|
||||
parallel_map () {}
|
||||
|
||||
/*! construction from pair of vectors */
|
||||
template<typename KeyVector, typename ValVector>
|
||||
parallel_map (const KeyVector& keys, const ValVector& values) { init(keys,values); }
|
||||
|
||||
/*! initialized the parallel map from a vector with keys and values */
|
||||
template<typename KeyVector, typename ValVector>
|
||||
void init(const KeyVector& keys, const ValVector& values)
|
||||
{
|
||||
/* reserve sufficient space for all data */
|
||||
assert(keys.size() == values.size());
|
||||
vec.resize(keys.size());
|
||||
|
||||
/* generate key/value pairs */
|
||||
parallel_for( size_t(0), keys.size(), size_t(4*4096), [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
vec[i] = KeyValue((Key)keys[i],values[i]);
|
||||
});
|
||||
|
||||
/* perform parallel radix sort of the key/value pairs */
|
||||
std::vector<KeyValue> temp(keys.size());
|
||||
radix_sort<KeyValue,Key>(vec.data(),temp.data(),keys.size());
|
||||
}
|
||||
|
||||
/*! Returns a pointer to the value associated with the specified key. The pointer will be nullptr of the key is not contained in the map. */
|
||||
__forceinline const Val* lookup(const Key& key) const
|
||||
{
|
||||
typename std::vector<KeyValue>::const_iterator i = std::lower_bound(vec.begin(), vec.end(), key);
|
||||
if (i == vec.end()) return nullptr;
|
||||
if (i->key != key) return nullptr;
|
||||
return &i->val;
|
||||
}
|
||||
|
||||
/*! If the key is in the map, the function returns the value associated with the key, otherwise it returns the default value. */
|
||||
__forceinline Val lookup(const Key& key, const Val& def) const
|
||||
{
|
||||
typename std::vector<KeyValue>::const_iterator i = std::lower_bound(vec.begin(), vec.end(), key);
|
||||
if (i == vec.end()) return def;
|
||||
if (i->key != key) return def;
|
||||
return i->val;
|
||||
}
|
||||
|
||||
/*! clears all state */
|
||||
void clear() {
|
||||
vec.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<KeyValue> vec; //!< vector containing sorted elements
|
||||
};
|
||||
}
|
||||
@ -0,0 +1,53 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_partition.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_partition_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_partition_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
|
||||
for (size_t i=0; i<100; i++)
|
||||
{
|
||||
/* create random permutation */
|
||||
size_t N = std::rand() % 1000000;
|
||||
std::vector<unsigned> array(N);
|
||||
for (unsigned i=0; i<N; i++) array[i] = i;
|
||||
for (auto& v : array) std::swap(v,array[std::rand()%array.size()]);
|
||||
size_t split = std::rand() % (N+1);
|
||||
|
||||
/* perform parallel partitioning */
|
||||
size_t left_sum = 0, right_sum = 0;
|
||||
size_t mid = parallel_partitioning(array.data(),0,array.size(),0,left_sum,right_sum,
|
||||
[&] ( size_t i ) { return i < split; },
|
||||
[] ( size_t& sum, unsigned v) { sum += v; },
|
||||
[] ( size_t& sum, size_t v) { sum += v; },
|
||||
128);
|
||||
|
||||
/*serial_partitioning(array.data(),0,array.size(),left_sum,right_sum,
|
||||
[&] ( size_t i ) { return i < split; },
|
||||
[] ( size_t& left_sum, int v) { left_sum += v; });*/
|
||||
|
||||
/* verify result */
|
||||
passed &= mid == split;
|
||||
passed &= left_sum == split*(split-1)/2;
|
||||
passed &= right_sum == N*(N-1)/2-left_sum;
|
||||
for (size_t i=0; i<split; i++) passed &= array[i] < split;
|
||||
for (size_t i=split; i<N; i++) passed &= array[i] >= split;
|
||||
}
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
parallel_partition_regression_test parallel_partition_regression("parallel_partition_regression_test");
|
||||
}
|
||||
@ -0,0 +1,283 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_for.h"
|
||||
#include "../math/range.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* serial partitioning */
|
||||
template<typename T, typename V, typename IsLeft, typename Reduction_T>
|
||||
__forceinline size_t serial_partitioning(T* array,
|
||||
const size_t begin,
|
||||
const size_t end,
|
||||
V& leftReduction,
|
||||
V& rightReduction,
|
||||
const IsLeft& is_left,
|
||||
const Reduction_T& reduction_t)
|
||||
{
|
||||
T* l = array + begin;
|
||||
T* r = array + end - 1;
|
||||
|
||||
while(1)
|
||||
{
|
||||
/* *l < pivot */
|
||||
while (likely(l <= r && is_left(*l) ))
|
||||
{
|
||||
//prefetchw(l+4); // FIXME: enable?
|
||||
reduction_t(leftReduction,*l);
|
||||
++l;
|
||||
}
|
||||
/* *r >= pivot) */
|
||||
while (likely(l <= r && !is_left(*r)))
|
||||
{
|
||||
//prefetchw(r-4); FIXME: enable?
|
||||
reduction_t(rightReduction,*r);
|
||||
--r;
|
||||
}
|
||||
if (r<l) break;
|
||||
|
||||
reduction_t(leftReduction ,*r);
|
||||
reduction_t(rightReduction,*l);
|
||||
xchg(*l,*r);
|
||||
l++; r--;
|
||||
}
|
||||
|
||||
return l - array;
|
||||
}
|
||||
|
||||
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
|
||||
class __aligned(64) parallel_partition_task
|
||||
{
|
||||
ALIGNED_CLASS_(64);
|
||||
private:
|
||||
|
||||
static const size_t MAX_TASKS = 64;
|
||||
|
||||
T* array;
|
||||
size_t N;
|
||||
const IsLeft& is_left;
|
||||
const Reduction_T& reduction_t;
|
||||
const Reduction_V& reduction_v;
|
||||
const Vi& identity;
|
||||
|
||||
size_t numTasks;
|
||||
__aligned(64) size_t counter_start[MAX_TASKS+1];
|
||||
__aligned(64) size_t counter_left[MAX_TASKS+1];
|
||||
__aligned(64) range<ssize_t> leftMisplacedRanges[MAX_TASKS];
|
||||
__aligned(64) range<ssize_t> rightMisplacedRanges[MAX_TASKS];
|
||||
__aligned(64) V leftReductions[MAX_TASKS];
|
||||
__aligned(64) V rightReductions[MAX_TASKS];
|
||||
|
||||
public:
|
||||
|
||||
__forceinline parallel_partition_task(T* array,
|
||||
const size_t N,
|
||||
const Vi& identity,
|
||||
const IsLeft& is_left,
|
||||
const Reduction_T& reduction_t,
|
||||
const Reduction_V& reduction_v,
|
||||
const size_t BLOCK_SIZE)
|
||||
|
||||
: array(array), N(N), is_left(is_left), reduction_t(reduction_t), reduction_v(reduction_v), identity(identity),
|
||||
numTasks(min((N+BLOCK_SIZE-1)/BLOCK_SIZE,min(TaskScheduler::threadCount(),MAX_TASKS))) {}
|
||||
|
||||
__forceinline const range<ssize_t>* findStartRange(size_t& index, const range<ssize_t>* const r, const size_t numRanges)
|
||||
{
|
||||
size_t i = 0;
|
||||
while(index >= (size_t)r[i].size())
|
||||
{
|
||||
assert(i < numRanges);
|
||||
index -= (size_t)r[i].size();
|
||||
i++;
|
||||
}
|
||||
return &r[i];
|
||||
}
|
||||
|
||||
__forceinline void swapItemsInMisplacedRanges(const size_t numLeftMisplacedRanges,
|
||||
const size_t numRightMisplacedRanges,
|
||||
const size_t startID,
|
||||
const size_t endID)
|
||||
{
|
||||
size_t leftLocalIndex = startID;
|
||||
size_t rightLocalIndex = startID;
|
||||
const range<ssize_t>* l_range = findStartRange(leftLocalIndex,leftMisplacedRanges,numLeftMisplacedRanges);
|
||||
const range<ssize_t>* r_range = findStartRange(rightLocalIndex,rightMisplacedRanges,numRightMisplacedRanges);
|
||||
|
||||
size_t l_left = l_range->size() - leftLocalIndex;
|
||||
size_t r_left = r_range->size() - rightLocalIndex;
|
||||
T *__restrict__ l = &array[l_range->begin() + leftLocalIndex];
|
||||
T *__restrict__ r = &array[r_range->begin() + rightLocalIndex];
|
||||
size_t size = endID - startID;
|
||||
size_t items = min(size,min(l_left,r_left));
|
||||
|
||||
while (size)
|
||||
{
|
||||
if (unlikely(l_left == 0))
|
||||
{
|
||||
l_range++;
|
||||
l_left = l_range->size();
|
||||
l = &array[l_range->begin()];
|
||||
items = min(size,min(l_left,r_left));
|
||||
}
|
||||
|
||||
if (unlikely(r_left == 0))
|
||||
{
|
||||
r_range++;
|
||||
r_left = r_range->size();
|
||||
r = &array[r_range->begin()];
|
||||
items = min(size,min(l_left,r_left));
|
||||
}
|
||||
|
||||
size -= items;
|
||||
l_left -= items;
|
||||
r_left -= items;
|
||||
|
||||
while(items) {
|
||||
items--;
|
||||
xchg(*l++,*r++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline size_t partition(V& leftReduction, V& rightReduction)
|
||||
{
|
||||
/* partition the individual ranges for each task */
|
||||
parallel_for(numTasks,[&] (const size_t taskID) {
|
||||
const size_t startID = (taskID+0)*N/numTasks;
|
||||
const size_t endID = (taskID+1)*N/numTasks;
|
||||
V local_left(identity);
|
||||
V local_right(identity);
|
||||
const size_t mid = serial_partitioning(array,startID,endID,local_left,local_right,is_left,reduction_t);
|
||||
counter_start[taskID] = startID;
|
||||
counter_left [taskID] = mid-startID;
|
||||
leftReductions[taskID] = local_left;
|
||||
rightReductions[taskID] = local_right;
|
||||
});
|
||||
counter_start[numTasks] = N;
|
||||
counter_left[numTasks] = 0;
|
||||
|
||||
/* finalize the reductions */
|
||||
for (size_t i=0; i<numTasks; i++) {
|
||||
reduction_v(leftReduction,leftReductions[i]);
|
||||
reduction_v(rightReduction,rightReductions[i]);
|
||||
}
|
||||
|
||||
/* calculate mid point for partitioning */
|
||||
size_t mid = counter_left[0];
|
||||
for (size_t i=1; i<numTasks; i++)
|
||||
mid += counter_left[i];
|
||||
const range<ssize_t> globalLeft (0,mid);
|
||||
const range<ssize_t> globalRight(mid,N);
|
||||
|
||||
/* calculate all left and right ranges that are on the wrong global side */
|
||||
size_t numMisplacedRangesLeft = 0;
|
||||
size_t numMisplacedRangesRight = 0;
|
||||
size_t numMisplacedItemsLeft = 0;
|
||||
size_t numMisplacedItemsRight = 0;
|
||||
|
||||
for (size_t i=0; i<numTasks; i++)
|
||||
{
|
||||
const range<ssize_t> left_range (counter_start[i], counter_start[i] + counter_left[i]);
|
||||
const range<ssize_t> right_range(counter_start[i] + counter_left[i], counter_start[i+1]);
|
||||
const range<ssize_t> left_misplaced = globalLeft. intersect(right_range);
|
||||
const range<ssize_t> right_misplaced = globalRight.intersect(left_range);
|
||||
|
||||
if (!left_misplaced.empty())
|
||||
{
|
||||
numMisplacedItemsLeft += left_misplaced.size();
|
||||
leftMisplacedRanges[numMisplacedRangesLeft++] = left_misplaced;
|
||||
}
|
||||
|
||||
if (!right_misplaced.empty())
|
||||
{
|
||||
numMisplacedItemsRight += right_misplaced.size();
|
||||
rightMisplacedRanges[numMisplacedRangesRight++] = right_misplaced;
|
||||
}
|
||||
}
|
||||
assert( numMisplacedItemsLeft == numMisplacedItemsRight );
|
||||
|
||||
/* if no items are misplaced we are done */
|
||||
if (numMisplacedItemsLeft == 0)
|
||||
return mid;
|
||||
|
||||
/* otherwise we copy the items to the right place in parallel */
|
||||
parallel_for(numTasks,[&] (const size_t taskID) {
|
||||
const size_t startID = (taskID+0)*numMisplacedItemsLeft/numTasks;
|
||||
const size_t endID = (taskID+1)*numMisplacedItemsLeft/numTasks;
|
||||
swapItemsInMisplacedRanges(numMisplacedRangesLeft,numMisplacedRangesRight,startID,endID);
|
||||
});
|
||||
|
||||
return mid;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
|
||||
__noinline size_t parallel_partitioning(T* array,
|
||||
const size_t begin,
|
||||
const size_t end,
|
||||
const Vi &identity,
|
||||
V &leftReduction,
|
||||
V &rightReduction,
|
||||
const IsLeft& is_left,
|
||||
const Reduction_T& reduction_t,
|
||||
const Reduction_V& reduction_v,
|
||||
size_t BLOCK_SIZE = 128)
|
||||
{
|
||||
/* fall back to single threaded partitioning for small N */
|
||||
if (unlikely(end-begin < BLOCK_SIZE))
|
||||
return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);
|
||||
|
||||
/* otherwise use parallel code */
|
||||
else {
|
||||
typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
|
||||
std::unique_ptr<partition_task> p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
|
||||
return begin+p->partition(leftReduction,rightReduction);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
|
||||
__noinline size_t parallel_partitioning(T* array,
|
||||
const size_t begin,
|
||||
const size_t end,
|
||||
const Vi &identity,
|
||||
V &leftReduction,
|
||||
V &rightReduction,
|
||||
const IsLeft& is_left,
|
||||
const Reduction_T& reduction_t,
|
||||
const Reduction_V& reduction_v,
|
||||
size_t BLOCK_SIZE,
|
||||
size_t PARALLEL_THRESHOLD)
|
||||
{
|
||||
/* fall back to single threaded partitioning for small N */
|
||||
if (unlikely(end-begin < PARALLEL_THRESHOLD))
|
||||
return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);
|
||||
|
||||
/* otherwise use parallel code */
|
||||
else {
|
||||
typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
|
||||
std::unique_ptr<partition_task> p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
|
||||
return begin+p->partition(leftReduction,rightReduction);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*! Partitions array[begin,end) with is_left without computing any
 *  reduction: both reduction callbacks are empty. Returns the index of
 *  the first element of the right side. */
template<typename T, typename IsLeft>
inline size_t parallel_partitioning(T* array,
                                    const size_t begin,
                                    const size_t end,
                                    const IsLeft& is_left,
                                    size_t BLOCK_SIZE = 128)
{
  /* no-op callbacks, the reduction values below are never updated */
  const auto ignoreElement = [] (size_t& t,const T& ref) { };
  const auto ignoreMerge   = [] (size_t& t0,size_t& t1) { };

  size_t leftReduction = 0;
  size_t rightReduction = 0;
  return parallel_partitioning(array,begin,end,0,leftReduction,rightReduction,is_left,
                               ignoreElement,ignoreMerge,BLOCK_SIZE);
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,48 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_prefix_sum.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_prefix_sum_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_prefix_sum_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
const size_t M = 10;
|
||||
|
||||
for (size_t N=10; N<10000000; N=size_t(2.1*N))
|
||||
{
|
||||
/* initialize array with random numbers */
|
||||
uint32_t sum0 = 0;
|
||||
std::vector<uint32_t> src(N);
|
||||
for (size_t i=0; i<N; i++) {
|
||||
sum0 += src[i] = rand();
|
||||
}
|
||||
|
||||
/* calculate parallel prefix sum */
|
||||
std::vector<uint32_t> dst(N);
|
||||
for (auto& v : dst) v = 0;
|
||||
|
||||
for (size_t i=0; i<M; i++) {
|
||||
uint32_t sum1 = parallel_prefix_sum(src,dst,N,0,std::plus<uint32_t>());
|
||||
passed &= (sum0 == sum1);
|
||||
}
|
||||
|
||||
/* check if prefix sum is correct */
|
||||
for (size_t i=0, sum=0; i<N; sum+=src[i++])
|
||||
passed &= ((uint32_t)sum == dst[i]);
|
||||
}
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
parallel_prefix_sum_regression_test parallel_prefix_sum_regression("parallel_prefix_sum_regression");
|
||||
}
|
||||
@ -0,0 +1,85 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! Per-task storage for parallel_prefix_sum. */
template<typename Value>
struct ParallelPrefixSumState
{
  /*! upper bound for the number of tasks */
  enum { MAX_TASKS = 64 };

  Value counts[MAX_TASKS]; //!< value returned by the function for each task
  Value sums  [MAX_TASKS]; //!< reduction of the counts of all preceding tasks
};
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)
|
||||
{
|
||||
/* calculate number of tasks to use */
|
||||
const size_t numThreads = TaskScheduler::threadCount();
|
||||
const size_t numBlocks = (last-first+minStepSize-1)/minStepSize;
|
||||
const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));
|
||||
|
||||
/* perform parallel prefix sum */
|
||||
parallel_for(taskCount, [&](const size_t taskIndex)
|
||||
{
|
||||
const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount;
|
||||
const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount;
|
||||
state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]);
|
||||
});
|
||||
|
||||
/* calculate prefix sum */
|
||||
Value sum=identity;
|
||||
for (size_t i=0; i<taskCount; i++)
|
||||
{
|
||||
const Value c = state.counts[i];
|
||||
state.sums[i] = sum;
|
||||
sum=reduction(sum,c);
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*! parallel calculation of prefix sums */
|
||||
template<typename SrcArray, typename DstArray, typename Value, typename Add>
|
||||
__forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)
|
||||
{
|
||||
/* perform single threaded prefix operation for small N */
|
||||
if (N < SINGLE_THREAD_THRESHOLD)
|
||||
{
|
||||
Value sum=identity;
|
||||
for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum;
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* perform parallel prefix operation for large N */
|
||||
else
|
||||
{
|
||||
ParallelPrefixSumState<Value> state;
|
||||
|
||||
/* initial run just sets up start values for subtasks */
|
||||
parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
|
||||
|
||||
Value s = identity;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]);
|
||||
return s;
|
||||
|
||||
}, add);
|
||||
|
||||
/* final run calculates prefix sum */
|
||||
return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
|
||||
|
||||
Value s = identity;
|
||||
for (size_t i=r.begin(); i<r.end(); i++) {
|
||||
dst[i] = add(sum,s);
|
||||
s = add(s,src[i]);
|
||||
}
|
||||
return s;
|
||||
|
||||
}, add);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,49 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_reduce.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_reduce_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_reduce_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
|
||||
const size_t M = 10;
|
||||
for (size_t N=10; N<10000000; N=size_t(2.1*N))
|
||||
{
|
||||
/* sequentially calculate sum of squares */
|
||||
size_t sum0 = 0;
|
||||
for (size_t i=0; i<N; i++) {
|
||||
sum0 += i*i;
|
||||
}
|
||||
|
||||
/* parallel calculation of sum of squares */
|
||||
for (size_t m=0; m<M; m++)
|
||||
{
|
||||
size_t sum1 = parallel_reduce( size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r) -> size_t
|
||||
{
|
||||
size_t s = 0;
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
s += i*i;
|
||||
return s;
|
||||
},
|
||||
[](const size_t v0, const size_t v1) {
|
||||
return v0+v1;
|
||||
});
|
||||
passed = sum0 == sum1;
|
||||
}
|
||||
}
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
parallel_reduce_regression_test parallel_reduce_regression("parallel_reduce_regression_test");
|
||||
}
|
||||
@ -0,0 +1,146 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_for.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value sequential_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
return func(range<Index>(first,last));
|
||||
}
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value sequential_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
return func(range<Index>(first,last));
|
||||
}
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__noinline Value parallel_reduce_internal( Index taskCount, const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
const Index maxTasks = 512;
|
||||
const Index threadCount = (Index) TaskScheduler::threadCount();
|
||||
taskCount = min(taskCount,threadCount,maxTasks);
|
||||
|
||||
/* parallel invokation of all tasks */
|
||||
dynamic_large_stack_array(Value,values,taskCount,8192); // consumes at most 8192 bytes on the stack
|
||||
parallel_for(taskCount, [&](const Index taskIndex) {
|
||||
const Index k0 = first+(taskIndex+0)*(last-first)/taskCount;
|
||||
const Index k1 = first+(taskIndex+1)*(last-first)/taskCount;
|
||||
values[taskIndex] = func(range<Index>(k0,k1));
|
||||
});
|
||||
|
||||
/* perform reduction over all tasks */
|
||||
Value v = identity;
|
||||
for (Index i=0; i<taskCount; i++) v = reduction(v,values[i]);
|
||||
return v;
|
||||
}
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
#if defined(TASKING_INTERNAL)
|
||||
|
||||
/* fast path for small number of iterations */
|
||||
Index taskCount = (last-first+minStepSize-1)/minStepSize;
|
||||
if (likely(taskCount == 1)) {
|
||||
return func(range<Index>(first,last));
|
||||
}
|
||||
return parallel_reduce_internal(taskCount,first,last,minStepSize,identity,func,reduction);
|
||||
|
||||
#elif defined(TASKING_TBB)
|
||||
#if TBB_INTERFACE_VERSION >= 12002
|
||||
tbb::task_group_context context;
|
||||
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
|
||||
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
|
||||
reduction,context);
|
||||
if (context.is_group_execution_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
return v;
|
||||
#else
|
||||
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
|
||||
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
|
||||
reduction);
|
||||
if (tbb::task::self().is_cancelled())
|
||||
throw std::runtime_error("task cancelled");
|
||||
return v;
|
||||
#endif
|
||||
#else // TASKING_PPL
|
||||
struct AlignedValue
|
||||
{
|
||||
char storage[__alignof(Value)+sizeof(Value)];
|
||||
static uintptr_t alignUp(uintptr_t p, size_t a) { return p + (~(p - 1) % a); };
|
||||
Value* getValuePtr() { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
|
||||
const Value* getValuePtr() const { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
|
||||
AlignedValue(const Value& v) { new(getValuePtr()) Value(v); }
|
||||
AlignedValue(const AlignedValue& v) { new(getValuePtr()) Value(*v.getValuePtr()); }
|
||||
AlignedValue(const AlignedValue&& v) { new(getValuePtr()) Value(*v.getValuePtr()); };
|
||||
AlignedValue& operator = (const AlignedValue& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
|
||||
AlignedValue& operator = (const AlignedValue&& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
|
||||
operator Value() const { return *getValuePtr(); }
|
||||
};
|
||||
|
||||
struct Iterator_Index
|
||||
{
|
||||
Index v;
|
||||
typedef std::forward_iterator_tag iterator_category;
|
||||
typedef AlignedValue value_type;
|
||||
typedef Index difference_type;
|
||||
typedef Index distance_type;
|
||||
typedef AlignedValue* pointer;
|
||||
typedef AlignedValue& reference;
|
||||
__forceinline Iterator_Index() {}
|
||||
__forceinline Iterator_Index(Index v) : v(v) {}
|
||||
__forceinline bool operator== (Iterator_Index other) { return v == other.v; }
|
||||
__forceinline bool operator!= (Iterator_Index other) { return v != other.v; }
|
||||
__forceinline Iterator_Index operator++() { return Iterator_Index(++v); }
|
||||
__forceinline Iterator_Index operator++(int) { return Iterator_Index(v++); }
|
||||
};
|
||||
|
||||
auto range_reduction = [&](Iterator_Index begin, Iterator_Index end, const AlignedValue& start) {
|
||||
assert(begin.v < end.v);
|
||||
return reduction(start, func(range<Index>(begin.v, end.v)));
|
||||
};
|
||||
const Value v = concurrency::parallel_reduce(Iterator_Index(first), Iterator_Index(last), AlignedValue(identity), range_reduction, reduction);
|
||||
return v;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
if (likely(last-first < parallel_threshold)) {
|
||||
return func(range<Index>(first,last));
|
||||
} else {
|
||||
return parallel_reduce(first,last,minStepSize,identity,func,reduction);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_reduce( const range<Index> range, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
return parallel_reduce(range.begin(),range.end(),minStepSize,parallel_threshold,identity,func,reduction);
|
||||
}
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
auto funcr = [&] ( const range<Index> r ) {
|
||||
Value v = identity;
|
||||
for (Index i=r.begin(); i<r.end(); i++)
|
||||
v = reduction(v,func(i));
|
||||
return v;
|
||||
};
|
||||
return parallel_reduce(first,last,Index(1),identity,funcr,reduction);
|
||||
}
|
||||
|
||||
template<typename Index, typename Value, typename Func, typename Reduction>
|
||||
__forceinline Value parallel_reduce( const range<Index> range, const Value& identity, const Func& func, const Reduction& reduction )
|
||||
{
|
||||
return parallel_reduce(range.begin(),range.end(),Index(1),identity,func,reduction);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,43 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_set.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
struct parallel_set_regression_test : public RegressionTest
|
||||
{
|
||||
parallel_set_regression_test(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
|
||||
/* create vector with random numbers */
|
||||
const size_t N = 10000;
|
||||
std::vector<uint32_t> unsorted(N);
|
||||
for (size_t i=0; i<N; i++) unsorted[i] = 2*rand();
|
||||
|
||||
/* created set from numbers */
|
||||
parallel_set<uint32_t> sorted;
|
||||
sorted.init(unsorted);
|
||||
|
||||
/* check that all elements are in the set */
|
||||
for (size_t i=0; i<N; i++) {
|
||||
passed &= sorted.lookup(unsorted[i]);
|
||||
}
|
||||
|
||||
/* check that these elements are not in the set */
|
||||
for (size_t i=0; i<N; i++) {
|
||||
passed &= !sorted.lookup(unsorted[i]+1);
|
||||
}
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
/* namespace-scope instance; its constructor passes the test to registerRegressionTest */
parallel_set_regression_test parallel_set_regression("parallel_set_regression_test");
|
||||
}
|
||||
@ -0,0 +1,52 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "parallel_sort.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* implementation of a set of values with parallel construction */
|
||||
template<typename T>
|
||||
class parallel_set
|
||||
{
|
||||
public:
|
||||
|
||||
/*! default constructor for the parallel set */
|
||||
parallel_set () {}
|
||||
|
||||
/*! construction from vector */
|
||||
template<typename Vector>
|
||||
parallel_set (const Vector& in) { init(in); }
|
||||
|
||||
/*! initialized the parallel set from a vector */
|
||||
template<typename Vector>
|
||||
void init(const Vector& in)
|
||||
{
|
||||
/* copy data to internal vector */
|
||||
vec.resize(in.size());
|
||||
parallel_for( size_t(0), in.size(), size_t(4*4096), [&](const range<size_t>& r) {
|
||||
for (size_t i=r.begin(); i<r.end(); i++)
|
||||
vec[i] = in[i];
|
||||
});
|
||||
|
||||
/* sort the data */
|
||||
std::vector<T> temp(in.size());
|
||||
radix_sort<T>(vec.data(),temp.data(),vec.size());
|
||||
}
|
||||
|
||||
/*! tests if some element is in the set */
|
||||
__forceinline bool lookup(const T& elt) const {
|
||||
return std::binary_search(vec.begin(), vec.end(), elt);
|
||||
}
|
||||
|
||||
/*! clears all state */
|
||||
void clear() {
|
||||
vec.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<T> vec; //!< vector containing sorted elements
|
||||
};
|
||||
}
|
||||
@ -0,0 +1,50 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "parallel_sort.h"
|
||||
#include "../sys/regression.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<typename Key>
|
||||
struct RadixSortRegressionTest : public RegressionTest
|
||||
{
|
||||
RadixSortRegressionTest(const char* name) : RegressionTest(name) {
|
||||
registerRegressionTest(this);
|
||||
}
|
||||
|
||||
bool run ()
|
||||
{
|
||||
bool passed = true;
|
||||
const size_t M = 10;
|
||||
|
||||
for (size_t N=10; N<1000000; N=size_t(2.1*N))
|
||||
{
|
||||
std::vector<Key> src(N); memset(src.data(),0,N*sizeof(Key));
|
||||
std::vector<Key> tmp(N); memset(tmp.data(),0,N*sizeof(Key));
|
||||
for (size_t i=0; i<N; i++) src[i] = uint64_t(rand())*uint64_t(rand());
|
||||
|
||||
/* calculate checksum */
|
||||
Key sum0 = 0; for (size_t i=0; i<N; i++) sum0 += src[i];
|
||||
|
||||
/* sort numbers */
|
||||
for (size_t i=0; i<M; i++) {
|
||||
radix_sort<Key>(src.data(),tmp.data(),N);
|
||||
}
|
||||
|
||||
/* calculate checksum */
|
||||
Key sum1 = 0; for (size_t i=0; i<N; i++) sum1 += src[i];
|
||||
if (sum0 != sum1) passed = false;
|
||||
|
||||
/* check if numbers are sorted */
|
||||
for (size_t i=1; i<N; i++)
|
||||
passed &= src[i-1] <= src[i];
|
||||
}
|
||||
|
||||
return passed;
|
||||
}
|
||||
};
|
||||
|
||||
/* radix sort regression test instantiated for 32-bit keys */
RadixSortRegressionTest<uint32_t> test_u32("RadixSortRegressionTestU32");
|
||||
/* radix sort regression test instantiated for 64-bit keys */
RadixSortRegressionTest<uint64_t> test_u64("RadixSortRegressionTestU64");
|
||||
}
|
||||
@ -0,0 +1,454 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../simd/simd.h"
|
||||
#include "parallel_for.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<class T>
|
||||
__forceinline void insertionsort_ascending(T *__restrict__ array, const size_t length)
|
||||
{
|
||||
for(size_t i = 1;i<length;++i)
|
||||
{
|
||||
T v = array[i];
|
||||
size_t j = i;
|
||||
while(j > 0 && v < array[j-1])
|
||||
{
|
||||
array[j] = array[j-1];
|
||||
--j;
|
||||
}
|
||||
array[j] = v;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
__forceinline void insertionsort_decending(T *__restrict__ array, const size_t length)
|
||||
{
|
||||
for(size_t i = 1;i<length;++i)
|
||||
{
|
||||
T v = array[i];
|
||||
size_t j = i;
|
||||
while(j > 0 && v > array[j-1])
|
||||
{
|
||||
array[j] = array[j-1];
|
||||
--j;
|
||||
}
|
||||
array[j] = v;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void quicksort_ascending(T *__restrict__ t,
|
||||
const ssize_t begin,
|
||||
const ssize_t end)
|
||||
{
|
||||
if (likely(begin < end))
|
||||
{
|
||||
const T pivotvalue = t[begin];
|
||||
ssize_t left = begin - 1;
|
||||
ssize_t right = end + 1;
|
||||
|
||||
while(1)
|
||||
{
|
||||
while (t[--right] > pivotvalue);
|
||||
while (t[++left] < pivotvalue);
|
||||
|
||||
if (left >= right) break;
|
||||
|
||||
const T temp = t[right];
|
||||
t[right] = t[left];
|
||||
t[left] = temp;
|
||||
}
|
||||
|
||||
const int pivot = right;
|
||||
quicksort_ascending(t, begin, pivot);
|
||||
quicksort_ascending(t, pivot + 1, end);
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void quicksort_decending(T *__restrict__ t,
|
||||
const ssize_t begin,
|
||||
const ssize_t end)
|
||||
{
|
||||
if (likely(begin < end))
|
||||
{
|
||||
const T pivotvalue = t[begin];
|
||||
ssize_t left = begin - 1;
|
||||
ssize_t right = end + 1;
|
||||
|
||||
while(1)
|
||||
{
|
||||
while (t[--right] < pivotvalue);
|
||||
while (t[++left] > pivotvalue);
|
||||
|
||||
if (left >= right) break;
|
||||
|
||||
const T temp = t[right];
|
||||
t[right] = t[left];
|
||||
t[left] = temp;
|
||||
}
|
||||
|
||||
const int pivot = right;
|
||||
quicksort_decending(t, begin, pivot);
|
||||
quicksort_decending(t, pivot + 1, end);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class T, ssize_t THRESHOLD>
|
||||
void quicksort_insertionsort_ascending(T *__restrict__ t,
|
||||
const ssize_t begin,
|
||||
const ssize_t end)
|
||||
{
|
||||
if (likely(begin < end))
|
||||
{
|
||||
const ssize_t size = end-begin+1;
|
||||
if (likely(size <= THRESHOLD))
|
||||
{
|
||||
insertionsort_ascending<T>(&t[begin],size);
|
||||
}
|
||||
else
|
||||
{
|
||||
const T pivotvalue = t[begin];
|
||||
ssize_t left = begin - 1;
|
||||
ssize_t right = end + 1;
|
||||
|
||||
while(1)
|
||||
{
|
||||
while (t[--right] > pivotvalue);
|
||||
while (t[++left] < pivotvalue);
|
||||
|
||||
if (left >= right) break;
|
||||
|
||||
const T temp = t[right];
|
||||
t[right] = t[left];
|
||||
t[left] = temp;
|
||||
}
|
||||
|
||||
const ssize_t pivot = right;
|
||||
quicksort_insertionsort_ascending<T,THRESHOLD>(t, begin, pivot);
|
||||
quicksort_insertionsort_ascending<T,THRESHOLD>(t, pivot + 1, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class T, ssize_t THRESHOLD>
|
||||
void quicksort_insertionsort_decending(T *__restrict__ t,
|
||||
const ssize_t begin,
|
||||
const ssize_t end)
|
||||
{
|
||||
if (likely(begin < end))
|
||||
{
|
||||
const ssize_t size = end-begin+1;
|
||||
if (likely(size <= THRESHOLD))
|
||||
{
|
||||
insertionsort_decending<T>(&t[begin],size);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
const T pivotvalue = t[begin];
|
||||
ssize_t left = begin - 1;
|
||||
ssize_t right = end + 1;
|
||||
|
||||
while(1)
|
||||
{
|
||||
while (t[--right] < pivotvalue);
|
||||
while (t[++left] > pivotvalue);
|
||||
|
||||
if (left >= right) break;
|
||||
|
||||
const T temp = t[right];
|
||||
t[right] = t[left];
|
||||
t[left] = temp;
|
||||
}
|
||||
|
||||
const ssize_t pivot = right;
|
||||
quicksort_insertionsort_decending<T,THRESHOLD>(t, begin, pivot);
|
||||
quicksort_insertionsort_decending<T,THRESHOLD>(t, pivot + 1, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void radixsort32(T* const morton, const size_t num, const unsigned int shift = 3*8)
|
||||
{
|
||||
static const unsigned int BITS = 8;
|
||||
static const unsigned int BUCKETS = (1 << BITS);
|
||||
static const unsigned int CMP_SORT_THRESHOLD = 16;
|
||||
|
||||
__aligned(64) unsigned int count[BUCKETS];
|
||||
|
||||
/* clear buckets */
|
||||
for (size_t i=0;i<BUCKETS;i++) count[i] = 0;
|
||||
|
||||
/* count buckets */
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma nounroll
|
||||
#endif
|
||||
for (size_t i=0;i<num;i++)
|
||||
count[(unsigned(morton[i]) >> shift) & (BUCKETS-1)]++;
|
||||
|
||||
/* prefix sums */
|
||||
__aligned(64) unsigned int head[BUCKETS];
|
||||
__aligned(64) unsigned int tail[BUCKETS];
|
||||
|
||||
head[0] = 0;
|
||||
for (size_t i=1; i<BUCKETS; i++)
|
||||
head[i] = head[i-1] + count[i-1];
|
||||
|
||||
for (size_t i=0; i<BUCKETS-1; i++)
|
||||
tail[i] = head[i+1];
|
||||
|
||||
tail[BUCKETS-1] = head[BUCKETS-1] + count[BUCKETS-1];
|
||||
|
||||
assert(tail[BUCKETS-1] == head[BUCKETS-1] + count[BUCKETS-1]);
|
||||
assert(tail[BUCKETS-1] == num);
|
||||
|
||||
/* in-place swap */
|
||||
for (size_t i=0;i<BUCKETS;i++)
|
||||
{
|
||||
/* process bucket */
|
||||
while(head[i] < tail[i])
|
||||
{
|
||||
T v = morton[head[i]];
|
||||
while(1)
|
||||
{
|
||||
const size_t b = (unsigned(v) >> shift) & (BUCKETS-1);
|
||||
if (b == i) break;
|
||||
std::swap(v,morton[head[b]++]);
|
||||
}
|
||||
assert((unsigned(v) >> shift & (BUCKETS-1)) == i);
|
||||
morton[head[i]++] = v;
|
||||
}
|
||||
}
|
||||
if (shift == 0) return;
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t i=0;i<BUCKETS;i++)
|
||||
if (count[i])
|
||||
{
|
||||
|
||||
for (size_t j=offset;j<offset+count[i]-1;j++)
|
||||
assert(((unsigned(morton[j]) >> shift) & (BUCKETS-1)) == i);
|
||||
|
||||
if (unlikely(count[i] < CMP_SORT_THRESHOLD))
|
||||
insertionsort_ascending(morton + offset, count[i]);
|
||||
else
|
||||
radixsort32(morton + offset, count[i], shift-BITS);
|
||||
|
||||
for (size_t j=offset;j<offset+count[i]-1;j++)
|
||||
assert(morton[j] <= morton[j+1]);
|
||||
|
||||
offset += count[i];
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Ty, typename Key>
|
||||
class ParallelRadixSort
|
||||
{
|
||||
static const size_t MAX_TASKS = 64;
|
||||
static const size_t BITS = 8;
|
||||
static const size_t BUCKETS = (1 << BITS);
|
||||
typedef unsigned int TyRadixCount[BUCKETS];
|
||||
|
||||
template<typename T>
|
||||
static bool compare(const T& v0, const T& v1) {
|
||||
return (Key)v0 < (Key)v1;
|
||||
}
|
||||
|
||||
private:
|
||||
ParallelRadixSort (const ParallelRadixSort& other) DELETED; // do not implement
|
||||
ParallelRadixSort& operator= (const ParallelRadixSort& other) DELETED; // do not implement
|
||||
|
||||
|
||||
public:
|
||||
ParallelRadixSort (Ty* const src, Ty* const tmp, const size_t N)
|
||||
: radixCount(nullptr), src(src), tmp(tmp), N(N) {}
|
||||
|
||||
void sort(const size_t blockSize)
|
||||
{
|
||||
assert(blockSize > 0);
|
||||
|
||||
/* perform single threaded sort for small N */
|
||||
if (N<=blockSize) // handles also special case of 0!
|
||||
{
|
||||
/* do inplace sort inside destination array */
|
||||
std::sort(src,src+N,compare<Ty>);
|
||||
}
|
||||
|
||||
/* perform parallel sort for large N */
|
||||
else
|
||||
{
|
||||
const size_t numThreads = min((N+blockSize-1)/blockSize,TaskScheduler::threadCount(),size_t(MAX_TASKS));
|
||||
tbbRadixSort(numThreads);
|
||||
}
|
||||
}
|
||||
|
||||
~ParallelRadixSort()
|
||||
{
|
||||
alignedFree(radixCount);
|
||||
radixCount = nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void tbbRadixIteration0(const Key shift,
|
||||
const Ty* __restrict const src,
|
||||
Ty* __restrict const dst,
|
||||
const size_t threadIndex, const size_t threadCount)
|
||||
{
|
||||
const size_t startID = (threadIndex+0)*N/threadCount;
|
||||
const size_t endID = (threadIndex+1)*N/threadCount;
|
||||
|
||||
/* mask to extract some number of bits */
|
||||
const Key mask = BUCKETS-1;
|
||||
|
||||
/* count how many items go into the buckets */
|
||||
for (size_t i=0; i<BUCKETS; i++)
|
||||
radixCount[threadIndex][i] = 0;
|
||||
|
||||
/* iterate over src array and count buckets */
|
||||
unsigned int * __restrict const count = radixCount[threadIndex];
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma nounroll
|
||||
#endif
|
||||
for (size_t i=startID; i<endID; i++) {
|
||||
#if defined(__X86_64__)
|
||||
const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
|
||||
#else
|
||||
const Key index = ((Key)src[i] >> shift) & mask;
|
||||
#endif
|
||||
count[index]++;
|
||||
}
|
||||
}
|
||||
|
||||
void tbbRadixIteration1(const Key shift,
|
||||
const Ty* __restrict const src,
|
||||
Ty* __restrict const dst,
|
||||
const size_t threadIndex, const size_t threadCount)
|
||||
{
|
||||
const size_t startID = (threadIndex+0)*N/threadCount;
|
||||
const size_t endID = (threadIndex+1)*N/threadCount;
|
||||
|
||||
/* mask to extract some number of bits */
|
||||
const Key mask = BUCKETS-1;
|
||||
|
||||
/* calculate total number of items for each bucket */
|
||||
__aligned(64) unsigned int total[BUCKETS];
|
||||
/*
|
||||
for (size_t i=0; i<BUCKETS; i++)
|
||||
total[i] = 0;
|
||||
*/
|
||||
for (size_t i=0; i<BUCKETS; i+=VSIZEX)
|
||||
vintx::store(&total[i], zero);
|
||||
|
||||
for (size_t i=0; i<threadCount; i++)
|
||||
{
|
||||
/*
|
||||
for (size_t j=0; j<BUCKETS; j++)
|
||||
total[j] += radixCount[i][j];
|
||||
*/
|
||||
for (size_t j=0; j<BUCKETS; j+=VSIZEX)
|
||||
vintx::store(&total[j], vintx::load(&total[j]) + vintx::load(&radixCount[i][j]));
|
||||
}
|
||||
|
||||
/* calculate start offset of each bucket */
|
||||
__aligned(64) unsigned int offset[BUCKETS];
|
||||
offset[0] = 0;
|
||||
for (size_t i=1; i<BUCKETS; i++)
|
||||
offset[i] = offset[i-1] + total[i-1];
|
||||
|
||||
/* calculate start offset of each bucket for this thread */
|
||||
for (size_t i=0; i<threadIndex; i++)
|
||||
{
|
||||
/*
|
||||
for (size_t j=0; j<BUCKETS; j++)
|
||||
offset[j] += radixCount[i][j];
|
||||
*/
|
||||
for (size_t j=0; j<BUCKETS; j+=VSIZEX)
|
||||
vintx::store(&offset[j], vintx::load(&offset[j]) + vintx::load(&radixCount[i][j]));
|
||||
}
|
||||
|
||||
/* copy items into their buckets */
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma nounroll
|
||||
#endif
|
||||
for (size_t i=startID; i<endID; i++) {
|
||||
const Ty elt = src[i];
|
||||
#if defined(__X86_64__)
|
||||
const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask;
|
||||
#else
|
||||
const size_t index = ((Key)src[i] >> shift) & mask;
|
||||
#endif
|
||||
dst[offset[index]++] = elt;
|
||||
}
|
||||
}
|
||||
|
||||
void tbbRadixIteration(const Key shift, const bool last,
|
||||
const Ty* __restrict src, Ty* __restrict dst,
|
||||
const size_t numTasks)
|
||||
{
|
||||
affinity_partitioner ap;
|
||||
parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration0(shift,src,dst,taskIndex,numTasks); },ap);
|
||||
parallel_for_affinity(numTasks,[&] (size_t taskIndex) { tbbRadixIteration1(shift,src,dst,taskIndex,numTasks); },ap);
|
||||
}
|
||||
|
||||
void tbbRadixSort(const size_t numTasks)
|
||||
{
|
||||
radixCount = (TyRadixCount*) alignedMalloc(MAX_TASKS*sizeof(TyRadixCount),64);
|
||||
|
||||
if (sizeof(Key) == sizeof(uint32_t)) {
|
||||
tbbRadixIteration(0*BITS,0,src,tmp,numTasks);
|
||||
tbbRadixIteration(1*BITS,0,tmp,src,numTasks);
|
||||
tbbRadixIteration(2*BITS,0,src,tmp,numTasks);
|
||||
tbbRadixIteration(3*BITS,1,tmp,src,numTasks);
|
||||
}
|
||||
else if (sizeof(Key) == sizeof(uint64_t))
|
||||
{
|
||||
tbbRadixIteration(0*BITS,0,src,tmp,numTasks);
|
||||
tbbRadixIteration(1*BITS,0,tmp,src,numTasks);
|
||||
tbbRadixIteration(2*BITS,0,src,tmp,numTasks);
|
||||
tbbRadixIteration(3*BITS,0,tmp,src,numTasks);
|
||||
tbbRadixIteration(4*BITS,0,src,tmp,numTasks);
|
||||
tbbRadixIteration(5*BITS,0,tmp,src,numTasks);
|
||||
tbbRadixIteration(6*BITS,0,src,tmp,numTasks);
|
||||
tbbRadixIteration(7*BITS,1,tmp,src,numTasks);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
TyRadixCount* radixCount;
|
||||
Ty* const src;
|
||||
Ty* const tmp;
|
||||
const size_t N;
|
||||
};
|
||||
|
||||
template<typename Ty>
|
||||
void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
|
||||
{
|
||||
ParallelRadixSort<Ty,Ty>(src,tmp,N).sort(blockSize);
|
||||
}
|
||||
|
||||
template<typename Ty, typename Key>
|
||||
void radix_sort(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192)
|
||||
{
|
||||
ParallelRadixSort<Ty,Key>(src,tmp,N).sort(blockSize);
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
void radix_sort_u32(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) {
|
||||
radix_sort<Ty,uint32_t>(src,tmp,N,blockSize);
|
||||
}
|
||||
|
||||
template<typename Ty>
|
||||
void radix_sort_u64(Ty* const src, Ty* const tmp, const size_t N, const size_t blockSize = 8192) {
|
||||
radix_sort<Ty,uint64_t>(src,tmp,N,blockSize);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,101 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "stringstream.h"
|
||||
#include "../sys/filename.h"
|
||||
#include "../math/vec2.h"
|
||||
#include "../math/vec3.h"
|
||||
#include "../math/col3.h"
|
||||
#include "../math/color.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! helper class for simple command line parsing */
|
||||
class ParseStream : public Stream<std::string>
|
||||
{
|
||||
public:
|
||||
ParseStream (const Ref<Stream<std::string> >& cin) : cin(cin) {}
|
||||
|
||||
ParseStream (const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
|
||||
const std::string& endl = "", bool multiLine = false)
|
||||
: cin(new StringStream(cin,seps,endl,multiLine)) {}
|
||||
|
||||
public:
|
||||
ParseLocation location() { return cin->loc(); }
|
||||
std::string next() { return cin->get(); }
|
||||
|
||||
void force(const std::string& next) {
|
||||
std::string token = getString();
|
||||
if (token != next)
|
||||
THROW_RUNTIME_ERROR("token \""+next+"\" expected but token \""+token+"\" found");
|
||||
}
|
||||
|
||||
std::string getString() {
|
||||
return get();
|
||||
}
|
||||
|
||||
FileName getFileName() {
|
||||
return FileName(get());
|
||||
}
|
||||
|
||||
int getInt () {
|
||||
return atoi(get().c_str());
|
||||
}
|
||||
|
||||
Vec2i getVec2i() {
|
||||
int x = atoi(get().c_str());
|
||||
int y = atoi(get().c_str());
|
||||
return Vec2i(x,y);
|
||||
}
|
||||
|
||||
Vec3ia getVec3ia() {
|
||||
int x = atoi(get().c_str());
|
||||
int y = atoi(get().c_str());
|
||||
int z = atoi(get().c_str());
|
||||
return Vec3ia(x,y,z);
|
||||
}
|
||||
|
||||
float getFloat() {
|
||||
return (float)atof(get().c_str());
|
||||
}
|
||||
|
||||
Vec2f getVec2f() {
|
||||
float x = (float)atof(get().c_str());
|
||||
float y = (float)atof(get().c_str());
|
||||
return Vec2f(x,y);
|
||||
}
|
||||
|
||||
Vec3f getVec3f() {
|
||||
float x = (float)atof(get().c_str());
|
||||
float y = (float)atof(get().c_str());
|
||||
float z = (float)atof(get().c_str());
|
||||
return Vec3f(x,y,z);
|
||||
}
|
||||
|
||||
Vec3fa getVec3fa() {
|
||||
float x = (float)atof(get().c_str());
|
||||
float y = (float)atof(get().c_str());
|
||||
float z = (float)atof(get().c_str());
|
||||
return Vec3fa(x,y,z);
|
||||
}
|
||||
|
||||
Col3f getCol3f() {
|
||||
float x = (float)atof(get().c_str());
|
||||
float y = (float)atof(get().c_str());
|
||||
float z = (float)atof(get().c_str());
|
||||
return Col3f(x,y,z);
|
||||
}
|
||||
|
||||
Color getColor() {
|
||||
float r = (float)atof(get().c_str());
|
||||
float g = (float)atof(get().c_str());
|
||||
float b = (float)atof(get().c_str());
|
||||
return Color(r,g,b);
|
||||
}
|
||||
|
||||
private:
|
||||
Ref<Stream<std::string> > cin;
|
||||
};
|
||||
}
|
||||
@ -0,0 +1,215 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../sys/platform.h"
|
||||
#include "../sys/ref.h"
|
||||
#include "../sys/filename.h"
|
||||
#include "../sys/string.h"
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <cstdio>
|
||||
#include <string.h>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! stores the location of a stream element in the source */
|
||||
class ParseLocation
|
||||
{
|
||||
public:
|
||||
ParseLocation () : lineNumber(-1), colNumber(-1) {}
|
||||
ParseLocation (std::shared_ptr<std::string> fileName, ssize_t lineNumber, ssize_t colNumber, ssize_t /*charNumber*/)
|
||||
: fileName(fileName), lineNumber(lineNumber), colNumber(colNumber) {}
|
||||
|
||||
std::string str() const
|
||||
{
|
||||
std::string str = "unknown";
|
||||
if (fileName) str = *fileName;
|
||||
if (lineNumber >= 0) str += " line " + toString(lineNumber);
|
||||
if (lineNumber >= 0 && colNumber >= 0) str += " character " + toString(colNumber);
|
||||
return str;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<std::string> fileName; /// name of the file (or stream) the token is from
|
||||
ssize_t lineNumber; /// the line number the token is from
|
||||
ssize_t colNumber; /// the character number in the current line
|
||||
};
|
||||
|
||||
/*! a stream class templated over the stream elements */
|
||||
template<typename T> class Stream : public RefCount
|
||||
{
|
||||
enum { BUF_SIZE = 1024 };
|
||||
|
||||
private:
|
||||
virtual T next() = 0;
|
||||
virtual ParseLocation location() = 0;
|
||||
__forceinline std::pair<T,ParseLocation> nextHelper() {
|
||||
ParseLocation l = location();
|
||||
T v = next();
|
||||
return std::pair<T,ParseLocation>(v,l);
|
||||
}
|
||||
__forceinline void push_back(const std::pair<T,ParseLocation>& v) {
|
||||
if (past+future == BUF_SIZE) pop_front();
|
||||
size_t end = (start+past+future++)%BUF_SIZE;
|
||||
buffer[end] = v;
|
||||
}
|
||||
__forceinline void pop_front() {
|
||||
if (past == 0) THROW_RUNTIME_ERROR("stream buffer empty");
|
||||
start = (start+1)%BUF_SIZE; past--;
|
||||
}
|
||||
public:
|
||||
Stream () : start(0), past(0), future(0), buffer(BUF_SIZE) {}
|
||||
virtual ~Stream() {}
|
||||
|
||||
public:
|
||||
|
||||
const ParseLocation& loc() {
|
||||
if (future == 0) push_back(nextHelper());
|
||||
return buffer[(start+past)%BUF_SIZE].second;
|
||||
}
|
||||
T get() {
|
||||
if (future == 0) push_back(nextHelper());
|
||||
T t = buffer[(start+past)%BUF_SIZE].first;
|
||||
past++; future--;
|
||||
return t;
|
||||
}
|
||||
const T& peek() {
|
||||
if (future == 0) push_back(nextHelper());
|
||||
return buffer[(start+past)%BUF_SIZE].first;
|
||||
}
|
||||
const T& unget(size_t n = 1) {
|
||||
if (past < n) THROW_RUNTIME_ERROR ("cannot unget that many items");
|
||||
past -= n; future += n;
|
||||
return peek();
|
||||
}
|
||||
void drop() {
|
||||
if (future == 0) push_back(nextHelper());
|
||||
past++; future--;
|
||||
}
|
||||
private:
|
||||
size_t start,past,future;
|
||||
std::vector<std::pair<T,ParseLocation> > buffer;
|
||||
};
|
||||
|
||||
/*! warps an iostream stream */
|
||||
class StdStream : public Stream<int>
|
||||
{
|
||||
public:
|
||||
StdStream (std::istream& cin, const std::string& name = "std::stream")
|
||||
: cin(cin), lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name))) {}
|
||||
~StdStream() {}
|
||||
ParseLocation location() {
|
||||
return ParseLocation(name,lineNumber,colNumber,charNumber);
|
||||
}
|
||||
int next() {
|
||||
int c = cin.get();
|
||||
if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
|
||||
charNumber++;
|
||||
return c;
|
||||
}
|
||||
private:
|
||||
std::istream& cin;
|
||||
ssize_t lineNumber; /// the line number the token is from
|
||||
ssize_t colNumber; /// the character number in the current line
|
||||
ssize_t charNumber; /// the character in the file
|
||||
std::shared_ptr<std::string> name; /// name of buffer
|
||||
};
|
||||
|
||||
/*! creates a stream from a file */
|
||||
class FileStream : public Stream<int>
|
||||
{
|
||||
public:
|
||||
|
||||
FileStream (FILE* file, const std::string& name = "file")
|
||||
: file(file), lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name))) {}
|
||||
|
||||
FileStream (const FileName& fileName)
|
||||
: lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(fileName.str())))
|
||||
{
|
||||
file = fopen(fileName.c_str(),"r");
|
||||
if (file == nullptr) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
|
||||
}
|
||||
~FileStream() { if (file) fclose(file); }
|
||||
|
||||
public:
|
||||
ParseLocation location() {
|
||||
return ParseLocation(name,lineNumber,colNumber,charNumber);
|
||||
}
|
||||
|
||||
int next() {
|
||||
int c = fgetc(file);
|
||||
if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
|
||||
charNumber++;
|
||||
return c;
|
||||
}
|
||||
|
||||
private:
|
||||
FILE* file;
|
||||
ssize_t lineNumber; /// the line number the token is from
|
||||
ssize_t colNumber; /// the character number in the current line
|
||||
ssize_t charNumber; /// the character in the file
|
||||
std::shared_ptr<std::string> name; /// name of buffer
|
||||
};
|
||||
|
||||
/*! creates a stream from a string */
|
||||
class StrStream : public Stream<int>
|
||||
{
|
||||
public:
|
||||
|
||||
StrStream (const char* str)
|
||||
: str(str), lineNumber(1), colNumber(0), charNumber(0) {}
|
||||
|
||||
public:
|
||||
ParseLocation location() {
|
||||
return ParseLocation(std::shared_ptr<std::string>(),lineNumber,colNumber,charNumber);
|
||||
}
|
||||
|
||||
int next() {
|
||||
int c = str[charNumber];
|
||||
if (c == 0) return EOF;
|
||||
if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
|
||||
charNumber++;
|
||||
return c;
|
||||
}
|
||||
|
||||
private:
|
||||
const char* str;
|
||||
ssize_t lineNumber; /// the line number the token is from
|
||||
ssize_t colNumber; /// the character number in the current line
|
||||
ssize_t charNumber; /// the character in the file
|
||||
};
|
||||
|
||||
/*! creates a character stream from a command line */
|
||||
class CommandLineStream : public Stream<int>
|
||||
{
|
||||
public:
|
||||
CommandLineStream (int argc, char** argv, const std::string& name = "command line")
|
||||
: i(0), j(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name)))
|
||||
{
|
||||
if (argc > 0) {
|
||||
for (size_t i=0; argv[0][i] && i<1024; i++) charNumber++;
|
||||
charNumber++;
|
||||
}
|
||||
for (ssize_t k=1; k<argc; k++) args.push_back(argv[k]);
|
||||
}
|
||||
~CommandLineStream() {}
|
||||
public:
|
||||
ParseLocation location() {
|
||||
return ParseLocation(name,0,charNumber,charNumber);
|
||||
}
|
||||
int next() {
|
||||
if (i == args.size()) return EOF;
|
||||
if (j == args[i].size()) { i++; j=0; charNumber++; return ' '; }
|
||||
charNumber++;
|
||||
return args[i][j++];
|
||||
}
|
||||
private:
|
||||
size_t i,j;
|
||||
std::vector<std::string> args;
|
||||
ssize_t charNumber; /// the character in the file
|
||||
std::shared_ptr<std::string> name; /// name of buffer
|
||||
};
|
||||
}
|
||||
@ -0,0 +1,39 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "stream.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* removes all line comments from a stream */
|
||||
class LineCommentFilter : public Stream<int>
|
||||
{
|
||||
public:
|
||||
LineCommentFilter (const FileName& fileName, const std::string& lineComment)
|
||||
: cin(new FileStream(fileName)), lineComment(lineComment) {}
|
||||
LineCommentFilter (Ref<Stream<int> > cin, const std::string& lineComment)
|
||||
: cin(cin), lineComment(lineComment) {}
|
||||
|
||||
ParseLocation location() { return cin->loc(); }
|
||||
|
||||
int next()
|
||||
{
|
||||
/* look if the line comment starts here */
|
||||
for (size_t j=0; j<lineComment.size(); j++) {
|
||||
if (cin->peek() != lineComment[j]) { cin->unget(j); goto not_found; }
|
||||
cin->get();
|
||||
}
|
||||
/* eat all characters until the end of the line (or file) */
|
||||
while (cin->peek() != '\n' && cin->peek() != EOF) cin->get();
|
||||
|
||||
not_found:
|
||||
return cin->get();
|
||||
}
|
||||
|
||||
private:
|
||||
Ref<Stream<int> > cin;
|
||||
std::string lineComment;
|
||||
};
|
||||
}
|
||||
@ -0,0 +1,48 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "stringstream.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* characters that are accepted inside a token */
static const std::string stringChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\");
|
||||
|
||||
/* creates map for fast categorization of characters */
static void createCharMap(bool map[256], const std::string& chrs)
{
  /* clear the whole table first */
  for (bool* entry = map; entry != map+256; ++entry)
    *entry = false;

  /* mark the listed characters; they are indexed by their unsigned byte value */
  for (const char ch : chrs)
    map[static_cast<uint8_t>(ch)] = true;
}
|
||||
|
||||
/* simple tokenizer */
|
||||
StringStream::StringStream(const Ref<Stream<int> >& cin, const std::string& seps, const std::string& endl, bool multiLine)
|
||||
: cin(cin), endl(endl), multiLine(multiLine)
|
||||
{
|
||||
createCharMap(isSepMap,seps);
|
||||
createCharMap(isValidCharMap,stringChars);
|
||||
}
|
||||
|
||||
std::string StringStream::next()
|
||||
{
|
||||
/* skip separators */
|
||||
while (cin->peek() != EOF) {
|
||||
if (endl != "" && cin->peek() == '\n') { cin->drop(); return endl; }
|
||||
if (multiLine && cin->peek() == '\\') {
|
||||
cin->drop();
|
||||
if (cin->peek() == '\n') { cin->drop(); continue; }
|
||||
cin->unget();
|
||||
}
|
||||
if (!isSeparator(cin->peek())) break;
|
||||
cin->drop();
|
||||
}
|
||||
|
||||
/* parse everything until the next separator */
|
||||
std::vector<char> str; str.reserve(64);
|
||||
while (cin->peek() != EOF && !isSeparator(cin->peek())) {
|
||||
int c = cin->get();
|
||||
if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
|
||||
str.push_back((char)c);
|
||||
}
|
||||
str.push_back(0);
|
||||
return std::string(str.data());
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,29 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "stream.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! simple tokenizer that produces a string stream */
|
||||
class StringStream : public Stream<std::string>
|
||||
{
|
||||
public:
|
||||
StringStream(const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
|
||||
const std::string& endl = "", bool multiLine = false);
|
||||
public:
|
||||
ParseLocation location() { return cin->loc(); }
|
||||
std::string next();
|
||||
private:
|
||||
__forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
|
||||
__forceinline bool isValidChar(unsigned int c) const { return c<256 && isValidCharMap[c]; }
|
||||
private:
|
||||
Ref<Stream<int> > cin; /*! source character stream */
|
||||
bool isSepMap[256]; /*! map for fast classification of separators */
|
||||
bool isValidCharMap[256]; /*! map for valid characters */
|
||||
std::string endl; /*! the token of the end of line */
|
||||
bool multiLine; /*! whether to parse lines wrapped with \ */
|
||||
};
|
||||
}
|
||||
@ -0,0 +1,181 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "tokenstream.h"
|
||||
#include "../math/math.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/* shorthands for common sets of characters */
|
||||
const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
|
||||
const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
const std::string TokenStream::numbers = "0123456789";
|
||||
const std::string TokenStream::separators = "\n\t\r ";
|
||||
const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
|
||||
|
||||
/* creates map for fast categorization of characters */
static void createCharMap(bool map[256], const std::string& chrs)
{
  /* clear the whole table first */
  for (bool* entry = map; entry != map+256; ++entry)
    *entry = false;

  /* mark the listed characters; they are indexed by their unsigned byte value */
  for (const char ch : chrs)
    map[static_cast<uint8_t>(ch)] = true;
}
|
||||
|
||||
/* build full tokenizer that takes list of valid characters and keywords */
|
||||
TokenStream::TokenStream(const Ref<Stream<int> >& cin, //< stream to read from
|
||||
const std::string& alpha, //< valid characters for identifiers
|
||||
const std::string& seps, //< characters that act as separators
|
||||
const std::vector<std::string>& symbols) //< symbols
|
||||
: cin(cin), symbols(symbols)
|
||||
{
|
||||
createCharMap(isAlphaMap,alpha);
|
||||
createCharMap(isSepMap,seps);
|
||||
createCharMap(isStringCharMap,stringChars);
|
||||
}
|
||||
|
||||
bool TokenStream::decDigits(std::string& str_o)
|
||||
{
|
||||
bool ok = false;
|
||||
std::string str;
|
||||
if (cin->peek() == '+' || cin->peek() == '-') str += (char)cin->get();
|
||||
while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
|
||||
if (ok) str_o += str;
|
||||
else cin->unget(str.size());
|
||||
return ok;
|
||||
}
|
||||
|
||||
bool TokenStream::decDigits1(std::string& str_o)
|
||||
{
|
||||
bool ok = false;
|
||||
std::string str;
|
||||
while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
|
||||
if (ok) str_o += str; else cin->unget(str.size());
|
||||
return ok;
|
||||
}
|
||||
|
||||
bool TokenStream::trySymbol(const std::string& symbol)
|
||||
{
|
||||
size_t pos = 0;
|
||||
while (pos < symbol.size()) {
|
||||
if (symbol[pos] != cin->peek()) { cin->unget(pos); return false; }
|
||||
cin->drop(); pos++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* tries the registered symbols in order; the first one that matches becomes the token */
bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
{
  for (const std::string& symbol : symbols)
  {
    if (trySymbol(symbol)) {
      token = Token(symbol,Token::TY_SYMBOL,loc);
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/* Tries to parse a floating point number at the current position.
 *
 * Accepted forms are the literals nan, +inf and -inf, and decimal numbers
 * written as 1.[2], 1.[2]E2, 1E2, .3 or .3E2 (the integer part and the
 * exponent may carry a sign). On success the token (tagged with loc) is
 * stored in token and true is returned; otherwise all consumed characters
 * are put back and false is returned. A plain integer such as "12" is not
 * a float and is left for tryInt. */
bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
{
  /* special values; pass the location along like for every other token */
  if (trySymbol("nan"))  { token = Token(float(nan),loc);     return true; }
  if (trySymbol("+inf")) { token = Token(float(pos_inf),loc); return true; }
  if (trySymbol("-inf")) { token = Token(float(neg_inf),loc); return true; }

  bool ok = false;
  std::string str;

  if (decDigits(str))
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      /* the fraction is optional and must not carry a sign: with the signed
       * variant an input like "1.-5" would be swallowed as a single float */
      decDigits1(str);
      if (cin->peek() == 'e' || cin->peek() == 'E') {
        str += (char)cin->get();
        if (decDigits(str)) ok = true; // 1.[2]E2
      }
      else ok = true; // 1.[2]
    }
    else if (cin->peek() == 'e' || cin->peek() == 'E') {
      str += (char)cin->get();
      if (decDigits(str)) ok = true; // 1E2
    }
  }
  else if (cin->peek() == '.')
  {
    str += (char)cin->get();
    /* unsigned fraction, see above */
    if (decDigits1(str)) {
      if (cin->peek() == 'e' || cin->peek() == 'E') {
        str += (char)cin->get();
        if (decDigits(str)) ok = true; // .3E2
      }
      else ok = true; // .3
    }
  }

  if (ok) token = Token((float)atof(str.c_str()),loc);
  else    cin->unget(str.size()); // not a float: restore the input
  return ok;
}
|
||||
|
||||
/* tries to parse an optionally signed decimal integer */
bool TokenStream::tryInt(Token& token, const ParseLocation& loc)
{
  std::string digits;
  if (!decDigits(digits))
    return false;

  token = Token(atoi(digits.c_str()),loc);
  return true;
}
|
||||
|
||||
/* tries to parse a string enclosed in double quotes; throws on characters
 * that are not allowed inside a string */
bool TokenStream::tryString(Token& token, const ParseLocation& loc)
{
  if (cin->peek() != '\"')
    return false;
  cin->drop(); // opening quote

  std::string contents;
  for (;;)
  {
    if (cin->peek() == '\"') break;
    const int c = cin->get();
    if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());
    contents += (char)c;
  }
  cin->drop(); // closing quote

  token = Token(contents,Token::TY_STRING,loc);
  return true;
}
|
||||
|
||||
/* tries to parse an identifier: one identifier character followed by any
 * number of identifier characters or digits */
bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
{
  if (!isAlpha(cin->peek()))
    return false;

  std::string ident(1,(char)cin->get());
  while (isAlphaNum(cin->peek()))
    ident.push_back((char)cin->get());

  token = Token(ident,Token::TY_IDENTIFIER,loc);
  return true;
}
|
||||
|
||||
void TokenStream::skipSeparators()
|
||||
{
|
||||
/* skip separators */
|
||||
while (cin->peek() != EOF && isSeparator(cin->peek()))
|
||||
cin->drop();
|
||||
}
|
||||
|
||||
/* returns the next token of the input */
Token TokenStream::next()
{
  skipSeparators();
  const ParseLocation loc = cin->loc();

  /* the parsers are tried in this fixed order; the first one that succeeds wins */
  Token token;
  const bool parsed =
    trySymbols   (token,loc) ||   /**< try to parse a symbol */
    tryFloat     (token,loc) ||   /**< try to parse float */
    tryInt       (token,loc) ||   /**< try to parse integer */
    tryString    (token,loc) ||   /**< try to parse string */
    tryIdentifier(token,loc);     /**< try to parse identifier */
  if (parsed) return token;

  if (cin->peek() == EOF)
    return Token(loc);                 /**< return EOF token */

  return Token((char)cin->get(),loc);  /**< return invalid character token */
}
|
||||
}
|
||||
@ -0,0 +1,164 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "stream.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace embree
|
||||
{
|
||||
/*! token class */
|
||||
class Token
|
||||
{
|
||||
public:
|
||||
|
||||
enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL };
|
||||
|
||||
Token ( const ParseLocation& loc = ParseLocation()) : ty(TY_EOF ), loc(loc) {}
|
||||
Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {}
|
||||
Token (int i, const ParseLocation& loc = ParseLocation()) : ty(TY_INT ), i(i), loc(loc) {}
|
||||
Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {}
|
||||
Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {}
|
||||
|
||||
static Token Eof() { return Token(); }
|
||||
static Token Sym(std::string str) { return Token(str,TY_SYMBOL); }
|
||||
static Token Str(std::string str) { return Token(str,TY_STRING); }
|
||||
static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); }
|
||||
|
||||
char Char() const {
|
||||
if (ty == TY_CHAR) return c;
|
||||
THROW_RUNTIME_ERROR(loc.str()+": character expected");
|
||||
}
|
||||
|
||||
int Int() const {
|
||||
if (ty == TY_INT) return i;
|
||||
THROW_RUNTIME_ERROR(loc.str()+": integer expected");
|
||||
}
|
||||
|
||||
float Float(bool cast = true) const {
|
||||
if (ty == TY_FLOAT) return f;
|
||||
if (ty == TY_INT && cast) return (float)i;
|
||||
THROW_RUNTIME_ERROR(loc.str()+": float expected");
|
||||
}
|
||||
|
||||
std::string Identifier() const {
|
||||
if (ty == TY_IDENTIFIER) return str;
|
||||
THROW_RUNTIME_ERROR(loc.str()+": identifier expected");
|
||||
}
|
||||
|
||||
std::string String() const {
|
||||
if (ty == TY_STRING) return str;
|
||||
THROW_RUNTIME_ERROR(loc.str()+": string expected");
|
||||
}
|
||||
|
||||
std::string Symbol() const {
|
||||
if (ty == TY_SYMBOL) return str;
|
||||
THROW_RUNTIME_ERROR(loc.str()+": symbol expected");
|
||||
}
|
||||
|
||||
const ParseLocation& Location() const { return loc; }
|
||||
|
||||
friend bool operator==(const Token& a, const Token& b)
|
||||
{
|
||||
if (a.ty != b.ty) return false;
|
||||
if (a.ty == TY_CHAR) return a.c == b.c;
|
||||
if (a.ty == TY_INT) return a.i == b.i;
|
||||
if (a.ty == TY_FLOAT) return a.f == b.f;
|
||||
if (a.ty == TY_IDENTIFIER) return a.str == b.str;
|
||||
if (a.ty == TY_STRING) return a.str == b.str;
|
||||
if (a.ty == TY_SYMBOL) return a.str == b.str;
|
||||
return true;
|
||||
}
|
||||
|
||||
friend bool operator!=(const Token& a, const Token& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
friend bool operator <( const Token& a, const Token& b ) {
|
||||
if (a.ty != b.ty) return (int)a.ty < (int)b.ty;
|
||||
if (a.ty == TY_CHAR) return a.c < b.c;
|
||||
if (a.ty == TY_INT) return a.i < b.i;
|
||||
if (a.ty == TY_FLOAT) return a.f < b.f;
|
||||
if (a.ty == TY_IDENTIFIER) return a.str < b.str;
|
||||
if (a.ty == TY_STRING) return a.str < b.str;
|
||||
if (a.ty == TY_SYMBOL) return a.str < b.str;
|
||||
return false;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& cout, const Token& t)
|
||||
{
|
||||
if (t.ty == TY_EOF) return cout << "eof";
|
||||
if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")";
|
||||
if (t.ty == TY_INT) return cout << "Int(" << t.i << ")";
|
||||
if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")";
|
||||
if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")";
|
||||
if (t.ty == TY_STRING) return cout << "String(" << t.str << ")";
|
||||
if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")";
|
||||
return cout << "unknown";
|
||||
}
|
||||
|
||||
private:
|
||||
Type ty; //< the type of the token
|
||||
union {
|
||||
char c; //< data for char tokens
|
||||
int i; //< data for int tokens
|
||||
float f; //< data for float tokens
|
||||
};
|
||||
std::string str; //< data for string and identifier tokens
|
||||
ParseLocation loc; //< the location the token is from
|
||||
};
|
||||
|
||||
/*! build full tokenizer that takes list of valid characters and keywords */
|
||||
class TokenStream : public Stream<Token>
|
||||
{
|
||||
public:
|
||||
|
||||
/*! shorthands for common sets of characters */
|
||||
static const std::string alpha;
|
||||
static const std::string ALPHA;
|
||||
static const std::string numbers;
|
||||
static const std::string separators;
|
||||
static const std::string stringChars;
|
||||
|
||||
public:
|
||||
TokenStream(const Ref<Stream<int> >& cin,
|
||||
const std::string& alpha, //< valid characters for identifiers
|
||||
const std::string& seps, //< characters that act as separators
|
||||
const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols
|
||||
public:
|
||||
ParseLocation location() { return cin->loc(); }
|
||||
Token next();
|
||||
bool trySymbol(const std::string& symbol);
|
||||
|
||||
private:
|
||||
void skipSeparators();
|
||||
bool decDigits(std::string& str);
|
||||
bool decDigits1(std::string& str);
|
||||
bool trySymbols(Token& token, const ParseLocation& loc);
|
||||
bool tryFloat(Token& token, const ParseLocation& loc);
|
||||
bool tryInt(Token& token, const ParseLocation& loc);
|
||||
bool tryString(Token& token, const ParseLocation& loc);
|
||||
bool tryIdentifier(Token& token, const ParseLocation& loc);
|
||||
|
||||
Ref<Stream<int> > cin;
|
||||
bool isSepMap[256];
|
||||
bool isAlphaMap[256];
|
||||
bool isStringCharMap[256];
|
||||
std::vector<std::string> symbols;
|
||||
|
||||
/*! checks if a character is a separator */
|
||||
__forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
|
||||
|
||||
/*! checks if a character is a number */
|
||||
__forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; }
|
||||
|
||||
/*! checks if a character is valid inside a string */
|
||||
__forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; }
|
||||
|
||||
/*! checks if a character is legal for an identifier */
|
||||
__forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; }
|
||||
__forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); }
|
||||
};
|
||||
}
|
||||
@ -0,0 +1,361 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "linearspace2.h"
|
||||
#include "linearspace3.h"
|
||||
#include "quaternion.h"
|
||||
#include "bbox.h"
|
||||
#include "vec4.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
#define VectorT typename L::Vector
|
||||
#define ScalarT typename L::Vector::Scalar
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Affine Space
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<typename L>
|
||||
struct AffineSpaceT
|
||||
{
|
||||
L l; /*< linear part of affine space */
|
||||
VectorT p; /*< affine part of affine space */
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Constructors, Assignment, Cast, Copy Operations
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline AffineSpaceT ( ) { }
|
||||
__forceinline AffineSpaceT ( const AffineSpaceT& other ) { l = other.l; p = other.p; }
|
||||
__forceinline AffineSpaceT ( const L & other ) { l = other ; p = VectorT(zero); }
|
||||
__forceinline AffineSpaceT& operator=( const AffineSpaceT& other ) { l = other.l; p = other.p; return *this; }
|
||||
|
||||
__forceinline AffineSpaceT( const VectorT& vx, const VectorT& vy, const VectorT& vz, const VectorT& p ) : l(vx,vy,vz), p(p) {}
|
||||
__forceinline AffineSpaceT( const L& l, const VectorT& p ) : l(l), p(p) {}
|
||||
|
||||
template<typename L1> __forceinline AffineSpaceT( const AffineSpaceT<L1>& s ) : l(s.l), p(s.p) {}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Constants
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline AffineSpaceT( ZeroTy ) : l(zero), p(zero) {}
|
||||
__forceinline AffineSpaceT( OneTy ) : l(one), p(zero) {}
|
||||
|
||||
/*! return matrix for scaling */
|
||||
static __forceinline AffineSpaceT scale(const VectorT& s) { return L::scale(s); }
|
||||
|
||||
/*! return matrix for translation */
|
||||
static __forceinline AffineSpaceT translate(const VectorT& p) { return AffineSpaceT(one,p); }
|
||||
|
||||
/*! return matrix for rotation, only in 2D */
|
||||
static __forceinline AffineSpaceT rotate(const ScalarT& r) { return L::rotate(r); }
|
||||
|
||||
/*! return matrix for rotation around arbitrary point (2D) or axis (3D) */
|
||||
static __forceinline AffineSpaceT rotate(const VectorT& u, const ScalarT& r) { return L::rotate(u,r); }
|
||||
|
||||
/*! return matrix for rotation around arbitrary axis and point, only in 3D */
|
||||
static __forceinline AffineSpaceT rotate(const VectorT& p, const VectorT& u, const ScalarT& r) { return translate(+p) * rotate(u,r) * translate(-p); }
|
||||
|
||||
/*! return matrix for looking at given point, only in 3D */
|
||||
static __forceinline AffineSpaceT lookat(const VectorT& eye, const VectorT& point, const VectorT& up) {
|
||||
VectorT Z = normalize(point-eye);
|
||||
VectorT U = normalize(cross(up,Z));
|
||||
VectorT V = normalize(cross(Z,U));
|
||||
return AffineSpaceT(L(U,V,Z),eye);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// template specialization to get correct identity matrix for type AffineSpace3fa
|
||||
template<>
|
||||
__forceinline AffineSpaceT<LinearSpace3ff>::AffineSpaceT( OneTy ) : l(one), p(0.f, 0.f, 0.f, 1.f) {}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Unary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! negates the linear and the affine part */
template<typename L> __forceinline AffineSpaceT<L> operator -( const AffineSpaceT<L>& a )
{
  return AffineSpaceT<L>(-a.l,-a.p);
}

/*! unary plus, applied to both parts */
template<typename L> __forceinline AffineSpaceT<L> operator +( const AffineSpaceT<L>& a )
{
  return AffineSpaceT<L>(+a.l,+a.p);
}

/*! inverse transformation: inverted linear part and the correspondingly mapped, negated affine part */
template<typename L> __forceinline AffineSpaceT<L> rcp( const AffineSpaceT<L>& a )
{
  const L inv = rcp(a.l);
  return AffineSpaceT<L>(inv,-(inv*a.p));
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Binary Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! component-wise sum and difference of both parts */
template<typename L> __forceinline const AffineSpaceT<L> operator +( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l+b.l,a.p+b.p); }
template<typename L> __forceinline const AffineSpaceT<L> operator -( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l-b.l,a.p-b.p); }

/*! scales both parts by a scalar */
template<typename L> __forceinline const AffineSpaceT<L> operator *( const ScalarT        & a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a*b.l,a*b.p); }

/*! concatenation: the result applies b first, then a */
template<typename L> __forceinline const AffineSpaceT<L> operator *( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return AffineSpaceT<L>(a.l*b.l,a.l*b.p+a.p); }

/*! multiplication with the inverse of b */
template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a * rcp(b); }

/*! division by a scalar; the scalar has to be the left operand of the product,
 *  as only scalar * space is defined (space * scalar cannot deduce L) */
template<typename L> __forceinline const AffineSpaceT<L> operator /( const AffineSpaceT<L>& a, const ScalarT        & b ) { return rcp(b) * a; }
|
||||
|
||||
/*! in-place concatenation with another affine space */
template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a * b; }

/*! in-place scaling; the scalar has to be the left operand of the product,
 *  as only scalar * space is defined */
template<typename L> __forceinline AffineSpaceT<L>& operator *=( AffineSpaceT<L>& a, const ScalarT        & b ) { return a = b * a; }

/*! in-place multiplication with the inverse of b */
template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const AffineSpaceT<L>& b ) { return a = a / b; }

/*! in-place division by a scalar, written as a product with the reciprocal on the left */
template<typename L> __forceinline AffineSpaceT<L>& operator /=( AffineSpaceT<L>& a, const ScalarT        & b ) { return a = rcp(b) * a; }
|
||||
|
||||
/*! transforms a point: linear part plus affine part */
template<typename L> __forceinline VectorT xfmPoint (const AffineSpaceT<L>& m, const VectorT& p)
{
  const VectorT z_term  = madd(VectorT(p.z),m.l.vz,m.p);
  const VectorT yz_term = madd(VectorT(p.y),m.l.vy,z_term);
  return madd(VectorT(p.x),m.l.vx,yz_term);
}

/*! transforms a vector: only the linear part is applied */
template<typename L> __forceinline VectorT xfmVector(const AffineSpaceT<L>& m, const VectorT& v)
{
  return xfmVector(m.l,v);
}

/*! transforms a normal: delegates to xfmNormal of the linear part */
template<typename L> __forceinline VectorT xfmNormal(const AffineSpaceT<L>& m, const VectorT& n)
{
  return xfmNormal(m.l,n);
}
|
||||
|
||||
/*! bounding box of the eight transformed corners of b */
__forceinline const BBox<Vec3fa> xfmBounds(const AffineSpaceT<LinearSpace3<Vec3fa> >& m, const BBox<Vec3fa>& b)
{
  BBox3fa dst = empty;

  /* enumerate the corners with x as the slowest and z as the fastest varying coordinate */
  for (int ix=0; ix<2; ix++) {
    const Vec3fa& cx = ix ? b.upper : b.lower;
    for (int iy=0; iy<2; iy++) {
      const Vec3fa& cy = iy ? b.upper : b.lower;
      for (int iz=0; iz<2; iz++) {
        const Vec3fa& cz = iz ? b.upper : b.lower;
        const Vec3fa corner(cx.x,cy.y,cz.z);
        dst.extend(xfmPoint(m,corner));
      }
    }
  }
  return dst;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Comparison Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! equal if both the linear and the affine part are equal */
template<typename L> __forceinline bool operator ==( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b )
{
  return a.l == b.l && a.p == b.p;
}

/*! different if the linear or the affine part differs */
template<typename L> __forceinline bool operator !=( const AffineSpaceT<L>& a, const AffineSpaceT<L>& b )
{
  return a.l != b.l || a.p != b.p;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Select
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! applies select with the mask s to the linear and the affine part of t and f */
template<typename L> __forceinline AffineSpaceT<L> select ( const typename L::Vector::Scalar::Bool& s,
                                                            const AffineSpaceT<L>& t,
                                                            const AffineSpaceT<L>& f )
{
  return AffineSpaceT<L>(select(s,t.l,f.l),
                         select(s,t.p,f.p));
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Output Operators
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*! prints the space as "{ l = <linear part>, p = <affine part> }" */
template<typename L> static embree_ostream operator<<(embree_ostream cout, const AffineSpaceT<L>& m)
{
  return cout << "{ l = " << m.l
              << ", p = " << m.p
              << " }";
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Template Instantiations
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* affine spaces over the scalar linear space types */
using AffineSpace2f      = AffineSpaceT<LinearSpace2f>;
using AffineSpace3f      = AffineSpaceT<LinearSpace3f>;
using AffineSpace3fa     = AffineSpaceT<LinearSpace3fa>;
using AffineSpace3fx     = AffineSpaceT<LinearSpace3fx>;
using AffineSpace3ff     = AffineSpaceT<LinearSpace3ff>;
using OrthonormalSpace3f = AffineSpaceT<Quaternion3f>;

/* affine spaces with Vec3 columns of N-wide floats */
template<int N> using AffineSpace3vf = AffineSpaceT<LinearSpace3<Vec3<vfloat<N>>>>;
using AffineSpace3vf4  = AffineSpaceT<LinearSpace3<Vec3<vfloat<4>>>>;
using AffineSpace3vf8  = AffineSpaceT<LinearSpace3<Vec3<vfloat<8>>>>;
using AffineSpace3vf16 = AffineSpaceT<LinearSpace3<Vec3<vfloat<16>>>>;

/* affine spaces with Vec4 columns of N-wide floats */
template<int N> using AffineSpace3vff = AffineSpaceT<LinearSpace3<Vec4<vfloat<N>>>>;
using AffineSpace3vfa4  = AffineSpaceT<LinearSpace3<Vec4<vfloat<4>>>>;
using AffineSpace3vfa8  = AffineSpaceT<LinearSpace3<Vec4<vfloat<8>>>>;
using AffineSpace3vfa16 = AffineSpaceT<LinearSpace3<Vec4<vfloat<16>>>>;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// Interpolation
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
template<typename T, typename R>
|
||||
__forceinline AffineSpaceT<T> lerp(const AffineSpaceT<T>& M0,
|
||||
const AffineSpaceT<T>& M1,
|
||||
const R& t)
|
||||
{
|
||||
return AffineSpaceT<T>(lerp(M0.l,M1.l,t),lerp(M0.p,M1.p,t));
|
||||
}
|
||||
|
||||
// slerp interprets the 16 floats of the matrix M = D * R * S as components of
|
||||
// three matrizes (D, R, S) that are interpolated individually.
|
||||
template<typename T> __forceinline AffineSpaceT<LinearSpace3<Vec3<T>>>
|
||||
slerp(const AffineSpaceT<LinearSpace3<Vec4<T>>>& M0,
|
||||
const AffineSpaceT<LinearSpace3<Vec4<T>>>& M1,
|
||||
const T& t)
|
||||
{
|
||||
QuaternionT<T> q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
|
||||
QuaternionT<T> q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
|
||||
QuaternionT<T> q = slerp(q0, q1, t);
|
||||
|
||||
AffineSpaceT<LinearSpace3<Vec3<T>>> S = lerp(M0, M1, t);
|
||||
AffineSpaceT<LinearSpace3<Vec3<T>>> D(one);
|
||||
D.p.x = S.l.vx.y;
|
||||
D.p.y = S.l.vx.z;
|
||||
D.p.z = S.l.vy.z;
|
||||
S.l.vx.y = 0;
|
||||
S.l.vx.z = 0;
|
||||
S.l.vy.z = 0;
|
||||
|
||||
AffineSpaceT<LinearSpace3<Vec3<T>>> R = LinearSpace3<Vec3<T>>(q);
|
||||
return D * R * S;
|
||||
}
|
||||
|
||||
// this is a specialized version for Vec3fa because that does
|
||||
// not play along nicely with the other templated Vec3/Vec4 types
|
||||
__forceinline AffineSpace3fa slerp(const AffineSpace3ff& M0,
|
||||
const AffineSpace3ff& M1,
|
||||
const float& t)
|
||||
{
|
||||
Quaternion3f q0(M0.p.w, M0.l.vx.w, M0.l.vy.w, M0.l.vz.w);
|
||||
Quaternion3f q1(M1.p.w, M1.l.vx.w, M1.l.vy.w, M1.l.vz.w);
|
||||
Quaternion3f q = slerp(q0, q1, t);
|
||||
|
||||
AffineSpace3fa S = lerp(M0, M1, t);
|
||||
AffineSpace3fa D(one);
|
||||
D.p.x = S.l.vx.y;
|
||||
D.p.y = S.l.vx.z;
|
||||
D.p.z = S.l.vy.z;
|
||||
S.l.vx.y = 0;
|
||||
S.l.vx.z = 0;
|
||||
S.l.vy.z = 0;
|
||||
|
||||
AffineSpace3fa R = LinearSpace3fa(q);
|
||||
return D * R * S;
|
||||
}
|
||||
|
||||
/*! compute affine transform D * R * M from a quaternion decomposition */
__forceinline AffineSpace3fa quaternionDecompositionToAffineSpace(const AffineSpace3ff& qd)
{
  /* the quaternion is stored in the w components */
  const Quaternion3f rot(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);
  AffineSpace3fa R = LinearSpace3fa(rot);

  /* move the three entries that belong to D out of M and clear them there */
  AffineSpace3fa M = qd;
  AffineSpace3fa D(one);
  D.p.x = M.l.vx.y; M.l.vx.y = 0;
  D.p.y = M.l.vx.z; M.l.vx.z = 0;
  D.p.z = M.l.vy.z; M.l.vy.z = 0;

  return D * R * M;
}
|
||||
|
||||
/*! splits a quaternion decomposition into its three parts: T receives the entries
 *  l.vx.y, l.vx.z and l.vy.z, q the w components, and S the remaining matrix with
 *  those three entries cleared */
__forceinline void quaternionDecomposition(const AffineSpace3ff& qd, Vec3fa& T, Quaternion3f& q, AffineSpace3fa& S)
{
  q = Quaternion3f(qd.p.w, qd.l.vx.w, qd.l.vy.w, qd.l.vz.w);

  T.x = qd.l.vx.y;
  T.y = qd.l.vx.z;
  T.z = qd.l.vy.z;

  S = qd;
  S.l.vx.y = 0;
  S.l.vx.z = 0;
  S.l.vy.z = 0;
}
|
||||
|
||||
/*! packs T, q and S into a single quaternion decomposition: q goes into the
 *  w components and T into the entries l.vx.y, l.vx.z and l.vy.z of S */
__forceinline AffineSpace3fx quaternionDecomposition(Vec3fa const& T, Quaternion3f const& q, AffineSpace3fa const& S)
{
  AffineSpace3ff M = S;

  /* translation entries */
  M.l.vx.y = T.x;
  M.l.vx.z = T.y;
  M.l.vy.z = T.z;

  /* quaternion entries */
  M.p.w    = q.r;
  M.l.vx.w = q.i;
  M.l.vy.w = q.j;
  M.l.vz.w = q.k;

  return M;
}
|
||||
|
||||
/*! Plain aggregate holding the sixteen scalars of a quaternion
 *  decomposition. Member order is significant and must not change.
 *  Defaults: scale 1, skew 0, shift 0, quaternion r = 1 and
 *  i = j = k = 0, translation 0. */
struct __aligned(16) QuaternionDecomposition
{
  float scale_x = 1.f, scale_y = 1.f, scale_z = 1.f;
  float skew_xy = 0.f, skew_xz = 0.f, skew_yz = 0.f;
  float shift_x = 0.f, shift_y = 0.f, shift_z = 0.f;
  float quaternion_r = 1.f, quaternion_i = 0.f, quaternion_j = 0.f, quaternion_k = 0.f;
  float translation_x = 0.f, translation_y = 0.f, translation_z = 0.f;
};
|
||||
|
||||
/*! Unpacks a quaternion decomposition stored in AffineSpace3ff form into
 *  the named fields of a QuaternionDecomposition. */
__forceinline QuaternionDecomposition quaternionDecomposition(AffineSpace3ff const& M)
{
  QuaternionDecomposition out;

  // diagonal of the linear part -> scale
  out.scale_x = M.l.vx.x;
  out.scale_y = M.l.vy.y;
  out.scale_z = M.l.vz.z;

  // x/y entries of vy and vz -> skew
  out.skew_xy = M.l.vy.x;
  out.skew_xz = M.l.vz.x;
  out.skew_yz = M.l.vz.y;

  // affine offset -> shift
  out.shift_x = M.p.x;
  out.shift_y = M.p.y;
  out.shift_z = M.p.z;

  // w components -> quaternion
  out.quaternion_r = M.p.w;
  out.quaternion_i = M.l.vx.w;
  out.quaternion_j = M.l.vy.w;
  out.quaternion_k = M.l.vz.w;

  // y/z entries of vx and z entry of vy -> translation
  out.translation_x = M.l.vx.y;
  out.translation_y = M.l.vx.z;
  out.translation_z = M.l.vy.z;

  return out;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
* ! Template Specialization for 2D: return matrix for rotation around point
|
||||
* (rotation around arbitrary vector is not meaningful in 2D)
|
||||
*/
|
||||
template<> __forceinline
AffineSpace2f AffineSpace2f::rotate(const Vec2f& p, const float& r) {
  // translate by -p, rotate by r, then translate back by +p
  const AffineSpace2f moveToOrigin = translate(-p);
  const AffineSpace2f spin(LinearSpace2f::rotate(r));
  const AffineSpace2f moveBack = translate(+p);
  return moveBack*spin*moveToOrigin;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Similarity Transform
|
||||
//
|
||||
// checks, if M is a similarity transformation, i.e if there exists a factor D
|
||||
// such that for all x,y: distance(Mx, My) = D * distance(x, y)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/*! Returns true if the three axes of M.l are pairwise orthogonal and of
 *  equal squared length, each within a tolerance of 1e-5.
 *
 *  \param M transformation to test (only the linear part M.l is read)
 *  \param D optional output; set to 0 on failure and to the square root
 *           of the squared length of M.l.vx on success */
__forceinline bool similarityTransform(const AffineSpace3fa& M, float* D)
{
  const float tolerance = 1e-5f;

  if (D) *D = 0.f;

  // the axes have to be pairwise orthogonal
  if (abs(dot(M.l.vx, M.l.vy)) > tolerance ||
      abs(dot(M.l.vx, M.l.vz)) > tolerance ||
      abs(dot(M.l.vy, M.l.vz)) > tolerance)
    return false;

  // ... and must all have the same squared length
  const float sqrLenX = dot(M.l.vx, M.l.vx);
  const float sqrLenY = dot(M.l.vy, M.l.vy);
  const float sqrLenZ = dot(M.l.vz, M.l.vz);

  if (abs(sqrLenX - sqrLenY) > tolerance) return false;
  if (abs(sqrLenX - sqrLenZ) > tolerance) return false;
  if (abs(sqrLenY - sqrLenZ) > tolerance) return false;

  if (D) *D = sqrtf(sqrLenX);
  return true;
}
|
||||
|
||||
/*! Writes the four vectors of source (l.vx, l.vy, l.vz, p) to *ptr using
 *  Vec3fa::storeu, one vector at a time. */
__forceinline void AffineSpace3fa_store_unaligned(const AffineSpace3fa &source, AffineSpace3fa* ptr)
{
  // linear part
  Vec3fa::storeu(&ptr->l.vx, source.l.vx);
  Vec3fa::storeu(&ptr->l.vy, source.l.vy);
  Vec3fa::storeu(&ptr->l.vz, source.l.vz);

  // affine part
  Vec3fa::storeu(&ptr->p, source.p);
}
|
||||
|
||||
/*! Reads the four vectors (l.vx, l.vy, l.vz, p) from *ptr using
 *  Vec3fa::loadu and returns them as a new AffineSpace3fa. */
__forceinline AffineSpace3fa AffineSpace3fa_load_unaligned(AffineSpace3fa* ptr)
{
  AffineSpace3fa loaded;

  // linear part
  loaded.l.vx = Vec3fa::loadu(&ptr->l.vx);
  loaded.l.vy = Vec3fa::loadu(&ptr->l.vy);
  loaded.l.vz = Vec3fa::loadu(&ptr->l.vz);

  // affine part
  loaded.p = Vec3fa::loadu(&ptr->p);

  return loaded;
}
|
||||
|
||||
#undef VectorT
|
||||
#undef ScalarT
|
||||
}
|
||||
@ -0,0 +1,331 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "vec2.h"
|
||||
#include "vec3.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
namespace internal {
|
||||
|
||||
template <typename T> __forceinline T divideByTwo(const T& v) { return v / T(2); }
|
||||
template <> __forceinline float divideByTwo<float>(const float& v) { return v * 0.5f; }
|
||||
template <> __forceinline double divideByTwo<double>(const double& v) { return v * 0.5; }
|
||||
|
||||
} // namespace internal
|
||||
template<typename T>
|
||||
struct BBox
|
||||
{
|
||||
T lower, upper;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Construction
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline BBox ( ) { }
|
||||
template<typename T1>
|
||||
__forceinline BBox ( const BBox<T1>& other ) : lower(other.lower), upper(other.upper) {}
|
||||
__forceinline BBox& operator=( const BBox& other ) { lower = other.lower; upper = other.upper; return *this; }
|
||||
|
||||
__forceinline BBox ( const T& v ) : lower(v), upper(v) {}
|
||||
__forceinline BBox ( const T& lower, const T& upper ) : lower(lower), upper(upper) {}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Extending Bounds
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline const BBox& extend(const BBox& other) { lower = min(lower,other.lower); upper = max(upper,other.upper); return *this; }
|
||||
__forceinline const BBox& extend(const T & other) { lower = min(lower,other ); upper = max(upper,other ); return *this; }
|
||||
|
||||
/*! tests if box is empty */
|
||||
__forceinline bool empty() const { for (int i=0; i<T::N; i++) if (lower[i] > upper[i]) return true; return false; }
|
||||
|
||||
/*! computes the size of the box */
|
||||
__forceinline T size() const { return upper - lower; }
|
||||
|
||||
/*! computes the center of the box */
|
||||
__forceinline T center() const { return internal::divideByTwo<T>(lower+upper); }
|
||||
|
||||
/*! computes twice the center of the box */
|
||||
__forceinline T center2() const { return lower+upper; }
|
||||
|
||||
/*! merges two boxes */
|
||||
__forceinline static const BBox merge (const BBox& a, const BBox& b) {
|
||||
return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
|
||||
}
|
||||
|
||||
/*! enlarge box by some scaling factor */
|
||||
__forceinline BBox enlarge_by(const float a) const {
|
||||
return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Constants
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline BBox( EmptyTy ) : lower(pos_inf), upper(neg_inf) {}
|
||||
__forceinline BBox( FullTy ) : lower(neg_inf), upper(pos_inf) {}
|
||||
__forceinline BBox( FalseTy ) : lower(pos_inf), upper(neg_inf) {}
|
||||
__forceinline BBox( TrueTy ) : lower(neg_inf), upper(pos_inf) {}
|
||||
__forceinline BBox( NegInfTy ): lower(pos_inf), upper(neg_inf) {}
|
||||
__forceinline BBox( PosInfTy ): lower(neg_inf), upper(pos_inf) {}
|
||||
};
|
||||
|
||||
/*! scalar specialization: a 1D interval is empty when lower exceeds upper */
template<>
__forceinline bool BBox<float>::empty() const
{
  return upper < lower;
}
|
||||
|
||||
#if defined(__SSE__)
|
||||
template<> __forceinline bool BBox<Vec3fa>::empty() const {
|
||||
return !all(le_mask(lower,upper));
|
||||
}
|
||||
template<> __forceinline bool BBox<Vec3fx>::empty() const {
|
||||
return !all(le_mask(lower,upper));
|
||||
}
|
||||
#endif
|
||||
|
||||
/*! tests if box is finite */
|
||||
__forceinline bool isvalid( const BBox<Vec3fa>& v ) {
|
||||
return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)));
|
||||
}
|
||||
|
||||
/*! tests if box is finite and non-empty*/
|
||||
__forceinline bool isvalid_non_empty( const BBox<Vec3fa>& v ) {
|
||||
return all(gt_mask(v.lower,Vec3fa_t(-FLT_LARGE)) & lt_mask(v.upper,Vec3fa_t(+FLT_LARGE)) & le_mask(v.lower,v.upper));
|
||||
}
|
||||
|
||||
/*! tests if box has finite entries */
|
||||
__forceinline bool is_finite( const BBox<Vec3fa>& b) {
|
||||
return is_finite(b.lower) && is_finite(b.upper);
|
||||
}
|
||||
|
||||
/*! test if point contained in box */
|
||||
__forceinline bool inside ( const BBox<Vec3fa>& b, const Vec3fa& p ) { return all(ge_mask(p,b.lower) & le_mask(p,b.upper)); }
|
||||
|
||||
/*! computes the center of the box */
|
||||
template<typename T> __forceinline const T center2(const BBox<T>& box) { return box.lower + box.upper; }
|
||||
template<typename T> __forceinline const T center (const BBox<T>& box) { return internal::divideByTwo<T>(center2(box)); }
|
||||
|
||||
/*! computes the volume of a bounding box */
|
||||
__forceinline float volume ( const BBox<Vec3fa>& b ) { return reduce_mul(b.size()); }
|
||||
__forceinline float safeVolume( const BBox<Vec3fa>& b ) { if (b.empty()) return 0.0f; else return volume(b); }
|
||||
|
||||
/*! computes the volume of a bounding box */
|
||||
__forceinline float volume( const BBox<Vec3f>& b ) { return reduce_mul(b.size()); }
|
||||
|
||||
/*! computes the surface area of a bounding box */
|
||||
template<typename T> __forceinline const T area( const BBox<Vec2<T> >& b ) { const Vec2<T> d = b.size(); return d.x*d.y; }
|
||||
|
||||
template<typename T> __forceinline const T halfArea( const BBox<Vec3<T> >& b ) { return halfArea(b.size()); }
|
||||
template<typename T> __forceinline const T area( const BBox<Vec3<T> >& b ) { return T(2)*halfArea(b); }
|
||||
|
||||
__forceinline float halfArea( const BBox<Vec3fa>& b ) { return halfArea(b.size()); }
|
||||
__forceinline float area( const BBox<Vec3fa>& b ) { return 2.0f*halfArea(b); }
|
||||
|
||||
__forceinline float halfArea( const BBox<Vec3fx>& b ) { return halfArea(b.size()); }
|
||||
__forceinline float area( const BBox<Vec3fx>& b ) { return 2.0f*halfArea(b); }
|
||||
|
||||
template<typename Vec> __forceinline float safeArea( const BBox<Vec>& b ) { if (b.empty()) return 0.0f; else return area(b); }
|
||||
|
||||
/*! forwards to halfArea of the box */
template<typename T>
__forceinline float expectedApproxHalfArea(const BBox<T>& box)
{
  return halfArea(box);
}
|
||||
|
||||
/*! merges bounding boxes and points */
|
||||
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const T& b ) { return BBox<T>(min(a.lower, b ), max(a.upper, b )); }
|
||||
template<typename T> __forceinline const BBox<T> merge( const T& a, const BBox<T>& b ) { return BBox<T>(min(a , b.lower), max(a , b.upper)); }
|
||||
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(min(a.lower, b.lower), max(a.upper, b.upper)); }
|
||||
|
||||
/*! Merges three boxes. */
|
||||
template<typename T> __forceinline const BBox<T> merge( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return merge(a,merge(b,c)); }
|
||||
|
||||
/*! Merges four boxes. */
|
||||
template<typename T> __forceinline BBox<T> merge(const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d) {
|
||||
return merge(merge(a,b),merge(c,d));
|
||||
}
|
||||
|
||||
/*! Comparison Operators */
|
||||
template<typename T> __forceinline bool operator==( const BBox<T>& a, const BBox<T>& b ) { return a.lower == b.lower && a.upper == b.upper; }
|
||||
template<typename T> __forceinline bool operator!=( const BBox<T>& a, const BBox<T>& b ) { return a.lower != b.lower || a.upper != b.upper; }
|
||||
|
||||
/*! scaling */
|
||||
template<typename T> __forceinline BBox<T> operator *( const float& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
|
||||
template<typename T> __forceinline BBox<T> operator *( const T& a, const BBox<T>& b ) { return BBox<T>(a*b.lower,a*b.upper); }
|
||||
|
||||
/*! translations */
|
||||
template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower+b.lower,a.upper+b.upper); }
|
||||
template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(a.lower-b.lower,a.upper-b.upper); }
|
||||
template<typename T> __forceinline BBox<T> operator +( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower+b ,a.upper+b ); }
|
||||
template<typename T> __forceinline BBox<T> operator -( const BBox<T>& a, const T & b ) { return BBox<T>(a.lower-b ,a.upper-b ); }
|
||||
|
||||
/*! extension */
|
||||
template<typename T> __forceinline BBox<T> enlarge(const BBox<T>& a, const T& b) { return BBox<T>(a.lower-b, a.upper+b); }
|
||||
|
||||
/*! intersect bounding boxes */
|
||||
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b ) { return BBox<T>(max(a.lower, b.lower), min(a.upper, b.upper)); }
|
||||
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c ) { return intersect(a,intersect(b,c)); }
|
||||
template<typename T> __forceinline const BBox<T> intersect( const BBox<T>& a, const BBox<T>& b, const BBox<T>& c, const BBox<T>& d ) { return intersect(intersect(a,b),intersect(c,d)); }
|
||||
|
||||
/*! subtract bounds from each other */
|
||||
template<typename T> __forceinline void subtract(const BBox<T>& a, const BBox<T>& b, BBox<T>& c, BBox<T>& d)
|
||||
{
|
||||
c.lower = a.lower;
|
||||
c.upper = min(a.upper,b.lower);
|
||||
d.lower = max(a.lower,b.upper);
|
||||
d.upper = a.upper;
|
||||
}
|
||||
|
||||
/*! tests if bounding boxes (and points) are disjoint (empty intersection) */
|
||||
template<typename T> __inline bool disjoint( const BBox<T>& a, const BBox<T>& b ) { return intersect(a,b).empty(); }
|
||||
template<typename T> __inline bool disjoint( const BBox<T>& a, const T& b ) { return disjoint(a,BBox<T>(b)); }
|
||||
template<typename T> __inline bool disjoint( const T& a, const BBox<T>& b ) { return disjoint(BBox<T>(a),b); }
|
||||
|
||||
/*! tests if bounding boxes (and points) are conjoint (non-empty intersection) */
|
||||
template<typename T> __inline bool conjoint( const BBox<T>& a, const BBox<T>& b ) { return !intersect(a,b).empty(); }
|
||||
template<typename T> __inline bool conjoint( const BBox<T>& a, const T& b ) { return conjoint(a,BBox<T>(b)); }
|
||||
template<typename T> __inline bool conjoint( const T& a, const BBox<T>& b ) { return conjoint(BBox<T>(a),b); }
|
||||
|
||||
/*! subset relation */
|
||||
template<typename T> __inline bool subset( const BBox<T>& a, const BBox<T>& b )
|
||||
{
|
||||
for ( size_t i = 0; i < T::N; i++ ) if ( a.lower[i] < b.lower[i] ) return false;
|
||||
for ( size_t i = 0; i < T::N; i++ ) if ( a.upper[i] > b.upper[i] ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<> __inline bool subset( const BBox<Vec3fa>& a, const BBox<Vec3fa>& b ) {
|
||||
return all(ge_mask(a.lower,b.lower)) & all(le_mask(a.upper,b.upper));
|
||||
}
|
||||
|
||||
template<> __inline bool subset( const BBox<Vec3fx>& a, const BBox<Vec3fx>& b ) {
|
||||
return all(ge_mask(a.lower,b.lower)) & all(le_mask(a.upper,b.upper));
|
||||
}
|
||||
|
||||
/*! blending */
|
||||
template<typename T>
|
||||
__forceinline BBox<T> lerp(const BBox<T>& b0, const BBox<T>& b1, const float t) {
|
||||
return BBox<T>(lerp(b0.lower,b1.lower,t),lerp(b0.upper,b1.upper,t));
|
||||
}
|
||||
|
||||
/*! output operator */
|
||||
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const BBox<T>& box) {
|
||||
return cout << "[" << box.lower << "; " << box.upper << "]";
|
||||
}
|
||||
|
||||
/*! default template instantiations */
|
||||
typedef BBox<float> BBox1f;
|
||||
typedef BBox<Vec2f> BBox2f;
|
||||
typedef BBox<Vec2fa> BBox2fa;
|
||||
typedef BBox<Vec3f> BBox3f;
|
||||
typedef BBox<Vec3fa> BBox3fa;
|
||||
typedef BBox<Vec3fx> BBox3fx;
|
||||
typedef BBox<Vec3ff> BBox3ff;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// SSE / AVX / MIC specializations
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined __SSE__
|
||||
#include "../simd/sse.h"
|
||||
#endif
|
||||
|
||||
#if defined __AVX__
|
||||
#include "../simd/avx.h"
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512F__)
|
||||
#include "../simd/avx512.h"
|
||||
#endif
|
||||
|
||||
namespace embree
|
||||
{
|
||||
template<int N>
|
||||
__forceinline BBox<Vec3<vfloat<N>>> transpose(const BBox3fa* bounds);
|
||||
|
||||
template<>
|
||||
__forceinline BBox<Vec3<vfloat4>> transpose<4>(const BBox3fa* bounds)
|
||||
{
|
||||
BBox<Vec3<vfloat4>> dest;
|
||||
|
||||
transpose((vfloat4&)bounds[0].lower,
|
||||
(vfloat4&)bounds[1].lower,
|
||||
(vfloat4&)bounds[2].lower,
|
||||
(vfloat4&)bounds[3].lower,
|
||||
dest.lower.x,
|
||||
dest.lower.y,
|
||||
dest.lower.z);
|
||||
|
||||
transpose((vfloat4&)bounds[0].upper,
|
||||
(vfloat4&)bounds[1].upper,
|
||||
(vfloat4&)bounds[2].upper,
|
||||
(vfloat4&)bounds[3].upper,
|
||||
dest.upper.x,
|
||||
dest.upper.y,
|
||||
dest.upper.z);
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
#if defined(__AVX__)
/*! 8-wide version; reads bounds[0] .. bounds[7] */
template<>
__forceinline BBox<Vec3<vfloat8>> transpose<8>(const BBox3fa* bounds)
{
  // each corner is reinterpreted as a vfloat4; the C-style cast also
  // drops the constness of bounds
  vfloat4& lower0 = (vfloat4&)bounds[0].lower;
  vfloat4& lower1 = (vfloat4&)bounds[1].lower;
  vfloat4& lower2 = (vfloat4&)bounds[2].lower;
  vfloat4& lower3 = (vfloat4&)bounds[3].lower;
  vfloat4& lower4 = (vfloat4&)bounds[4].lower;
  vfloat4& lower5 = (vfloat4&)bounds[5].lower;
  vfloat4& lower6 = (vfloat4&)bounds[6].lower;
  vfloat4& lower7 = (vfloat4&)bounds[7].lower;

  vfloat4& upper0 = (vfloat4&)bounds[0].upper;
  vfloat4& upper1 = (vfloat4&)bounds[1].upper;
  vfloat4& upper2 = (vfloat4&)bounds[2].upper;
  vfloat4& upper3 = (vfloat4&)bounds[3].upper;
  vfloat4& upper4 = (vfloat4&)bounds[4].upper;
  vfloat4& upper5 = (vfloat4&)bounds[5].upper;
  vfloat4& upper6 = (vfloat4&)bounds[6].upper;
  vfloat4& upper7 = (vfloat4&)bounds[7].upper;

  BBox<Vec3<vfloat8>> dest;

  transpose(lower0, lower1, lower2, lower3,
            lower4, lower5, lower6, lower7,
            dest.lower.x, dest.lower.y, dest.lower.z);

  transpose(upper0, upper1, upper2, upper3,
            upper4, upper5, upper6, upper7,
            dest.upper.x, dest.upper.y, dest.upper.z);

  return dest;
}
#endif
|
||||
|
||||
template<int N>
|
||||
__forceinline BBox3fa merge(const BBox3fa* bounds);
|
||||
|
||||
template<>
|
||||
__forceinline BBox3fa merge<4>(const BBox3fa* bounds)
|
||||
{
|
||||
const Vec3fa lower = min(min(bounds[0].lower,bounds[1].lower),
|
||||
min(bounds[2].lower,bounds[3].lower));
|
||||
const Vec3fa upper = max(max(bounds[0].upper,bounds[1].upper),
|
||||
max(bounds[2].upper,bounds[3].upper));
|
||||
return BBox3fa(lower,upper);
|
||||
}
|
||||
|
||||
#if defined(__AVX__)
/*! 8-wide version; reads bounds[0] .. bounds[7] */
template<>
__forceinline BBox3fa merge<8>(const BBox3fa* bounds)
{
  // pairwise reduction tree: pairs, then quads, then the final result
  const Vec3fa lower01 = min(bounds[0].lower,bounds[1].lower);
  const Vec3fa lower23 = min(bounds[2].lower,bounds[3].lower);
  const Vec3fa lower45 = min(bounds[4].lower,bounds[5].lower);
  const Vec3fa lower67 = min(bounds[6].lower,bounds[7].lower);
  const Vec3fa lower   = min(min(lower01,lower23),min(lower45,lower67));

  const Vec3fa upper01 = max(bounds[0].upper,bounds[1].upper);
  const Vec3fa upper23 = max(bounds[2].upper,bounds[3].upper);
  const Vec3fa upper45 = max(bounds[4].upper,bounds[5].upper);
  const Vec3fa upper67 = max(bounds[6].upper,bounds[7].upper);
  const Vec3fa upper   = max(max(upper01,upper23),max(upper45,upper67));

  return BBox3fa(lower,upper);
}
#endif
|
||||
}
|
||||
|
||||
@ -0,0 +1,47 @@
|
||||
// Copyright 2009-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "math.h"
|
||||
|
||||
namespace embree
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// RGB Color Class
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<typename T> struct Col3
|
||||
{
|
||||
T r, g, b;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Construction
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline Col3 ( ) { }
|
||||
__forceinline Col3 ( const Col3& other ) { r = other.r; g = other.g; b = other.b; }
|
||||
__forceinline Col3& operator=( const Col3& other ) { r = other.r; g = other.g; b = other.b; return *this; }
|
||||
|
||||
__forceinline explicit Col3 (const T& v) : r(v), g(v), b(v) {}
|
||||
__forceinline Col3 (const T& r, const T& g, const T& b) : r(r), g(g), b(b) {}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// Constants
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__forceinline Col3 (ZeroTy) : r(zero) , g(zero) , b(zero) {}
|
||||
__forceinline Col3 (OneTy) : r(one) , g(one) , b(one) {}
|
||||
__forceinline Col3 (PosInfTy) : r(pos_inf), g(pos_inf), b(pos_inf) {}
|
||||
__forceinline Col3 (NegInfTy) : r(neg_inf), g(neg_inf), b(neg_inf) {}
|
||||
};
|
||||
|
||||
/*! output operator */
|
||||
template<typename T> __forceinline embree_ostream operator<<(embree_ostream cout, const Col3<T>& a) {
|
||||
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
|
||||
}
|
||||
|
||||
/*! default template instantiations */
|
||||
typedef Col3<unsigned char> Col3uc;
|
||||
typedef Col3<float > Col3f;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue