You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
391 lines
13 KiB
391 lines
13 KiB
/****************************************************************************
|
|
Copyright (c) 2010 cocos2d-x.org
|
|
Copyright (c) 2013-2016 Chukong Technologies Inc.
|
|
Copyright (c) 2017-2023 Xiamen Yaji Software Co., Ltd.
|
|
|
|
http://www.cocos.com
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights to
|
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
of the Software, and to permit persons to whom the Software is furnished to do so,
|
|
subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
****************************************************************************/
|
|
|
|
#pragma once
|
|
|
|
#include <algorithm>
#include <bitset>
#include <cerrno>
#include <climits>
#include <cstring>
#include <limits>
#include <type_traits>
#include <utility>
#include "base/Macros.h"
#include "base/TypeDef.h"
#include "base/std/container/string.h"
#include "base/std/container/vector.h"
|
|
/** @file ccUtils.h
|
|
Misc free functions
|
|
*/
|
|
|
|
namespace cc {
|
|
namespace utils {
|
|
|
|
// Declaration only; the definition is not visible here.
// NOTE(review): presumably returns a textual dump of the current call stack, omitting the first
// `skip` frames and reporting at most `maxDepth` frames — confirm against the definition.
CC_DLL ccstd::string getStacktrace(uint32_t skip = 0, uint32_t maxDepth = UINT32_MAX);
|
|
|
|
/**
|
|
* Returns the Next Power of Two value.
|
|
* Examples:
|
|
* - If "value" is 15, it will return 16.
|
|
* - If "value" is 16, it will return 16.
|
|
* - If "value" is 17, it will return 32.
|
|
* @param value The value to get next power of two.
|
|
* @return Returns the next power of two value.
|
|
* @since v0.99.5
|
|
*/
|
|
CC_DLL uint32_t nextPOT(uint32_t x);
|
|
|
|
/**
|
|
* Same to ::atof, but strip the string, remain 7 numbers after '.' before call atof.
|
|
* Why we need this? Because in android c++_static, atof ( and std::atof ) is unsupported for numbers have long decimal part and contain
|
|
* several numbers can approximate to 1 ( like 90.099998474121094 ), it will return inf. This function is used to fix this bug.
|
|
* @param str The string be to converted to double.
|
|
* @return Returns converted value of a string.
|
|
*/
|
|
CC_DLL double atof(const char *str);
|
|
|
|
/**
 * Isolates the lowest set bit of an unsigned integer mask (e.g. 0b01100 -> 0b00100).
 * A zero mask yields zero.
 *
 * The two's-complement negation is spelled `~mask + 1` instead of `-mask`, so the
 * "unary minus applied to unsigned type" diagnostic (MSVC C4146) is never triggered
 * and no compiler-specific warning pragmas are needed around this function.
 *
 * @param mask Unsigned integral bit mask.
 * @return A value in which only the least-significant set bit of `mask` is kept.
 */
template <typename T, typename = typename std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value>>
inline T getLowestBit(T mask) {
    // Narrow types are promoted to int by `~`; the cast brings the result back to T.
    return static_cast<T>(mask & (~mask + 1));
}
|
|
|
|
/**
 * Clears the lowest set bit of an unsigned integer mask (e.g. 0b01100 -> 0b01000).
 * A zero mask yields zero.
 *
 * @param mask Unsigned integral bit mask.
 * @return `mask` with its least-significant set bit turned off.
 */
template <typename T, typename = typename std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value>>
inline T clearLowestBit(T mask) {
    // Subtracting one flips the lowest set bit and every bit below it; the AND drops them.
    return static_cast<T>(mask & (mask - 1));
}
|
|
|
|
/**
 * Returns the 1-based position of the single set bit in a 32-bit value.
 * `v` must be a power of 2; the result is not meaningful for other non-zero values.
 *
 * @param v A power of two, or 0.
 * @return 0 when `v` is 0; otherwise the bit index counted from 1
 *         (1 for 0x1, 2 for 0x2, ..., 32 for 0x80000000).
 */
inline uint32_t getBitPosition(uint32_t v) {
    if (v == 0U) {
        return 0;
    }
    // Binary search: each mask selects the lower half of every group at that width.
    uint32_t position = 32;
    if ((v & 0x0000FFFFU) != 0U) position -= 16;
    if ((v & 0x00FF00FFU) != 0U) position -= 8;
    if ((v & 0x0F0F0F0FU) != 0U) position -= 4;
    if ((v & 0x33333333U) != 0U) position -= 2;
    if ((v & 0x55555555U) != 0U) position -= 1;
    return position;
}
|
|
|
|
/**
 * Returns the 1-based position of the single set bit in a 64-bit value.
 * `v` must be a power of 2; the result is not meaningful for other non-zero values.
 *
 * @param v A power of two, or 0.
 * @return 0 when `v` is 0; otherwise the bit index counted from 1
 *         (1 for 0x1, ..., 64 for the top bit).
 */
inline uint64_t getBitPosition(uint64_t v) {
    if (v == 0ULL) {
        return 0;
    }
    // Binary search: each mask selects the lower half of every group at that width.
    uint64_t position = 64;
    if ((v & 0x00000000FFFFFFFFULL) != 0ULL) position -= 32;
    if ((v & 0x0000FFFF0000FFFFULL) != 0ULL) position -= 16;
    if ((v & 0x00FF00FF00FF00FFULL) != 0ULL) position -= 8;
    if ((v & 0x0F0F0F0F0F0F0F0FULL) != 0ULL) position -= 4;
    if ((v & 0x3333333333333333ULL) != 0ULL) position -= 2;
    if ((v & 0x5555555555555555ULL) != 0ULL) position -= 1;
    return position;
}
|
|
|
|
/**
 * Counts the bits set to 1 in an integral value.
 *
 * The bitset is sized in bits (`sizeof(T) * CHAR_BIT`). Sizing it with `sizeof(T)` alone
 * would give a bitset of only `sizeof(T)` bits and silently ignore all higher bits.
 *
 * @param mask Integral value to inspect. For signed types the two's-complement bit pattern
 *             of width `sizeof(T) * CHAR_BIT` is counted.
 * @return Number of set bits in `mask`.
 */
template <typename T, typename = typename std::enable_if_t<std::is_integral<T>::value>>
inline size_t popcount(T mask) {
    using Bits = std::bitset<sizeof(T) * CHAR_BIT>;
    // The bitset keeps only its low N bits, which drops any sign extension from the cast.
    return Bits(static_cast<unsigned long long>(mask)).count();
}
|
|
|
|
/**
 * Rounds `size` up to the nearest multiple of `alignment`.
 * Example: alignTo(13, 4) == 16, alignTo(16, 4) == 16.
 *
 * NOTE(review): `size == 0` is not special-cased. `size - 1` wraps for unsigned T and is
 * negative for signed T, so the result for 0 depends on the type — callers presumably pass
 * `size >= 1`; confirm before relying on the zero case.
 *
 * @param size      Value to align.
 * @param alignment Alignment step; must be non-zero (it is used as a divisor).
 * @return The smallest multiple of `alignment` that is not less than `size` (for `size >= 1`).
 */
template <typename T, typename = typename std::enable_if_t<std::is_integral<T>::value>>
inline T alignTo(T size, T alignment) {
    // `auto` keeps the promoted type of the sub-expression, exactly as a single expression would.
    const auto fullBlocksBefore = (size - 1) / alignment;
    return (fullBlocksBefore + 1) * alignment;
}
|
|
|
|
template <uint size, uint alignment>
|
|
constexpr uint ALIGN_TO = ((size - 1) / alignment + 1) * alignment;
|
|
|
|
template <class T>
|
|
inline uint toUint(T value) {
|
|
static_assert(std::is_arithmetic<T>::value, "T must be numeric");
|
|
|
|
CC_ASSERT(static_cast<uintmax_t>(value) <= static_cast<uintmax_t>(std::numeric_limits<uint>::max()));
|
|
|
|
return static_cast<uint>(value);
|
|
}
|
|
|
|
/**
 * Copies every entry of `inMap` into `outMap`.
 * Entries are added with `emplace`, so for unique-key maps a key that already exists in
 * `outMap` keeps its current value.
 *
 * @param outMap Destination map; modified in place.
 * @param inMap  Source map; left untouched.
 * @return A reference to `outMap`.
 */
template <typename Map>
Map &mergeToMap(Map &outMap, const Map &inMap) {
    for (const auto &entry : inMap) {
        outMap.emplace(entry.first, entry.second);
    }
    return outMap;
}
|
|
|
|
namespace numext {
|
|
|
|
template <typename Tgt, typename Src>
|
|
CC_FORCE_INLINE Tgt bit_cast(const Src &src) { // NOLINT(readability-identifier-naming)
|
|
// The behaviour of memcpy is not specified for non-trivially copyable types
|
|
static_assert(std::is_trivially_copyable<Src>::value, "THIS_TYPE_IS_NOT_SUPPORTED");
|
|
static_assert(std::is_trivially_copyable<Tgt>::value && std::is_default_constructible<Tgt>::value,
|
|
"THIS_TYPE_IS_NOT_SUPPORTED");
|
|
static_assert(sizeof(Src) == sizeof(Tgt), "THIS_TYPE_IS_NOT_SUPPORTED");
|
|
|
|
Tgt tgt;
|
|
// Load src into registers first. This allows the memcpy to be elided by CUDA.
|
|
const Src staged = src;
|
|
memcpy(&tgt, &staged, sizeof(Tgt));
|
|
return tgt;
|
|
}
|
|
|
|
} // namespace numext
|
|
|
|
// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
|
|
// compilers seem to follow this. We therefore include it explicitly.
|
|
// See also: https://bugs.llvm.org/show_bug.cgi?id=47955
|
|
#if defined(CC_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
|
|
#include <arm_fp16.h>
|
|
#endif
|
|
|
|
// Code from https://gitlab.com/libeigen/eigen/-/blob/master/Eigen/src/Core/arch/Default/Half.h#L586
/**
 * Raw storage for a 16-bit half-precision float.
 * When CC_HAS_ARM64_FP16_SCALAR_ARITHMETIC is defined the payload `x` is a native `__fp16`;
 * otherwise it is the bit pattern stored in a `uint16_t`.
 */
struct HalfRaw {
    // Default: payload initialised from 0.
    constexpr HalfRaw() : x(0) {}
#if defined(CC_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
    // Builds the native half from its bit pattern (bit-cast, not a numeric conversion).
    explicit HalfRaw(uint16_t raw) : x(numext::bit_cast<__fp16>(raw)) {
    }
    __fp16 x;
#else
    // Stores the bit pattern as-is.
    explicit constexpr HalfRaw(uint16_t raw) : x(raw) {}
    uint16_t x; // NOLINT(modernize-use-default-member-init)
#endif
};
|
|
|
|
// Conversion routines, including fallbacks for the host or older CUDA.
|
|
// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
|
|
// these in hardware. If we need more performance on older/other CPUs, they are
|
|
// also possible to vectorize directly.
|
|
|
|
CC_FORCE_INLINE HalfRaw rawUint16ToHalf(uint16_t x) {
|
|
// We cannot simply do a "return HalfRaw(x)" here, because HalfRaw is union type
|
|
// in the hip_fp16 header file, and that will trigger a compile error
|
|
// On the other hand, having anything but a return statement also triggers a compile error
|
|
// because this is constexpr function.
|
|
// Fortunately, since we need to disable EIGEN_CONSTEXPR for GPU anyway, we can get out
|
|
// of this catch22 by having separate bodies for GPU / non GPU
|
|
#if defined(CC_HAS_GPU_FP16)
|
|
HalfRaw h;
|
|
h.x = x;
|
|
return h;
|
|
#else
|
|
return HalfRaw(x);
|
|
#endif
|
|
}
|
|
|
|
/**
 * Returns the 16-bit pattern stored in a HalfRaw.
 *
 * @param h Half-precision value.
 * @return The bits of `h` as a `uint16_t`.
 */
CC_FORCE_INLINE uint16_t rawHalfAsUint16(const HalfRaw &h) {
    // By default the member 'x' is already a uint16_t.
    // For ARM64 native half, the member 'x' is of type __fp16, so we need to bit-cast.
#if defined(CC_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
    return numext::bit_cast<uint16_t>(h.x);
#else
    return h.x;
#endif
}
|
|
|
|
/**
 * Overlay of a 32-bit float and its unsigned integer bit pattern, used by the software
 * half <-> float conversions to manipulate sign, exponent and mantissa bits directly.
 * The first member is `u`, so brace-initialisation (`float32_bits b = {bits};`) sets the bits.
 */
union float32_bits {
    unsigned int u; // bit pattern
    float f;        // the same storage viewed as a float
};
|
|
|
|
/**
 * Converts a 32-bit float to half precision.
 *
 * Three implementations are selected at compile time:
 * - CC_HAS_FP16_C: x86 F16C conversion intrinsics;
 * - CC_HAS_ARM64_FP16_SCALAR_ARITHMETIC: native `__fp16` cast;
 * - otherwise: a portable bit-manipulation fallback (values at or above 2^16 become Inf,
 *   NaN becomes a quiet NaN, small values become half subnormals or zero).
 *
 * @param ff The float to convert.
 * @return The half-precision value as a HalfRaw.
 */
CC_FORCE_INLINE HalfRaw floatToHalf(float ff) {
#if defined(CC_HAS_FP16_C)
    HalfRaw h;
    #ifdef _MSC_VER
    // MSVC does not have scalar instructions.
    h.x = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(ff), 0), 0);
    #else
    h.x = _cvtss_sh(ff, 0);
    #endif
    return h;

#elif defined(CC_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
    HalfRaw h;
    h.x = static_cast<__fp16>(ff);
    return h;

#else
    float32_bits f;
    f.f = ff;

    const float32_bits f32infty = {255U << 23};                                 // float +Inf
    const float32_bits f16max = {(127U + 16U) << 23};                           // float 2^16
    const float32_bits denorm_magic = {((127U - 15U) + (23U - 10U) + 1U) << 23}; // NOLINT(readability-identifier-naming)
    const unsigned int sign_mask = 0x80000000U;                                 // NOLINT
    HalfRaw o;
    o.x = static_cast<uint16_t>(0x0U);

    // Split off the sign; everything below works on the magnitude.
    const unsigned int sign = f.u & sign_mask;
    f.u ^= sign;

    // NOTE all the integer compares in this function can be safely
    // compiled into signed compares since all operands are below
    // 0x80000000. Important if you want fast straight SSE2 code
    // (since there's no unsigned PCMPGTD).

    if (f.u >= f16max.u) {                          // result is Inf or NaN (all exponent bits set)
        o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
    } else {                                        // (De)normalized number or zero
        if (f.u < (113U << 23)) {                   // resulting FP16 is subnormal or zero
            // use a magic value to align our 10 mantissa bits at the bottom of
            // the float. as long as FP addition is round-to-nearest-even this
            // just works.
            f.f += denorm_magic.f;

            // and one integer subtract of the bias later, we have our final float!
            o.x = static_cast<uint16_t>(f.u - denorm_magic.u);
        } else {
            const unsigned int mant_odd = (f.u >> 13) & 1; // NOLINT(readability-identifier-naming) // resulting mantissa is odd

            // update exponent, rounding bias part 1
            // Equivalent to `f.u += ((unsigned int)(15 - 127) << 23) + 0xfff`, but
            // without arithmetic overflow.
            f.u += 0xc8000fffU;
            // rounding bias part 2
            f.u += mant_odd;
            // take the bits!
            o.x = static_cast<uint16_t>(f.u >> 13);
        }
    }

    // Re-attach the sign in the half's top bit.
    o.x |= static_cast<uint16_t>(sign >> 16);
    return o;
#endif
}
|
|
|
|
/**
 * Converts a half-precision value to a 32-bit float.
 *
 * Three implementations are selected at compile time:
 * - CC_HAS_FP16_C: x86 F16C conversion intrinsics;
 * - CC_HAS_ARM64_FP16_SCALAR_ARITHMETIC: native `__fp16` cast;
 * - otherwise: a portable bit-manipulation fallback. All of its shifts are done on
 *   unsigned operands so that moving the sign bit into bit 31 never shifts a signed int
 *   into its sign bit.
 *
 * @param h The half-precision value.
 * @return The value of `h` as a float.
 */
CC_FORCE_INLINE float halfToFloat(HalfRaw h) {
#if defined(CC_HAS_FP16_C)
    #ifdef _MSC_VER
    // MSVC does not have scalar instructions.
    return _mm_cvtss_f32(_mm_cvtph_ps(_mm_set1_epi16(h.x)));
    #else
    return _cvtsh_ss(h.x);
    #endif
#elif defined(CC_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
    return static_cast<float>(h.x);
#else
    const float32_bits magic = {113U << 23};
    const unsigned int shifted_exp = 0x7c00U << 13; // NOLINT(readability-identifier-naming) // exponent mask after shift
    const unsigned int halfBits = h.x;
    float32_bits o;

    o.u = (halfBits & 0x7fffU) << 13;                 // exponent/mantissa bits
    const unsigned int exponentBits = shifted_exp & o.u; // just the exponent
    o.u += (127U - 15U) << 23;                        // exponent adjust

    // handle exponent special cases
    if (exponentBits == shifted_exp) { // Inf/NaN?
        o.u += (128U - 16U) << 23;     // extra exp adjust
    } else if (exponentBits == 0U) {   // Zero/Denormal?
        o.u += 1U << 23;               // extra exp adjust
        o.f -= magic.f;                // renormalize
    }

    o.u |= (halfBits & 0x8000U) << 16; // sign bit
    return o.f;
#endif
}
|
|
|
|
namespace array {
|
|
|
|
/**
|
|
* @zh
|
|
* 移除首个指定的数组元素。判定元素相等时相当于于使用了 `Array.prototype.indexOf`。
|
|
* @en
|
|
* Removes the first occurrence of a specific object from the array.
|
|
* Decision of the equality of elements is similar to `Array.prototype.indexOf`.
|
|
* @param array 数组。
|
|
* @param value 待移除元素。
|
|
*/
|
|
template <typename T>
|
|
bool remove(ccstd::vector<T> &array, T value) {
|
|
auto iter = std::find(array.begin(), array.end(), value);
|
|
if (iter != array.end()) {
|
|
array.erase(iter);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* @zh
|
|
* 移除指定索引的数组元素。
|
|
* @en
|
|
* Removes the array item at the specified index.
|
|
* @param array 数组。
|
|
* @param index 待移除元素的索引。
|
|
*/
|
|
template <typename T>
|
|
bool removeAt(ccstd::vector<T> &array, int32_t index) {
|
|
if (index >= 0 && index < static_cast<int32_t>(array.size())) {
|
|
array.erase(array.begin() + index);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* @zh
|
|
* 移除指定索引的数组元素。
|
|
* 此函数十分高效,但会改变数组的元素次序。
|
|
* @en
|
|
* Removes the array item at the specified index.
|
|
* It's faster but the order of the array will be changed.
|
|
* @param array 数组。
|
|
* @param index 待移除元素的索引。
|
|
*/
|
|
template <typename T>
|
|
bool fastRemoveAt(ccstd::vector<T> &array, int32_t index) {
|
|
const auto length = static_cast<int32_t>(array.size());
|
|
if (index < 0 || index >= length) {
|
|
return false;
|
|
}
|
|
array[index] = array[length - 1];
|
|
array.resize(length - 1);
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* @zh
|
|
* 移除首个指定的数组元素。判定元素相等时相当于于使用了 `Array.prototype.indexOf`。
|
|
* 此函数十分高效,但会改变数组的元素次序。
|
|
* @en
|
|
* Removes the first occurrence of a specific object from the array.
|
|
* Decision of the equality of elements is similar to `Array.prototype.indexOf`.
|
|
* It's faster but the order of the array will be changed.
|
|
* @param array 数组。
|
|
* @param value 待移除元素。
|
|
*/
|
|
template <typename T>
|
|
bool fastRemove(ccstd::vector<T> &array, T value) {
|
|
auto iter = std::find(array.begin(), array.end(), value);
|
|
if (iter != array.end()) {
|
|
*iter = array[array.size() - 1];
|
|
array.resize(array.size() - 1);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
} // namespace array
|
|
} // namespace utils
|
|
} // namespace cc
|
|
|