You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
cocos_lib/cocos/math/MathUtilSSE.inl

179 lines
5.5 KiB

/**
Copyright 2013 BlackBerry Inc.
Copyright (c) 2014-2016 Chukong Technologies Inc.
Copyright (c) 2017-2023 Xiamen Yaji Software Co., Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original file from GamePlay3D: http://gameplay3d.org
This file was modified to fit the cocos2d-x project
*/
NS_CC_MATH_BEGIN
#ifdef __SSE__
void MathUtil::addMatrix(const __m128 m[4], float scalar, __m128 dst[4])
{
__m128 s = _mm_set1_ps(scalar);
dst[0] = _mm_add_ps(m[0], s);
dst[1] = _mm_add_ps(m[1], s);
dst[2] = _mm_add_ps(m[2], s);
dst[3] = _mm_add_ps(m[3], s);
}
void MathUtil::addMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
{
dst[0] = _mm_add_ps(m1[0], m2[0]);
dst[1] = _mm_add_ps(m1[1], m2[1]);
dst[2] = _mm_add_ps(m1[2], m2[2]);
dst[3] = _mm_add_ps(m1[3], m2[3]);
}
void MathUtil::subtractMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
{
dst[0] = _mm_sub_ps(m1[0], m2[0]);
dst[1] = _mm_sub_ps(m1[1], m2[1]);
dst[2] = _mm_sub_ps(m1[2], m2[2]);
dst[3] = _mm_sub_ps(m1[3], m2[3]);
}
void MathUtil::multiplyMatrix(const __m128 m[4], float scalar, __m128 dst[4])
{
__m128 s = _mm_set1_ps(scalar);
dst[0] = _mm_mul_ps(m[0], s);
dst[1] = _mm_mul_ps(m[1], s);
dst[2] = _mm_mul_ps(m[2], s);
dst[3] = _mm_mul_ps(m[3], s);
}
void MathUtil::multiplyMatrix(const __m128 m1[4], const __m128 m2[4], __m128 dst[4])
{
__m128 dst0, dst1, dst2, dst3;
{
__m128 e0 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(0, 0, 0, 0));
__m128 e1 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(1, 1, 1, 1));
__m128 e2 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(2, 2, 2, 2));
__m128 e3 = _mm_shuffle_ps(m2[0], m2[0], _MM_SHUFFLE(3, 3, 3, 3));
__m128 v0 = _mm_mul_ps(m1[0], e0);
__m128 v1 = _mm_mul_ps(m1[1], e1);
__m128 v2 = _mm_mul_ps(m1[2], e2);
__m128 v3 = _mm_mul_ps(m1[3], e3);
__m128 a0 = _mm_add_ps(v0, v1);
__m128 a1 = _mm_add_ps(v2, v3);
__m128 a2 = _mm_add_ps(a0, a1);
dst0 = a2;
}
{
__m128 e0 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(0, 0, 0, 0));
__m128 e1 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(1, 1, 1, 1));
__m128 e2 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(2, 2, 2, 2));
__m128 e3 = _mm_shuffle_ps(m2[1], m2[1], _MM_SHUFFLE(3, 3, 3, 3));
__m128 v0 = _mm_mul_ps(m1[0], e0);
__m128 v1 = _mm_mul_ps(m1[1], e1);
__m128 v2 = _mm_mul_ps(m1[2], e2);
__m128 v3 = _mm_mul_ps(m1[3], e3);
__m128 a0 = _mm_add_ps(v0, v1);
__m128 a1 = _mm_add_ps(v2, v3);
__m128 a2 = _mm_add_ps(a0, a1);
dst1 = a2;
}
{
__m128 e0 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(0, 0, 0, 0));
__m128 e1 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(1, 1, 1, 1));
__m128 e2 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(2, 2, 2, 2));
__m128 e3 = _mm_shuffle_ps(m2[2], m2[2], _MM_SHUFFLE(3, 3, 3, 3));
__m128 v0 = _mm_mul_ps(m1[0], e0);
__m128 v1 = _mm_mul_ps(m1[1], e1);
__m128 v2 = _mm_mul_ps(m1[2], e2);
__m128 v3 = _mm_mul_ps(m1[3], e3);
__m128 a0 = _mm_add_ps(v0, v1);
__m128 a1 = _mm_add_ps(v2, v3);
__m128 a2 = _mm_add_ps(a0, a1);
dst2 = a2;
}
{
__m128 e0 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(0, 0, 0, 0));
__m128 e1 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(1, 1, 1, 1));
__m128 e2 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(2, 2, 2, 2));
__m128 e3 = _mm_shuffle_ps(m2[3], m2[3], _MM_SHUFFLE(3, 3, 3, 3));
__m128 v0 = _mm_mul_ps(m1[0], e0);
__m128 v1 = _mm_mul_ps(m1[1], e1);
__m128 v2 = _mm_mul_ps(m1[2], e2);
__m128 v3 = _mm_mul_ps(m1[3], e3);
__m128 a0 = _mm_add_ps(v0, v1);
__m128 a1 = _mm_add_ps(v2, v3);
__m128 a2 = _mm_add_ps(a0, a1);
dst3 = a2;
}
dst[0] = dst0;
dst[1] = dst1;
dst[2] = dst2;
dst[3] = dst3;
}
void MathUtil::negateMatrix(const __m128 m[4], __m128 dst[4])
{
__m128 z = _mm_setzero_ps();
dst[0] = _mm_sub_ps(z, m[0]);
dst[1] = _mm_sub_ps(z, m[1]);
dst[2] = _mm_sub_ps(z, m[2]);
dst[3] = _mm_sub_ps(z, m[3]);
}
void MathUtil::transposeMatrix(const __m128 m[4], __m128 dst[4])
{
__m128 tmp0 = _mm_shuffle_ps(m[0], m[1], 0x44);
__m128 tmp2 = _mm_shuffle_ps(m[0], m[1], 0xEE);
__m128 tmp1 = _mm_shuffle_ps(m[2], m[3], 0x44);
__m128 tmp3 = _mm_shuffle_ps(m[2], m[3], 0xEE);
dst[0] = _mm_shuffle_ps(tmp0, tmp1, 0x88);
dst[1] = _mm_shuffle_ps(tmp0, tmp1, 0xDD);
dst[2] = _mm_shuffle_ps(tmp2, tmp3, 0x88);
dst[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
}
void MathUtil::transformVec4(const __m128 m[4], const __m128& v, __m128& dst)
{
__m128 col1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
__m128 col2 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
__m128 col3 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
__m128 col4 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));
dst = _mm_add_ps(
_mm_add_ps(_mm_mul_ps(m[0], col1), _mm_mul_ps(m[1], col2)),
_mm_add_ps(_mm_mul_ps(m[2], col3), _mm_mul_ps(m[3], col4))
);
}
#endif
NS_CC_MATH_END