// Lane accessor for an SSE register: call sites in this file use it as
// get<0>(s) .. get<3>(s) to read the individual floats of a __m128.
// NOTE(review): the template header and the declaration of `converter` are
// not visible in this excerpt. Presumably `converter` overlays V with a float
// array `a` and `i` is the lane index supplied as a template argument; confirm.
38 constexpr
inline float get(__m128 V) {
44 return converter.a[i];
// Second definition of get(__m128), with the same signature as the one above.
// NOTE(review): its body, and whatever selects between the two definitions
// (presumably a preprocessor branch), are not visible in this excerpt; confirm.
48 constexpr
inline float get(__m128 V) {
/// Squares a single-precision value.
///
/// @param _size  The value to square.
/// @return `_size` multiplied by itself.
static inline float sqrf(float _size) {
  const float squared = _size * _size;
  return squared;
}
// Squared Euclidean distance between the float arrays p1 and p2, each read
// for `dim` elements.
//
// Two implementations are visible below: an element-by-element loop and an
// SSE loop that handles four floats per iteration.
// NOTE(review): what selects between them (presumably a preprocessor branch),
// the scalar accumulator, and the return statements are not visible in this
// excerpt; confirm against the full file.
56 inline static float d_sqeucl(
const float* p1,
const float* p2,
const size_t dim) {
// Scalar path: per-element difference (its accumulation is not visible here).
59 for (
size_t i = 0; i < dim; ++i) {
60 float tmp = p1[i] - p2[i];
// SSE path. p1e is one past the last element of p1; p1ie = p1e - 3 bounds the
// vector loop so that a 4-float load starting at p1 never reads past p1e.
// NOTE(review): for dim < 3, p1e - 3 points before p1. Forming that pointer is
// formally undefined behavior, even though it is only compared and never
// dereferenced.
65 const float *p1e = p1 + dim, *p1ie = p1e - 3;
// Four partial sums of squared differences, one per lane.
67 __m128 s = _mm_setzero_ps();
68 for (; p1 < p1ie; p1 += 4, p2 += 4) {
// Unaligned loads, so p1/p2 need no particular alignment.
69 __m128 tmp = _mm_sub_ps(_mm_loadu_ps(p1), _mm_loadu_ps(p2));
70 s = _mm_add_ps(_mm_mul_ps(tmp, tmp), s);
// Horizontal sum of the four lanes.
72 float sqdist = get<0>(s) + get<1>(s) + get<2>(s) + get<3>(s);
// Tail: the remaining (at most three) elements, one at a time.
// NOTE(review): presumably tmp * tmp is added to sqdist on a line not shown.
73 for (; p1 < p1e; ++p1, ++p2) {
74 float tmp = *p1 - *p2;
// Builds the bit mask used by vec_abs(): every 32-bit lane holds 0x7FFFFFFF,
// i.e. all bits set except the top (sign) bit.
82 inline static __m128 abs_mask(
void) {
// Start from all ones in each 32-bit lane...
83 __m128i minus1 = _mm_set1_epi32(-1);
// ...then logically shift right by one to clear the top bit of each lane, and
// reinterpret the integer vector as four floats (a bit cast, no conversion).
84 return _mm_castsi128_ps(_mm_srli_epi32(minus1, 1));
86 inline static __m128 vec_abs(__m128 v) {
return _mm_and_ps(abs_mask(), v); }
// Manhattan (L1) distance between the float arrays p1 and p2: the sum of
// |p1[i] - p2[i]| over `dim` elements.
//
// Two implementations are visible below: an element-by-element loop and an
// SSE loop that handles four floats per iteration.
// NOTE(review): what selects between them (presumably a preprocessor branch),
// the scalar path's declaration of mdist, and the return statements are not
// visible in this excerpt; confirm against the full file.
89 inline static float d_manhattan(
const float* p1,
const float* p2,
const size_t dim) {
// Scalar path.
92 for (
size_t i = 0; i < dim; ++i) {
93 mdist += std::abs(p1[i] - p2[i]);
// SSE path. p1e is one past the last element of p1; p1ie = p1e - 3 bounds the
// vector loop so that a 4-float load starting at p1 never reads past p1e.
// NOTE(review): for dim < 3, p1e - 3 points before p1. Forming that pointer is
// formally undefined behavior, even though it is only compared and never
// dereferenced.
97 const float *p1e = p1 + dim, *p1ie = p1e - 3;
// Four partial sums of absolute differences, one per lane.
99 __m128 s = _mm_setzero_ps();
100 for (; p1 < p1ie; p1 += 4, p2 += 4) {
// Unaligned loads; vec_abs() clears the sign bit of each lane's difference.
101 s = _mm_add_ps(s, vec_abs(_mm_sub_ps(_mm_loadu_ps(p1), _mm_loadu_ps(p2))));
// Horizontal sum of the four lanes.
103 float mdist = get<0>(s) + get<1>(s) + get<2>(s) + get<3>(s);
// Tail: the remaining (at most three) elements, one at a time.
104 for (; p1 < p1e; ++p1, ++p2) {
105 mdist += std::abs(*p1 - *p2);
// Dot product of the float arrays p1 and p2 over `dim` elements.
//
// No normalization is performed in the visible code.
// NOTE(review): presumably the name means callers pass vectors that are
// already unit-length; confirm against the callers.
//
// Two implementations are visible below: an element-by-element loop and an
// SSE loop that handles four floats per iteration.
// NOTE(review): what selects between them (presumably a preprocessor branch),
// the scalar path's declaration of mdist, the tail-loop body, and the return
// statements are not visible in this excerpt; confirm against the full file.
111 inline static float d_dot_normalized(
const float* p1,
const float* p2,
const size_t dim) {
// Scalar path.
114 for (
size_t i = 0; i < dim; ++i) {
115 mdist += p1[i] * p2[i];
// SSE path. p1e is one past the last element of p1; p1ie = p1e - 3 bounds the
// vector loop so that a 4-float load starting at p1 never reads past p1e.
// NOTE(review): for dim < 3, p1e - 3 points before p1. Forming that pointer is
// formally undefined behavior, even though it is only compared and never
// dereferenced.
119 const float *p1e = p1 + dim, *p1ie = p1e - 3;
// Four partial sums of element products, one per lane.
121 __m128 s = _mm_setzero_ps();
122 for (; p1 < p1ie; p1 += 4, p2 += 4) {
// Unaligned loads, so p1/p2 need no particular alignment.
123 s = _mm_add_ps(s, _mm_mul_ps(_mm_loadu_ps(p1), _mm_loadu_ps(p2)));
// Horizontal sum of the four lanes.
125 float mdist = get<0>(s) + get<1>(s) + get<2>(s) + get<3>(s);
// Tail: the remaining (at most three) elements, one at a time.
// NOTE(review): presumably *p1 * *p2 is added to mdist on a line not shown.
126 for (; p1 < p1e; ++p1, ++p2) {
// Returns one minus the dot product of p1 and p2 over `dim` elements, as
// computed by d_dot_normalized().
// NOTE(review): this equals the cosine distance (1 - cos) only when both
// inputs are unit-length. Nothing here normalizes them, so presumably callers
// guarantee that; confirm.
133 inline static float d_cos_normalized(
const float* p1,
const float* p2,
const size_t dim) {
134 return 1.0F - d_dot_normalized(p1, p2, dim);