SOMHunter Core
distances.hpp
Go to the documentation of this file.
1 /* This file is part of SOMHunter.
2  *
3  * Copyright (C) 2021 Frantisek Mejzlik <frankmejzlik@protonmail.com>
4  * Mirek Kratochvil <exa.exa@gmail.com>
5  * Patrik Vesely <prtrikvesely@gmail.com>
6  *
7  * SOMHunter is free software: you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License as published by the Free
9  * Software Foundation, either version 2 of the License, or (at your option)
10  * any later version.
11  *
12  * SOMHunter is distributed in the hope that it will be useful, but WITHOUT ANY
13  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
15  * details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * SOMHunter. If not, see <https://www.gnu.org/licenses/>.
19  */
20 
26 #ifndef DISTANCES_H_
27 #define DISTANCES_H_
28 
29 #include <algorithm>
30 #include <cmath>
31 
32 #include "common.h"
33 
34 #ifdef USE_INTRINS
35 
36 # ifdef _MSC_VER
37 template <unsigned i>
38 constexpr inline float get(__m128 V) {
39  union {
40  __m128 v;
41  float a[4];
42  } converter;
43  converter.v = V;
44  return converter.a[i];
45 }
46 # else
47 template <unsigned i>
48 constexpr inline float get(__m128 V) {
49  return V[i];
50 }
51 # endif
52 #endif
53 
54 static inline float sqrf(float _size) { return _size * _size; }
55 
56 inline static float d_sqeucl(const float* p1, const float* p2, const size_t dim) {
57 #ifndef USE_INTRINS
58  float sqdist = 0;
59  for (size_t i = 0; i < dim; ++i) {
60  float tmp = p1[i] - p2[i];
61  sqdist += tmp * tmp;
62  }
63  return sqdist;
64 #else
65  const float *p1e = p1 + dim, *p1ie = p1e - 3;
66 
67  __m128 s = _mm_setzero_ps();
68  for (; p1 < p1ie; p1 += 4, p2 += 4) {
69  __m128 tmp = _mm_sub_ps(_mm_loadu_ps(p1), _mm_loadu_ps(p2));
70  s = _mm_add_ps(_mm_mul_ps(tmp, tmp), s);
71  }
72  float sqdist = get<0>(s) + get<1>(s) + get<2>(s) + get<3>(s);
73  for (; p1 < p1e; ++p1, ++p2) {
74  float tmp = *p1 - *p2;
75  sqdist += tmp * tmp;
76  }
77  return sqdist;
78 #endif
79 }
80 
81 #ifdef USE_INTRINS
82 inline static __m128 abs_mask(void) {
83  __m128i minus1 = _mm_set1_epi32(-1);
84  return _mm_castsi128_ps(_mm_srli_epi32(minus1, 1));
85 }
86 inline static __m128 vec_abs(__m128 v) { return _mm_and_ps(abs_mask(), v); }
87 #endif
88 
89 inline static float d_manhattan(const float* p1, const float* p2, const size_t dim) {
90 #ifndef USE_INTRINS
91  float mdist = 0;
92  for (size_t i = 0; i < dim; ++i) {
93  mdist += std::abs(p1[i] - p2[i]);
94  }
95  return mdist;
96 #else
97  const float *p1e = p1 + dim, *p1ie = p1e - 3;
98 
99  __m128 s = _mm_setzero_ps();
100  for (; p1 < p1ie; p1 += 4, p2 += 4) {
101  s = _mm_add_ps(s, vec_abs(_mm_sub_ps(_mm_loadu_ps(p1), _mm_loadu_ps(p2))));
102  }
103  float mdist = get<0>(s) + get<1>(s) + get<2>(s) + get<3>(s);
104  for (; p1 < p1e; ++p1, ++p2) {
105  mdist += std::abs(*p1 - *p2);
106  }
107  return mdist;
108 #endif
109 }
110 
111 inline static float d_dot_normalized(const float* p1, const float* p2, const size_t dim) {
112 #ifndef USE_INTRINS
113  float mdist = 0;
114  for (size_t i = 0; i < dim; ++i) {
115  mdist += p1[i] * p2[i];
116  }
117  return mdist;
118 #else
119  const float *p1e = p1 + dim, *p1ie = p1e - 3;
120 
121  __m128 s = _mm_setzero_ps();
122  for (; p1 < p1ie; p1 += 4, p2 += 4) {
123  s = _mm_add_ps(s, _mm_mul_ps(_mm_loadu_ps(p1), _mm_loadu_ps(p2)));
124  }
125  float mdist = get<0>(s) + get<1>(s) + get<2>(s) + get<3>(s);
126  for (; p1 < p1e; ++p1, ++p2) {
127  mdist += *p1 * *p2;
128  }
129  return mdist;
130 #endif
131 }
132 
133 inline static float d_cos_normalized(const float* p1, const float* p2, const size_t dim) {
134  return 1.0F - d_dot_normalized(p1, p2, dim);
135 }
136 
137 #endif // DISTANCES_H_