21 #ifndef DATASET_FEATURES_H_
22 #define DATASET_FEATURES_H_
39 template <
typename SETT>
50 const float*
fv(
size_t i)
const {
return _data.data() +
_dim * i; }
53 size_t from_shot_limit = 0)
const;
56 std::function<
bool(
FrameId ID)> pred,
size_t per_vid_limit = 0,
57 size_t from_shot_limit = 0)
const;
61 float d_eucl(
size_t i,
size_t j)
const;
63 float d_cos(
size_t i,
size_t j)
const;
84 :
primary{ frames, config.datasets.primary_features },
85 secondary{ frames, config.datasets.secondary_features } {};
// Fragment of the FrameFeatures<SETT> constructor (per the symbol index of
// this listing); the signature and several `if` headers are not visible here.
// Visible behaviour: logs, opens `config.features_file` in binary mode, skips
// a header, reads the raw float matrix into `_data`, and throws
// std::runtime_error on the failure paths shown below.
92 template <
typename SETT>
// An empty path only produces a warning in the visible lines.
95 if (config.features_file.empty()) {
96 SHLOG_W(
"No features provided for '" << utils::type_name<SETT>() <<
"'...");
100 SHLOG_D(
"Loading dataset features from '" << config.features_file <<
"'...");
106 std::ifstream in(config.features_file, std::ios::binary);
// The condition guarding this throw is not visible in the listing.
108 std::string msg{
"Error opening features file '" + config.features_file +
"'!" };
110 throw std::runtime_error(msg);
// Skip `features_file_data_off` characters before the matrix payload.
114 in.ignore(config.features_file_data_off);
// Read sizeof(float) * _data.size() bytes directly into the storage of
// `_data`; `_data` must therefore already have its final size at this point
// (the sizing code is not visible here).
116 if (!in.read(
reinterpret_cast<char*
>(
_data.data()),
sizeof(
float) *
_data.size())) {
117 std::string msg{
"Feature matrix reading problems at '" + config.features_file +
"'!" };
119 throw std::runtime_error(msg);
121 SHLOG_S(
"Successfully loaded " <<
_size <<
" frame features of dimension " << config._dim <<
".");
// NOTE(review): in_avail() only reports characters the stream buffer can
// currently supply; how reliably that detects trailing data in the file is
// implementation-dependent. Comparing in.peek() against EOF may be a more
// portable check -- confirm. Also note the message says "Incomplete read"
// although this branch fires when data is left over.
125 if (in.rdbuf()->in_avail() != 0) {
126 std::string msg{
"Incomplete read of the '" + config.features_file +
"' file!" };
128 throw std::runtime_error(msg);
// Fragment of the get_top_knn overload without a predicate (the first
// signature line and the call target are not visible in this listing).
// It forwards its arguments together with a lambda that accepts every
// FrameId -- presumably to the predicate overload of get_top_knn; confirm.
132 template <
typename SETT>
134 size_t per_vid_limit,
size_t from_shot_limit)
const {
136 _dataset_frames,
id, [](
FrameId ) {
return true; }, per_vid_limit, from_shot_limit);
// Fragment of the get_top_knn overload taking a predicate (the first
// signature line, the loop header and several statements are not visible in
// this listing). Visible behaviour: candidates are ordered by
// d_dot_normalized(id, i) and appended to `res` while per-video and per-shot
// quotas are enforced and `pred` filters individual frames.
139 template <
typename SETT>
141 std::function<
bool(
FrameId ID)> pred,
size_t per_vid_limit,
142 size_t from_shot_limit)
const {
// A limit of 0 is replaced by the total number of dataset frames, so it can
// never be the reason a frame is rejected.
143 if (per_vid_limit == 0) per_vid_limit = _dataset_frames.
size();
145 if (from_shot_limit == 0) from_shot_limit = _dataset_frames.
size();
// With this comparator std::priority_queue::top() yields the pair with the
// smallest `second` (i.e. the smallest distance comes out first).
147 auto cmp = [](
const std::pair<FrameId, float>& left,
const std::pair<FrameId, float>& right) {
148 return left.second > right.second;
151 std::priority_queue<std::pair<FrameId, float>, std::vector<std::pair<FrameId, float>>, decltype(cmp)> q3(cmp);
// Distance from the query frame `id` to every stored frame; the statement
// that pushes (i, d) into `q3` is not visible here.
153 for (
FrameId i{ 0 }; i < _size; ++i) {
154 auto d = d_dot_normalized(
id, i);
158 std::vector<FrameId> res;
// Number of frames already placed into `res`, per video and per (video, shot).
// `num_videos` is defined on a line missing from this listing.
162 std::vector<size_t> per_vid_frame_hist(num_videos, 0);
163 std::map<VideoId, std::map<ShotId, size_t>> frames_per_shot;
// NOTE(review): in the visible lines q3.top() is read BEFORE q3.empty() is
// checked. Calling top() on an empty std::priority_queue is undefined
// behaviour; the empty() test should come first.
166 auto [adept_ID, f]{ q3.top() };
168 if (q3.empty())
break;
172 auto vf = _dataset_frames.
get_frame(adept_ID);
// Skip candidates whose video has already reached its quota.
175 if (per_vid_frame_hist[vf.video_ID] >= per_vid_limit)
continue;
// Skip candidates whose shot has already reached its quota. Note that
// std::map::operator[] inserts a zero entry for keys seen for the first time.
178 if (frames_per_shot[vf.video_ID][vf.shot_ID] >= from_shot_limit)
continue;
// Only frames accepted by `pred` are emitted and counted against the quotas.
181 if (pred(adept_ID)) {
182 res.emplace_back(adept_ID);
183 per_vid_frame_hist[vf.video_ID]++;
184 frames_per_shot[vf.video_ID][vf.shot_ID]++;
// Body fragment of FrameFeatures::d_manhattan(i, j) (signature line missing
// from this listing): forwards rows i and j to the free function
// ::d_manhattan over `_dim` components.
191 template <
typename SETT>
193 return ::d_manhattan(fv(i), fv(j), _dim);
// Body fragment of FrameFeatures::d_sqeucl(i, j) (signature line missing
// from this listing): forwards rows i and j to the free function ::d_sqeucl
// over `_dim` components.
196 template <
typename SETT>
198 return ::d_sqeucl(fv(i), fv(j), _dim);
// Body fragment of FrameFeatures::d_eucl(i, j) (signature line missing from
// this listing): the square root of the member d_sqeucl(i, j).
201 template <
typename SETT>
203 return sqrtf(d_sqeucl(i, j));
// Body fragment of FrameFeatures::d_dot_normalized(i, j) (signature line
// missing from this listing): returns one minus the value of the free
// function ::d_dot_normalized for rows i and j -- presumably turning a
// similarity into a distance; confirm against the helper's definition.
206 template <
typename SETT>
208 return 1 - ::d_dot_normalized(fv(i), fv(j), _dim);
// Fragment of FrameFeatures::d_cos(i, j) (signature line and one loop
// statement are missing from this listing). Returns 1 - s / sqrt(w1 * w2),
// where w1 and w2 are the sums of squared components of rows i and j.
211 template <
typename SETT>
213 float s = 0, w1 = 0, w2 = 0;
214 const float *iv = fv(i), *jv = fv(j);
215 for (
size_t d = 0; d < _dim; ++d) {
// The update of `s` is not visible here (a listing line is missing) --
// presumably the running dot product of iv and jv; confirm.
217 w1 += utils::square(iv[d]);
218 w2 += utils::square(jv[d]);
// NOTE(review): this guard only covers the case where BOTH sums are zero.
// If exactly one of them is zero, sqrtf(w1 * w2) is 0 and the division
// below is by zero (NaN or infinity). `||` may have been intended -- confirm.
220 if (w1 == 0 && w2 == 0)
return 0;
221 return 1 - s / sqrtf(w1 * w2);
Represents all available feature sets.
Definition: dataset-features.h:81
PrimaryFrameFeatures primary
Definition: dataset-features.h:85
SecondaryFrameFeatures secondary
Definition: dataset-features.h:89
DatasetFeatures(const DatasetFrames &frames, const Settings &config)
Definition: dataset-features.h:83
Definition: dataset-frames.h:162
size_t get_num_videos() const
Definition: dataset-frames.h:182
size_t size() const
Definition: dataset-frames.h:194
VideoFrame & get_frame(FrameId i)
Definition: dataset-frames.h:184
Represents one set of features for the given dataset.
Definition: dataset-features.h:40
size_t size() const
Definition: dataset-features.h:48
FrameFeatures(const DatasetFrames &frames, const SETT &config)
Definition: dataset-features.h:93
float d_dot_normalized(size_t i, size_t j) const
Definition: dataset-features.h:207
float d_sqeucl(size_t i, size_t j) const
Definition: dataset-features.h:197
size_t dim() const
Definition: dataset-features.h:49
std::vector< float > _data
Raw flat data matrix (row-wise).
Definition: dataset-features.h:72
float d_eucl(size_t i, size_t j) const
Definition: dataset-features.h:202
std::vector< FrameId > get_top_knn(const DatasetFrames &_dataset_frames, FrameId id, std::function< bool(FrameId ID)> pred, size_t per_vid_limit=0, size_t from_shot_limit=0) const
Definition: dataset-features.h:140
const float * fv(size_t i) const
Definition: dataset-features.h:50
float d_cos(size_t i, size_t j) const
Definition: dataset-features.h:212
std::size_t _size
Number of rows (i.e. the number of loaded frame feature vectors).
Definition: dataset-features.h:68
float d_manhattan(size_t i, size_t j) const
Definition: dataset-features.h:192
std::size_t _dim
Number of vector components.
Definition: dataset-features.h:70
std::vector< FrameId > get_top_knn(const DatasetFrames &_dataset_frames, FrameId id, size_t per_vid_limit=0, size_t from_shot_limit=0) const
Definition: dataset-features.h:133
File implementing distance calculations on vectors.
Definition: common-types.h:33
unsigned long FrameId
Definition: common-types.h:75
#define TOPKNN_LIMIT
Definition: static-config.h:69
#define SHLOG_E(x)
Definition: static-logger.hpp:157
#define SHLOG_S(x)
Definition: static-logger.hpp:173
#define SHLOG_W(x)
Definition: static-logger.hpp:165
#define SHLOG_D(x)
Definition: static-logger.hpp:179
Parsed current config of the core.
Definition: settings.h:190