SOMHunter Core
keyword-ranker.h
Go to the documentation of this file.
1 /* This file is part of SOMHunter.
2  *
3  * Copyright (C) 2021 Frantisek Mejzlik <frankmejzlik@protonmail.com>
4  * Mirek Kratochvil <exa.exa@gmail.com>
5  * Patrik Vesely <prtrikvesely@gmail.com>
6  *
7  * SOMHunter is free software: you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License as published by the Free
9  * Software Foundation, either version 2 of the License, or (at your option)
10  * any later version.
11  *
12  * SOMHunter is distributed in the hope that it will be useful, but WITHOUT ANY
13  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
15  * details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * SOMHunter. If not, see <https://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef IMAGE_KEYWORDS_W2VV_H_
22 #define IMAGE_KEYWORDS_W2VV_H_
23 
24 #include <cassert>
25 #include <fstream>
26 #include <iomanip>
27 #include <map>
28 #include <set>
29 #include <string>
30 #include <vector>
31 
32 #include "common.h"
33 
34 #include "utils.hpp"
35 
36 #include "dataset-frames.h"
37 #include "embedding-ranker.h"
38 #include "scores.h"
39 #include "settings.h"
40 
41 namespace sh {
42 struct Keyword {
47  // Best representative images for this keyword.
49  std::vector<const VideoFrame*> top_ex_imgs;
50 };
51 
52 class KeywordRanker : public EmbeddingRanker<PrimaryFrameFeatures> {
53  std::vector<Keyword> _keyword_ranker;
58 
59 public:
60  static std::vector<Keyword> parse_kw_classes_text_file(const std::string& filepath,
61  const DatasetFrames& _dataset_frames);
62 
70  // Parses a float matrix from a binary file written in row-major format, starting at `begin_offset`. @todo Make this template and inside some `Parsers` class
71  static FeatureMatrix parse_float_matrix(const std::string& filepath, size_t row_dim, size_t begin_offset = 0);
78  // @todo Make this template and inside some `Parsers` class
79  static FeatureVector parse_float_vector(const std::string& filepath, size_t dim, size_t begin_offset = 0);
80 
81  inline KeywordRanker(const Settings& config, const DatasetFrames& _dataset_frames)
82  : _keyword_ranker(parse_kw_classes_text_file(config.datasets.primary_features.kws_file, _dataset_frames)),
83  // Keyword feature matrix and its bias vector, both parsed with pre_PCA_features_dim.
84  kw_features(parse_float_matrix(config.datasets.primary_features.kw_scores_mat_file,
85  config.datasets.primary_features.pre_PCA_features_dim)),
86  kw_features_bias_vec(parse_float_vector(config.datasets.primary_features.kw_bias_vec_file,
87  config.datasets.primary_features.pre_PCA_features_dim)),
88  // PCA matrix and PCA mean vector, both parsed with pre_PCA_features_dim.
89  kw_pca_mat(parse_float_matrix(config.datasets.primary_features.kw_PCA_mat_file,
90  config.datasets.primary_features.pre_PCA_features_dim)),
91  kw_pca_mean_vec(parse_float_vector(config.datasets.primary_features.kw_PCA_mean_vec_file,
92  config.datasets.primary_features.pre_PCA_features_dim)) {
93  assert(kw_pca_mat.size() > 1); // Make sure it's a matrix!
94  // Log the source file of every structure loaded above.
95  SHLOG_S("Keyword features loaded from '" << config.datasets.primary_features.kw_scores_mat_file
96  << "' with dimension ("
98  SHLOG_S("Keyword bias loaded from '" << config.datasets.primary_features.kw_bias_vec_file
99  << "' with dimension ("
101 
102  SHLOG_S("Loaded PCA matrix from '" << config.datasets.primary_features.kw_PCA_mat_file << "' with dimension ("
104  << config.datasets.primary_features.kw_PCA_mat_dim << ").");
105  SHLOG_S("Loaded PCA mean vector from '" << config.datasets.primary_features.kw_PCA_mean_vec_file // same file kw_pca_mean_vec is parsed from
106  << "' with dimension ("
108  }
109 
110  KeywordRanker(const KeywordRanker&) = delete;
112 
115  ~KeywordRanker() noexcept = default;
116 
117  static void report_results(const StdVector<std::pair<FrameId, float>>& sorted_results,
118  const DatasetFrames& _dataset_frames, size_t num = 10);
119 
120  StdVector<float> embedd_text_queries(const StdVector<KeywordId>& kws) const;
121 
125  const Keyword& operator[](KeywordId idx) const {
126  // Returns the single Keyword stored at position `idx` of `_keyword_ranker` (plain vector indexing, no bounds check).
127  return _keyword_ranker[idx];
128  }
129 
130  KwSearchIds find(const std::string& search, size_t num_limit) const;
131 
132  void rank_sentence_query(const std::string& sentence_query_raw, ScoreModel& model,
133  const PrimaryFrameFeatures& _dataset_features, size_t temporal) const;
134 
135  // ----
136  StdVector<float> get_text_query_feature(const std::string& query);
137  std::vector<std::string> tokenize_textual_query(const std::string& sentence_query_raw) const;
138  std::vector<KeywordId> decode_keywords(const std::vector<std::string>& query) const;
139 
140  // -----
141 };
142 }; // namespace sh
143 
144 #endif // IMAGE_KEYWORDS_W2VV_H_
Definition: dataset-frames.h:162
Definition: embedding-ranker.h:43
Definition: keyword-ranker.h:52
static FeatureMatrix parse_float_matrix(const std::string &filepath, size_t row_dim, size_t begin_offset=0)
Parses float matrix from a binary file that is written in row-major format and starts at begin_offset...
Definition: keyword-ranker.cpp:158
static std::vector< Keyword > parse_kw_classes_text_file(const std::string &filepath, const DatasetFrames &_dataset_frames)
Definition: keyword-ranker.cpp:29
FeatureMatrix kw_features
Definition: keyword-ranker.h:54
std::vector< Keyword > _keyword_ranker
Definition: keyword-ranker.h:53
KwSearchIds find(const std::string &search, size_t num_limit) const
Definition: keyword-ranker.cpp:220
std::vector< KeywordId > decode_keywords(const std::vector< std::string > &query) const
Definition: keyword-ranker.cpp:291
static FeatureVector parse_float_vector(const std::string &filepath, size_t dim, size_t begin_offset=0)
FORMAT: Matrix of 4B floats:
Definition: keyword-ranker.cpp:99
KeywordRanker(KeywordRanker &&)=default
StdVector< float > embedd_text_queries(const StdVector< KeywordId > &kws) const
Definition: keyword-ranker.cpp:324
FeatureVector kw_features_bias_vec
Definition: keyword-ranker.h:55
StdVector< float > get_text_query_feature(const std::string &query)
Definition: keyword-ranker.cpp:255
static void report_results(const StdVector< std::pair< FrameId, float >> &sorted_results, const DatasetFrames &_dataset_frames, size_t num=10)
Definition: keyword-ranker.cpp:348
std::vector< std::string > tokenize_textual_query(const std::string &sentence_query_raw) const
Definition: keyword-ranker.cpp:268
FeatureVector kw_pca_mean_vec
Definition: keyword-ranker.h:57
KeywordRanker & operator=(KeywordRanker &&)=default
KeywordRanker(const KeywordRanker &)=delete
~KeywordRanker() noexcept=default
KeywordRanker(const Settings &config, const DatasetFrames &_dataset_frames)
Definition: keyword-ranker.h:81
KeywordRanker & operator=(const KeywordRanker &)=delete
FeatureMatrix kw_pca_mat
Definition: keyword-ranker.h:56
void rank_sentence_query(const std::string &sentence_query_raw, ScoreModel &model, const PrimaryFrameFeatures &_dataset_features, size_t temporal) const
Definition: keyword-ranker.cpp:303
Definition: scores.h:34
Definition: common-types.h:33
size_t SynsetId
Definition: common-types.h:67
size_t KeywordId
Definition: common-types.h:60
std::vector< std::string > SynsetStrings
Definition: common-types.h:68
std::vector< KwSearchId > KwSearchIds
Definition: common-types.h:63
std::string KwDescription
Definition: common-types.h:64
std::vector< float > FeatureVector
Definition: common-types.h:78
std::vector< std::vector< float > > FeatureMatrix
Definition: common-types.h:77
std::vector< T_ > StdVector
Definition: common-types.h:84
unsigned long FrameId
Definition: common-types.h:75
#define SHLOG_S(x)
Definition: static-logger.hpp:173
std::string kw_PCA_mat_file
Definition: settings.h:158
size_t pre_PCA_features_dim
Definition: settings.h:154
std::string kw_bias_vec_file
Definition: settings.h:155
size_t kw_PCA_mat_dim
Definition: settings.h:159
std::string kw_scores_mat_file
Definition: settings.h:156
PrimaryFeaturesSettings primary_features
Definition: settings.h:183
Definition: keyword-ranker.h:42
KeywordId kw_ID
Definition: keyword-ranker.h:43
SynsetStrings synset_strs
Definition: keyword-ranker.h:45
std::vector< const VideoFrame * > top_ex_imgs
Best representative images for this keyword.
Definition: keyword-ranker.h:49
SynsetId synset_ID
Definition: keyword-ranker.h:44
KwDescription desc
Definition: keyword-ranker.h:46
Parsed current config of the core.
Definition: settings.h:190
DatasetsSettings datasets
Definition: settings.h:203