1/**
2 * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "index/column/column_indexer.h"
18#include <gtest/gtest.h>
19#include "index/file_helper.h"
20
21using namespace proxima::be;
22using namespace proxima::be::index;
23
24class ColumnIndexerTest : public testing::Test {
25 protected:
26 void SetUp() {
27 FileHelper::RemoveFile("./data.pxa.test_column.0");
28 }
29
30 void TearDown() {}
31};
32
33TEST_F(ColumnIndexerTest, TestGeneral) {
34 auto column_indexer =
35 ColumnIndexer::Create("test_collection", "./", 0, "test_column",
36 IndexTypes::PROXIMA_GRAPH_INDEX);
37
38 meta::ColumnMeta meta;
39 meta.set_name("test_column");
40 meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
41 meta.set_data_type(DataTypes::VECTOR_FP32);
42 meta.set_dimension(16);
43
44 ReadOptions read_options;
45 read_options.use_mmap = true;
46 read_options.create_new = true;
47 int ret = column_indexer->open(meta, read_options);
48 ASSERT_EQ(ret, 0);
49
50 // Test insert
51 for (size_t i = 0; i < 1000; i++) {
52 std::vector<float> fvec(16U);
53 for (size_t j = 0; j < 16U; j++) {
54 fvec[j] = i * 1.0f;
55 }
56
57 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
58 ColumnData column_data;
59 column_data.column_name = "test_column";
60 column_data.data_type = DataTypes::VECTOR_FP32;
61 column_data.dimension = 16;
62 column_data.data = vector;
63 ret = column_indexer->insert(i, column_data);
64 ASSERT_EQ(ret, 0);
65 }
66
67 // Test search
68 for (size_t i = 0; i < 1000; i++) {
69 std::vector<float> fvec(16U);
70 for (size_t j = 0; j < 16U; j++) {
71 fvec[j] = i * 1.0f;
72 }
73
74 IndexDocumentList result_list;
75 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
76 QueryParams query_params;
77 query_params.topk = 10;
78 ret = column_indexer->search(query, query_params, nullptr, &result_list);
79 ASSERT_EQ(ret, 0);
80 ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f);
81 ASSERT_EQ(result_list[0].key(), i);
82 }
83}
84
85TEST_F(ColumnIndexerTest, TestQuantizeFP16) {
86 auto column_indexer =
87 ColumnIndexer::Create("test_collection", "./", 0, "test_column",
88 IndexTypes::PROXIMA_GRAPH_INDEX);
89
90 meta::ColumnMeta meta;
91 meta.set_name("test_column");
92 meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
93 meta.set_data_type(DataTypes::VECTOR_FP32);
94 meta.set_dimension(16);
95 meta.mutable_parameters()->set("quantize_type", "DT_VECTOR_FP16");
96
97 ReadOptions read_options;
98 read_options.use_mmap = true;
99 read_options.create_new = true;
100 int ret = column_indexer->open(meta, read_options);
101 ASSERT_EQ(ret, 0);
102
103 for (size_t i = 0; i < 1000; i++) {
104 std::vector<float> fvec(16U);
105 for (size_t j = 0; j < 16U; j++) {
106 fvec[j] = i * 1.0f;
107 }
108
109 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
110 ColumnData column_data;
111 column_data.column_name = "test_column";
112 column_data.data_type = DataTypes::VECTOR_FP32;
113 column_data.dimension = 16;
114 column_data.data = vector;
115 ret = column_indexer->insert(i, column_data);
116 ASSERT_EQ(ret, 0);
117 }
118
119 for (size_t i = 0; i < 1000; i++) {
120 std::vector<float> fvec(16U);
121 for (size_t j = 0; j < 16U; j++) {
122 fvec[j] = i * 1.0f;
123 }
124
125 IndexDocumentList result_list;
126 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
127 QueryParams query_params;
128 query_params.topk = 10;
129 ret = column_indexer->search(query, query_params, nullptr, &result_list);
130 ASSERT_EQ(ret, 0);
131 ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f);
132 ASSERT_EQ(result_list[0].key(), i);
133 }
134}
135
136TEST_F(ColumnIndexerTest, TestQuantizeINT8) {
137 auto column_indexer =
138 ColumnIndexer::Create("test_collection", "./", 0, "test_column",
139 IndexTypes::PROXIMA_GRAPH_INDEX);
140
141 meta::ColumnMeta meta;
142 meta.set_name("test_column");
143 meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
144 meta.set_data_type(DataTypes::VECTOR_FP32);
145 meta.set_dimension(16);
146 meta.mutable_parameters()->set("quantize_type", "DT_VECTOR_INT8");
147
148 ReadOptions read_options;
149 read_options.use_mmap = true;
150 read_options.create_new = true;
151 int ret = column_indexer->open(meta, read_options);
152 ASSERT_EQ(ret, 0);
153
154 for (size_t i = 0; i < 1000; i++) {
155 std::vector<float> fvec(16U);
156 for (size_t j = 0; j < 16U; j++) {
157 fvec[j] = i * 1.0f;
158 }
159
160 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
161 ColumnData column_data;
162 column_data.column_name = "test_column";
163 column_data.data_type = DataTypes::VECTOR_FP32;
164 column_data.dimension = 16;
165 column_data.data = vector;
166 ret = column_indexer->insert(i, column_data);
167 ASSERT_EQ(ret, 0);
168 }
169
170 for (size_t i = 0; i < 1000; i++) {
171 std::vector<float> fvec(16U);
172 for (size_t j = 0; j < 16U; j++) {
173 fvec[j] = i * 1.0f;
174 }
175
176 IndexDocumentList result_list;
177 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
178 QueryParams query_params;
179 query_params.topk = 10;
180 ret = column_indexer->search(query, query_params, nullptr, &result_list);
181 ASSERT_EQ(ret, 0);
182 ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f);
183 ASSERT_EQ(result_list[0].key(), i);
184 }
185}
186
187TEST_F(ColumnIndexerTest, TestQuantizeINT8InnerProduct) {
188 auto column_indexer =
189 ColumnIndexer::Create("test_collection_int8_ip", "./", 0, "test_column",
190 IndexTypes::PROXIMA_GRAPH_INDEX);
191 meta::ColumnMeta meta;
192 meta.set_name("test_column");
193 meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
194 meta.set_data_type(DataTypes::VECTOR_FP32);
195 meta.set_dimension(16);
196 meta.mutable_parameters()->set("quantize_type", "DT_VECTOR_INT8");
197 meta.mutable_parameters()->set("metric_type", "InnerProduct");
198 ReadOptions read_options;
199 read_options.use_mmap = true;
200 read_options.create_new = true;
201 int ret = column_indexer->open(meta, read_options);
202 ASSERT_EQ(ret, 0);
203 for (size_t i = 0; i <= 1000; i++) {
204 std::vector<float> fvec(16U);
205 for (size_t j = 0; j < 16U; j++) {
206 fvec[j] = i * 0.001f;
207 }
208 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
209 ColumnData column_data;
210 column_data.column_name = "test_column";
211 column_data.data_type = DataTypes::VECTOR_FP32;
212 column_data.dimension = 16;
213 column_data.data = vector;
214 ret = column_indexer->insert(i, column_data);
215 ASSERT_EQ(ret, 0);
216 }
217 std::vector<float> fvec(16U);
218 for (size_t j = 0; j < 16U; j++) {
219 fvec[j] = 1.0f;
220 }
221 IndexDocumentList result_list;
222 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
223 QueryParams query_params;
224 query_params.topk = 10;
225 ret = column_indexer->search(query, query_params, nullptr, &result_list);
226 ASSERT_EQ(ret, 0);
227 ASSERT_EQ(1000, result_list[0].key());
228 ASSERT_NEAR(result_list[0].score(), 16.0f, 0.1f);
229}
230
231TEST_F(ColumnIndexerTest, TestQuantizeINT4) {
232 auto column_indexer =
233 ColumnIndexer::Create("test_collection", "./", 0, "test_column",
234 IndexTypes::PROXIMA_GRAPH_INDEX);
235
236 meta::ColumnMeta meta;
237 meta.set_name("test_column");
238 meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
239 meta.set_data_type(DataTypes::VECTOR_FP32);
240 meta.set_dimension(16);
241 meta.mutable_parameters()->set("quantize_type", "DT_VECTOR_INT4");
242
243 ReadOptions read_options;
244 read_options.use_mmap = true;
245 read_options.create_new = true;
246 int ret = column_indexer->open(meta, read_options);
247 ASSERT_EQ(ret, 0);
248
249 for (size_t i = 0; i < 1000; i++) {
250 std::vector<float> fvec(16U);
251 for (size_t j = 0; j < 16U; j++) {
252 fvec[j] = i * 1.0f;
253 }
254
255 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
256 ColumnData column_data;
257 column_data.column_name = "test_column";
258 column_data.data_type = DataTypes::VECTOR_FP32;
259 column_data.dimension = 16;
260 column_data.data = vector;
261 ret = column_indexer->insert(i, column_data);
262 ASSERT_EQ(ret, 0);
263 }
264
265 for (size_t i = 0; i < 1000; i++) {
266 std::vector<float> fvec(16U);
267 for (size_t j = 0; j < 16U; j++) {
268 fvec[j] = i * 1.0f;
269 }
270
271 IndexDocumentList result_list;
272 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
273 QueryParams query_params;
274 query_params.topk = 10;
275 ret = column_indexer->search(query, query_params, nullptr, &result_list);
276 ASSERT_EQ(ret, 0);
277 ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f);
278 ASSERT_EQ(result_list[0].key(), i);
279 }
280}
281
282TEST_F(ColumnIndexerTest, TestOswgEngine) {
283 auto column_indexer =
284 ColumnIndexer::Create("test_collection", "./", 0, "test_column",
285 IndexTypes::PROXIMA_GRAPH_INDEX);
286
287 meta::ColumnMeta meta;
288 meta.set_name("test_column");
289 meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
290 meta.set_data_type(DataTypes::VECTOR_FP32);
291 meta.set_dimension(16);
292 meta.mutable_parameters()->set("engine", "OSWG");
293
294 ReadOptions read_options;
295 read_options.use_mmap = true;
296 read_options.create_new = true;
297 int ret = column_indexer->open(meta, read_options);
298 ASSERT_EQ(ret, 0);
299
300 for (size_t i = 0; i < 1000; i++) {
301 std::vector<float> fvec(16U);
302 for (size_t j = 0; j < 16U; j++) {
303 fvec[j] = i * 1.0f;
304 }
305
306 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
307 ColumnData column_data;
308 column_data.column_name = "test_column";
309 column_data.data_type = DataTypes::VECTOR_FP32;
310 column_data.dimension = 16;
311 column_data.data = vector;
312 ret = column_indexer->insert(i, column_data);
313 ASSERT_EQ(ret, 0);
314 }
315
316 for (size_t i = 0; i < 1000; i++) {
317 std::vector<float> fvec(16U);
318 for (size_t j = 0; j < 16U; j++) {
319 fvec[j] = i * 1.0f;
320 }
321
322 IndexDocumentList result_list;
323 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
324 QueryParams query_params;
325 query_params.topk = 10;
326 ret = column_indexer->search(query, query_params, nullptr, &result_list);
327 ASSERT_EQ(ret, 0);
328 ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f);
329 ASSERT_EQ(result_list[0].key(), i);
330 }
331
332
333 for (size_t i = 0; i < 1000; i++) {
334 ret = column_indexer->remove(i);
335 ASSERT_EQ(ret, 0);
336
337 std::vector<float> fvec(16U);
338 for (size_t j = 0; j < 16U; j++) {
339 fvec[j] = i * 1.0f;
340 }
341
342 IndexDocumentList result_list;
343 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
344 QueryParams query_params;
345 query_params.topk = 10;
346 ret = column_indexer->search(query, query_params, nullptr, &result_list);
347 ASSERT_EQ(ret, 0);
348 if (result_list.size() > 0) {
349 ASSERT_NE(result_list[0].key(), i);
350 }
351 }
352}
353