1 | /** |
2 | * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "index/column/column_indexer.h" |
18 | #include <gtest/gtest.h> |
19 | #include "index/file_helper.h" |
20 | |
21 | using namespace proxima::be; |
22 | using namespace proxima::be::index; |
23 | |
24 | class ColumnIndexerTest : public testing::Test { |
25 | protected: |
26 | void SetUp() { |
27 | FileHelper::RemoveFile("./data.pxa.test_column.0" ); |
28 | } |
29 | |
30 | void TearDown() {} |
31 | }; |
32 | |
33 | TEST_F(ColumnIndexerTest, TestGeneral) { |
34 | auto column_indexer = |
35 | ColumnIndexer::Create("test_collection" , "./" , 0, "test_column" , |
36 | IndexTypes::PROXIMA_GRAPH_INDEX); |
37 | |
38 | meta::ColumnMeta meta; |
39 | meta.set_name("test_column" ); |
40 | meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
41 | meta.set_data_type(DataTypes::VECTOR_FP32); |
42 | meta.set_dimension(16); |
43 | |
44 | ReadOptions read_options; |
45 | read_options.use_mmap = true; |
46 | read_options.create_new = true; |
47 | int ret = column_indexer->open(meta, read_options); |
48 | ASSERT_EQ(ret, 0); |
49 | |
50 | // Test insert |
51 | for (size_t i = 0; i < 1000; i++) { |
52 | std::vector<float> fvec(16U); |
53 | for (size_t j = 0; j < 16U; j++) { |
54 | fvec[j] = i * 1.0f; |
55 | } |
56 | |
57 | std::string vector((char *)fvec.data(), fvec.size() * sizeof(float)); |
58 | ColumnData column_data; |
59 | column_data.column_name = "test_column" ; |
60 | column_data.data_type = DataTypes::VECTOR_FP32; |
61 | column_data.dimension = 16; |
62 | column_data.data = vector; |
63 | ret = column_indexer->insert(i, column_data); |
64 | ASSERT_EQ(ret, 0); |
65 | } |
66 | |
67 | // Test search |
68 | for (size_t i = 0; i < 1000; i++) { |
69 | std::vector<float> fvec(16U); |
70 | for (size_t j = 0; j < 16U; j++) { |
71 | fvec[j] = i * 1.0f; |
72 | } |
73 | |
74 | IndexDocumentList result_list; |
75 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
76 | QueryParams query_params; |
77 | query_params.topk = 10; |
78 | ret = column_indexer->search(query, query_params, nullptr, &result_list); |
79 | ASSERT_EQ(ret, 0); |
80 | ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f); |
81 | ASSERT_EQ(result_list[0].key(), i); |
82 | } |
83 | } |
84 | |
85 | TEST_F(ColumnIndexerTest, TestQuantizeFP16) { |
86 | auto column_indexer = |
87 | ColumnIndexer::Create("test_collection" , "./" , 0, "test_column" , |
88 | IndexTypes::PROXIMA_GRAPH_INDEX); |
89 | |
90 | meta::ColumnMeta meta; |
91 | meta.set_name("test_column" ); |
92 | meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
93 | meta.set_data_type(DataTypes::VECTOR_FP32); |
94 | meta.set_dimension(16); |
95 | meta.mutable_parameters()->set("quantize_type" , "DT_VECTOR_FP16" ); |
96 | |
97 | ReadOptions read_options; |
98 | read_options.use_mmap = true; |
99 | read_options.create_new = true; |
100 | int ret = column_indexer->open(meta, read_options); |
101 | ASSERT_EQ(ret, 0); |
102 | |
103 | for (size_t i = 0; i < 1000; i++) { |
104 | std::vector<float> fvec(16U); |
105 | for (size_t j = 0; j < 16U; j++) { |
106 | fvec[j] = i * 1.0f; |
107 | } |
108 | |
109 | std::string vector((char *)fvec.data(), fvec.size() * sizeof(float)); |
110 | ColumnData column_data; |
111 | column_data.column_name = "test_column" ; |
112 | column_data.data_type = DataTypes::VECTOR_FP32; |
113 | column_data.dimension = 16; |
114 | column_data.data = vector; |
115 | ret = column_indexer->insert(i, column_data); |
116 | ASSERT_EQ(ret, 0); |
117 | } |
118 | |
119 | for (size_t i = 0; i < 1000; i++) { |
120 | std::vector<float> fvec(16U); |
121 | for (size_t j = 0; j < 16U; j++) { |
122 | fvec[j] = i * 1.0f; |
123 | } |
124 | |
125 | IndexDocumentList result_list; |
126 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
127 | QueryParams query_params; |
128 | query_params.topk = 10; |
129 | ret = column_indexer->search(query, query_params, nullptr, &result_list); |
130 | ASSERT_EQ(ret, 0); |
131 | ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f); |
132 | ASSERT_EQ(result_list[0].key(), i); |
133 | } |
134 | } |
135 | |
136 | TEST_F(ColumnIndexerTest, TestQuantizeINT8) { |
137 | auto column_indexer = |
138 | ColumnIndexer::Create("test_collection" , "./" , 0, "test_column" , |
139 | IndexTypes::PROXIMA_GRAPH_INDEX); |
140 | |
141 | meta::ColumnMeta meta; |
142 | meta.set_name("test_column" ); |
143 | meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
144 | meta.set_data_type(DataTypes::VECTOR_FP32); |
145 | meta.set_dimension(16); |
146 | meta.mutable_parameters()->set("quantize_type" , "DT_VECTOR_INT8" ); |
147 | |
148 | ReadOptions read_options; |
149 | read_options.use_mmap = true; |
150 | read_options.create_new = true; |
151 | int ret = column_indexer->open(meta, read_options); |
152 | ASSERT_EQ(ret, 0); |
153 | |
154 | for (size_t i = 0; i < 1000; i++) { |
155 | std::vector<float> fvec(16U); |
156 | for (size_t j = 0; j < 16U; j++) { |
157 | fvec[j] = i * 1.0f; |
158 | } |
159 | |
160 | std::string vector((char *)fvec.data(), fvec.size() * sizeof(float)); |
161 | ColumnData column_data; |
162 | column_data.column_name = "test_column" ; |
163 | column_data.data_type = DataTypes::VECTOR_FP32; |
164 | column_data.dimension = 16; |
165 | column_data.data = vector; |
166 | ret = column_indexer->insert(i, column_data); |
167 | ASSERT_EQ(ret, 0); |
168 | } |
169 | |
170 | for (size_t i = 0; i < 1000; i++) { |
171 | std::vector<float> fvec(16U); |
172 | for (size_t j = 0; j < 16U; j++) { |
173 | fvec[j] = i * 1.0f; |
174 | } |
175 | |
176 | IndexDocumentList result_list; |
177 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
178 | QueryParams query_params; |
179 | query_params.topk = 10; |
180 | ret = column_indexer->search(query, query_params, nullptr, &result_list); |
181 | ASSERT_EQ(ret, 0); |
182 | ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f); |
183 | ASSERT_EQ(result_list[0].key(), i); |
184 | } |
185 | } |
186 | |
187 | TEST_F(ColumnIndexerTest, TestQuantizeINT8InnerProduct) { |
188 | auto column_indexer = |
189 | ColumnIndexer::Create("test_collection_int8_ip" , "./" , 0, "test_column" , |
190 | IndexTypes::PROXIMA_GRAPH_INDEX); |
191 | meta::ColumnMeta meta; |
192 | meta.set_name("test_column" ); |
193 | meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
194 | meta.set_data_type(DataTypes::VECTOR_FP32); |
195 | meta.set_dimension(16); |
196 | meta.mutable_parameters()->set("quantize_type" , "DT_VECTOR_INT8" ); |
197 | meta.mutable_parameters()->set("metric_type" , "InnerProduct" ); |
198 | ReadOptions read_options; |
199 | read_options.use_mmap = true; |
200 | read_options.create_new = true; |
201 | int ret = column_indexer->open(meta, read_options); |
202 | ASSERT_EQ(ret, 0); |
203 | for (size_t i = 0; i <= 1000; i++) { |
204 | std::vector<float> fvec(16U); |
205 | for (size_t j = 0; j < 16U; j++) { |
206 | fvec[j] = i * 0.001f; |
207 | } |
208 | std::string vector((char *)fvec.data(), fvec.size() * sizeof(float)); |
209 | ColumnData column_data; |
210 | column_data.column_name = "test_column" ; |
211 | column_data.data_type = DataTypes::VECTOR_FP32; |
212 | column_data.dimension = 16; |
213 | column_data.data = vector; |
214 | ret = column_indexer->insert(i, column_data); |
215 | ASSERT_EQ(ret, 0); |
216 | } |
217 | std::vector<float> fvec(16U); |
218 | for (size_t j = 0; j < 16U; j++) { |
219 | fvec[j] = 1.0f; |
220 | } |
221 | IndexDocumentList result_list; |
222 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
223 | QueryParams query_params; |
224 | query_params.topk = 10; |
225 | ret = column_indexer->search(query, query_params, nullptr, &result_list); |
226 | ASSERT_EQ(ret, 0); |
227 | ASSERT_EQ(1000, result_list[0].key()); |
228 | ASSERT_NEAR(result_list[0].score(), 16.0f, 0.1f); |
229 | } |
230 | |
231 | TEST_F(ColumnIndexerTest, TestQuantizeINT4) { |
232 | auto column_indexer = |
233 | ColumnIndexer::Create("test_collection" , "./" , 0, "test_column" , |
234 | IndexTypes::PROXIMA_GRAPH_INDEX); |
235 | |
236 | meta::ColumnMeta meta; |
237 | meta.set_name("test_column" ); |
238 | meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
239 | meta.set_data_type(DataTypes::VECTOR_FP32); |
240 | meta.set_dimension(16); |
241 | meta.mutable_parameters()->set("quantize_type" , "DT_VECTOR_INT4" ); |
242 | |
243 | ReadOptions read_options; |
244 | read_options.use_mmap = true; |
245 | read_options.create_new = true; |
246 | int ret = column_indexer->open(meta, read_options); |
247 | ASSERT_EQ(ret, 0); |
248 | |
249 | for (size_t i = 0; i < 1000; i++) { |
250 | std::vector<float> fvec(16U); |
251 | for (size_t j = 0; j < 16U; j++) { |
252 | fvec[j] = i * 1.0f; |
253 | } |
254 | |
255 | std::string vector((char *)fvec.data(), fvec.size() * sizeof(float)); |
256 | ColumnData column_data; |
257 | column_data.column_name = "test_column" ; |
258 | column_data.data_type = DataTypes::VECTOR_FP32; |
259 | column_data.dimension = 16; |
260 | column_data.data = vector; |
261 | ret = column_indexer->insert(i, column_data); |
262 | ASSERT_EQ(ret, 0); |
263 | } |
264 | |
265 | for (size_t i = 0; i < 1000; i++) { |
266 | std::vector<float> fvec(16U); |
267 | for (size_t j = 0; j < 16U; j++) { |
268 | fvec[j] = i * 1.0f; |
269 | } |
270 | |
271 | IndexDocumentList result_list; |
272 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
273 | QueryParams query_params; |
274 | query_params.topk = 10; |
275 | ret = column_indexer->search(query, query_params, nullptr, &result_list); |
276 | ASSERT_EQ(ret, 0); |
277 | ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f); |
278 | ASSERT_EQ(result_list[0].key(), i); |
279 | } |
280 | } |
281 | |
282 | TEST_F(ColumnIndexerTest, TestOswgEngine) { |
283 | auto column_indexer = |
284 | ColumnIndexer::Create("test_collection" , "./" , 0, "test_column" , |
285 | IndexTypes::PROXIMA_GRAPH_INDEX); |
286 | |
287 | meta::ColumnMeta meta; |
288 | meta.set_name("test_column" ); |
289 | meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
290 | meta.set_data_type(DataTypes::VECTOR_FP32); |
291 | meta.set_dimension(16); |
292 | meta.mutable_parameters()->set("engine" , "OSWG" ); |
293 | |
294 | ReadOptions read_options; |
295 | read_options.use_mmap = true; |
296 | read_options.create_new = true; |
297 | int ret = column_indexer->open(meta, read_options); |
298 | ASSERT_EQ(ret, 0); |
299 | |
300 | for (size_t i = 0; i < 1000; i++) { |
301 | std::vector<float> fvec(16U); |
302 | for (size_t j = 0; j < 16U; j++) { |
303 | fvec[j] = i * 1.0f; |
304 | } |
305 | |
306 | std::string vector((char *)fvec.data(), fvec.size() * sizeof(float)); |
307 | ColumnData column_data; |
308 | column_data.column_name = "test_column" ; |
309 | column_data.data_type = DataTypes::VECTOR_FP32; |
310 | column_data.dimension = 16; |
311 | column_data.data = vector; |
312 | ret = column_indexer->insert(i, column_data); |
313 | ASSERT_EQ(ret, 0); |
314 | } |
315 | |
316 | for (size_t i = 0; i < 1000; i++) { |
317 | std::vector<float> fvec(16U); |
318 | for (size_t j = 0; j < 16U; j++) { |
319 | fvec[j] = i * 1.0f; |
320 | } |
321 | |
322 | IndexDocumentList result_list; |
323 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
324 | QueryParams query_params; |
325 | query_params.topk = 10; |
326 | ret = column_indexer->search(query, query_params, nullptr, &result_list); |
327 | ASSERT_EQ(ret, 0); |
328 | ASSERT_NEAR(result_list[0].score(), 0.0f, 0.1f); |
329 | ASSERT_EQ(result_list[0].key(), i); |
330 | } |
331 | |
332 | |
333 | for (size_t i = 0; i < 1000; i++) { |
334 | ret = column_indexer->remove(i); |
335 | ASSERT_EQ(ret, 0); |
336 | |
337 | std::vector<float> fvec(16U); |
338 | for (size_t j = 0; j < 16U; j++) { |
339 | fvec[j] = i * 1.0f; |
340 | } |
341 | |
342 | IndexDocumentList result_list; |
343 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
344 | QueryParams query_params; |
345 | query_params.topk = 10; |
346 | ret = column_indexer->search(query, query_params, nullptr, &result_list); |
347 | ASSERT_EQ(ret, 0); |
348 | if (result_list.size() > 0) { |
349 | ASSERT_NE(result_list[0].key(), i); |
350 | } |
351 | } |
352 | } |
353 | |