1 | /** |
2 | * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <ailego/encoding/json.h> |
18 | #include <ailego/utility/float_helper.h> |
19 | #include <ailego/utility/time_helper.h> |
20 | |
21 | #define private public |
22 | #include "server/proto_converter.h" |
23 | #undef private |
24 | #include <gtest/gtest.h> |
25 | |
26 | using namespace proxima::be; |
27 | using namespace proxima::be::server; |
28 | |
29 | class ProtoConverterTest : public testing::Test { |
30 | protected: |
31 | void SetUp() {} |
32 | |
33 | void TearDown() {} |
34 | }; |
35 | |
36 | TEST_F(ProtoConverterTest, TestConvertIndexDataSuccessWithJson) { |
37 | std::string index_value("[1,2,3,4,5,6]" ); |
38 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
39 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
40 | meta->set_data_type(DataTypes::VECTOR_FP32); |
41 | meta->set_dimension(6); |
42 | meta->set_name("field1" ); |
43 | int dimension = 6; |
44 | aitheta2::IndexParams *params = meta->mutable_parameters(); |
45 | params->set("dimension" , dimension); |
46 | proto::WriteRequest::IndexColumnMeta proto_meta; |
47 | proto_meta.set_dimension(dimension); |
48 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP32); |
49 | index::ColumnData column_data; |
50 | int ret = ProtoConverter::ConvertIndexData(index_value, *meta, proto_meta, |
51 | false, &column_data); |
52 | ASSERT_EQ(ret, 0); |
53 | ASSERT_EQ(column_data.column_name, "field1" ); |
54 | ASSERT_EQ(column_data.data_type, DataTypes::VECTOR_FP32); |
55 | ASSERT_EQ(column_data.dimension, 6); |
56 | const float *data = (const float *)(&(column_data.data[0])); |
57 | for (uint32_t i = 1; i <= column_data.dimension; ++i) { |
58 | ASSERT_FLOAT_EQ(1.0f * i, data[i - 1]); |
59 | } |
60 | } |
61 | |
62 | TEST_F(ProtoConverterTest, TestConvertIndexDataSuccessWithBytes) { |
63 | std::vector<float> vectors = {1, 2, 3, 4, 5, 6}; |
64 | std::string index_value((const char *)vectors.data(), |
65 | vectors.size() * sizeof(float)); |
66 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
67 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
68 | meta->set_data_type(DataTypes::VECTOR_FP32); |
69 | int dimension = 6; |
70 | meta->set_dimension(dimension); |
71 | meta->set_name("field1" ); |
72 | proto::WriteRequest::IndexColumnMeta proto_meta; |
73 | proto_meta.set_dimension(dimension); |
74 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP32); |
75 | index::ColumnData column_data; |
76 | int ret = ProtoConverter::ConvertIndexData(index_value, *meta, proto_meta, |
77 | true, &column_data); |
78 | ASSERT_EQ(ret, 0); |
79 | ASSERT_EQ(column_data.column_name, "field1" ); |
80 | ASSERT_EQ(column_data.data_type, DataTypes::VECTOR_FP32); |
81 | ASSERT_EQ(column_data.dimension, 6); |
82 | const float *data = (const float *)(&(column_data.data[0])); |
83 | for (uint32_t i = 1; i <= column_data.dimension; ++i) { |
84 | ASSERT_FLOAT_EQ(1.0f * i, data[i - 1]); |
85 | } |
86 | } |
87 | |
88 | TEST_F(ProtoConverterTest, TestConvertIndexDataWithParseFailed) { |
89 | std::string index_value("[1,2,3,4,5]" ); |
90 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
91 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
92 | meta->set_data_type(DataTypes::VECTOR_FP32); |
93 | int dimension = 6; |
94 | meta->set_dimension(6); |
95 | proto::WriteRequest::IndexColumnMeta proto_meta; |
96 | proto_meta.set_dimension(dimension); |
97 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP32); |
98 | index::ColumnData column_data; |
99 | int ret = ProtoConverter::ConvertIndexData(index_value, *meta, proto_meta, |
100 | false, &column_data); |
101 | ASSERT_EQ(ret, ErrorCode_MismatchedDimension); |
102 | } |
103 | |
104 | TEST_F(ProtoConverterTest, TestConvertIndexDataWithUnsupportedIndexType) { |
105 | std::string index_value("[1,2,3,4,5,6]" ); |
106 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
107 | meta->set_index_type(IndexTypes::UNDEFINED); |
108 | meta->set_data_type(DataTypes::VECTOR_FP32); |
109 | int dimension = 6; |
110 | meta->set_dimension(dimension); |
111 | proto::WriteRequest::IndexColumnMeta proto_meta; |
112 | proto_meta.set_dimension(dimension); |
113 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP32); |
114 | index::ColumnData column_data; |
115 | int ret = ProtoConverter::ConvertIndexData(index_value, *meta, proto_meta, |
116 | false, &column_data); |
117 | ASSERT_EQ(ret, ErrorCode_InvalidIndexType); |
118 | } |
119 | |
120 | TEST_F(ProtoConverterTest, TestParseJsonIndexColumnValueaFp32) { |
121 | std::string index_value("[1,2,3,4,5,6]" ); |
122 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
123 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
124 | meta->set_data_type(DataTypes::VECTOR_FP32); |
125 | uint32_t dimension = 6; |
126 | meta->set_dimension(dimension); |
127 | proto::WriteRequest::IndexColumnMeta proto_meta; |
128 | proto_meta.set_dimension(dimension); |
129 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP32); |
130 | std::string output_value; |
131 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
132 | index_value, *meta, proto_meta, &output_value); |
133 | ASSERT_EQ(ret, 0); |
134 | const float *data = (const float *)(&(output_value[0])); |
135 | for (uint32_t i = 1; i <= dimension; ++i) { |
136 | ASSERT_FLOAT_EQ(1.0f * i, data[i - 1]); |
137 | } |
138 | } |
139 | |
140 | TEST_F(ProtoConverterTest, |
141 | TestParseJsonIndexColumnValueaFp32WithTransformNoSupport) { |
142 | std::string index_value("[1,2,3,4,5,6]" ); |
143 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
144 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
145 | meta->set_data_type(DataTypes::VECTOR_FP32); |
146 | uint32_t dimension = 6; |
147 | meta->set_dimension(dimension); |
148 | proto::WriteRequest::IndexColumnMeta proto_meta; |
149 | proto_meta.set_dimension(dimension); |
150 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_INT8); |
151 | std::string output_value; |
152 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
153 | index_value, *meta, proto_meta, &output_value); |
154 | ASSERT_EQ(ret, ErrorCode_MismatchedDataType); |
155 | } |
156 | |
157 | TEST_F(ProtoConverterTest, |
158 | TestParseJsonIndexColumnValueaFp32WithTransformSuccess) { |
159 | std::string index_value("[1,2,3,4,5,6]" ); |
160 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
161 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
162 | meta->set_data_type(DataTypes::VECTOR_FP16); |
163 | uint32_t dimension = 6; |
164 | meta->set_dimension(dimension); |
165 | proto::WriteRequest::IndexColumnMeta proto_meta; |
166 | proto_meta.set_dimension(dimension); |
167 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP32); |
168 | std::string output_value; |
169 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
170 | index_value, *meta, proto_meta, &output_value); |
171 | ASSERT_EQ(ret, 0); |
172 | const uint16_t *data = (const uint16_t *)(&(output_value[0])); |
173 | for (uint32_t i = 1; i <= dimension; ++i) { |
174 | ASSERT_FLOAT_EQ(1.0f * i, ailego::FloatHelper::ToFP32(data[i - 1])); |
175 | } |
176 | } |
177 | |
178 | TEST_F(ProtoConverterTest, TestParseJsonIndexColumnValueaFp16) { |
179 | std::string index_value("[1,2,3,4,5,6]" ); |
180 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
181 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
182 | meta->set_data_type(DataTypes::VECTOR_FP16); |
183 | uint32_t dimension = 6; |
184 | meta->set_dimension(dimension); |
185 | proto::WriteRequest::IndexColumnMeta proto_meta; |
186 | proto_meta.set_dimension(dimension); |
187 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP16); |
188 | std::string output_value; |
189 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
190 | index_value, *meta, proto_meta, &output_value); |
191 | ASSERT_EQ(ret, 0); |
192 | const uint16_t *data = (const uint16_t *)(&(output_value[0])); |
193 | for (uint32_t i = 1; i <= dimension; ++i) { |
194 | ASSERT_FLOAT_EQ(1.0f * i, ailego::FloatHelper::ToFP32(data[i - 1])); |
195 | } |
196 | } |
197 | |
198 | TEST_F(ProtoConverterTest, TestParseJsonIndexColumnValueaInt16) { |
199 | std::string index_value("[1,2,3,4,5,6]" ); |
200 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
201 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
202 | meta->set_data_type(DataTypes::VECTOR_INT16); |
203 | uint32_t dimension = 6; |
204 | meta->set_dimension(dimension); |
205 | proto::WriteRequest::IndexColumnMeta proto_meta; |
206 | proto_meta.set_dimension(dimension); |
207 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_INT16); |
208 | std::string output_value; |
209 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
210 | index_value, *meta, proto_meta, &output_value); |
211 | ASSERT_EQ(ret, 0); |
212 | const int16_t *data = (const int16_t *)(&(output_value[0])); |
213 | for (uint32_t i = 1; i <= dimension; ++i) { |
214 | ASSERT_FLOAT_EQ((int16_t)i, data[i - 1]); |
215 | } |
216 | } |
217 | |
218 | TEST_F(ProtoConverterTest, TestParseJsonIndexColumnValueaInt8) { |
219 | std::string index_value("[1,2,3,4,5,6]" ); |
220 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
221 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
222 | meta->set_data_type(DataTypes::VECTOR_INT8); |
223 | uint32_t dimension = 6; |
224 | meta->set_dimension(dimension); |
225 | proto::WriteRequest::IndexColumnMeta proto_meta; |
226 | proto_meta.set_dimension(dimension); |
227 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_INT8); |
228 | std::string output_value; |
229 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
230 | index_value, *meta, proto_meta, &output_value); |
231 | ASSERT_EQ(ret, 0); |
232 | const int8_t *data = (const int8_t *)(&(output_value[0])); |
233 | for (uint32_t i = 1; i <= dimension; ++i) { |
234 | ASSERT_FLOAT_EQ((int8_t)i, data[i - 1]); |
235 | } |
236 | } |
237 | |
238 | TEST_F(ProtoConverterTest, TestParseJsonIndexColumnValueaInt4) { |
239 | std::string index_value("[1,2,3,4,5,6]" ); |
240 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
241 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
242 | meta->set_data_type(DataTypes::VECTOR_INT4); |
243 | uint32_t dimension = 6; |
244 | meta->set_dimension(dimension); |
245 | proto::WriteRequest::IndexColumnMeta proto_meta; |
246 | proto_meta.set_dimension(dimension); |
247 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_INT4); |
248 | std::string output_value; |
249 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
250 | index_value, *meta, proto_meta, &output_value); |
251 | ASSERT_EQ(ret, 0); |
252 | const uint8_t *data = (const uint8_t *)(&(output_value[0])); |
253 | for (uint32_t i = 1; i <= dimension / 2; ++i) { |
254 | ASSERT_FLOAT_EQ((int8_t)(2 * i - 1), (int8_t)(data[i - 1] & 0xf)); |
255 | ASSERT_FLOAT_EQ((uint8_t)(2 * i), (int8_t)(data[i - 1] >> 4)); |
256 | } |
257 | } |
258 | |
259 | TEST_F(ProtoConverterTest, TestParseJsonIndexColumnValueaBinary32) { |
260 | std::string index_value("[1,2,3,4,5,6,7,8]" ); |
261 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
262 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
263 | meta->set_data_type(DataTypes::VECTOR_BINARY32); |
264 | uint32_t dimension = 256; |
265 | meta->set_dimension(dimension); |
266 | proto::WriteRequest::IndexColumnMeta proto_meta; |
267 | proto_meta.set_dimension(dimension); |
268 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_BINARY32); |
269 | std::string output_value; |
270 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
271 | index_value, *meta, proto_meta, &output_value); |
272 | ASSERT_EQ(ret, 0); |
273 | const uint32_t *data = (const uint32_t *)(&(output_value[0])); |
274 | for (uint32_t i = 1; i <= dimension / 32; ++i) { |
275 | ASSERT_FLOAT_EQ((uint32_t)i, data[i - 1]); |
276 | } |
277 | } |
278 | |
279 | TEST_F(ProtoConverterTest, TestParseJsonIndexColumnValueaBinary64) { |
280 | std::string index_value("[1,2,3,4,5,6,7,8]" ); |
281 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
282 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
283 | meta->set_data_type(DataTypes::VECTOR_BINARY64); |
284 | uint32_t dimension = 512; |
285 | meta->set_dimension(dimension); |
286 | proto::WriteRequest::IndexColumnMeta proto_meta; |
287 | proto_meta.set_dimension(dimension); |
288 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_BINARY64); |
289 | std::string output_value; |
290 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
291 | index_value, *meta, proto_meta, &output_value); |
292 | ASSERT_EQ(ret, 0); |
293 | const uint64_t *data = (const uint64_t *)(&(output_value[0])); |
294 | for (uint32_t i = 1; i <= dimension / 64; ++i) { |
295 | ASSERT_FLOAT_EQ((uint64_t)i, data[i - 1]); |
296 | } |
297 | } |
298 | |
299 | TEST_F(ProtoConverterTest, TestParseBytesIndexColumnValueWithoutTransform) { |
300 | std::vector<float> vectors = {1, 2, 3, 4, 5, 6}; |
301 | std::string index_value((const char *)vectors.data(), |
302 | vectors.size() * sizeof(float)); |
303 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
304 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
305 | meta->set_data_type(DataTypes::VECTOR_FP32); |
306 | uint32_t dimension = 6; |
307 | meta->set_dimension(dimension); |
308 | proto::WriteRequest::IndexColumnMeta proto_meta; |
309 | proto_meta.set_dimension(dimension); |
310 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP32); |
311 | std::string output_value; |
312 | int ret = ProtoConverter::ParseBytesIndexColumnValue( |
313 | index_value, *meta, proto_meta, &output_value); |
314 | ASSERT_EQ(ret, 0); |
315 | const float *data = (const float *)(&(output_value[0])); |
316 | for (uint32_t i = 1; i <= dimension; ++i) { |
317 | ASSERT_FLOAT_EQ(1.0f * i, data[i - 1]); |
318 | } |
319 | } |
320 | |
321 | TEST_F(ProtoConverterTest, TestParseBytesIndexColumnValueWithTransform) { |
322 | std::vector<float> vectors = {1, 2, 3, 4, 5, 6}; |
323 | std::string index_value((const char *)vectors.data(), |
324 | vectors.size() * sizeof(float)); |
325 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
326 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
327 | meta->set_data_type(DataTypes::VECTOR_FP16); |
328 | uint32_t dimension = 6; |
329 | meta->set_dimension(dimension); |
330 | proto::WriteRequest::IndexColumnMeta proto_meta; |
331 | proto_meta.set_dimension(dimension); |
332 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_FP32); |
333 | std::string output_value; |
334 | int ret = ProtoConverter::ParseBytesIndexColumnValue( |
335 | index_value, *meta, proto_meta, &output_value); |
336 | ASSERT_EQ(ret, 0); |
337 | const uint16_t *data = (const uint16_t *)(&(output_value[0])); |
338 | for (uint32_t i = 1; i <= dimension; ++i) { |
339 | ASSERT_FLOAT_EQ(1.0f * i, ailego::FloatHelper::ToFP32(data[i - 1])); |
340 | } |
341 | } |
342 | |
343 | TEST_F(ProtoConverterTest, TestParseBytesIndexColumnValueWithTransformFailed) { |
344 | std::vector<float> vectors = {1, 2, 3, 4, 5, 6}; |
345 | std::string index_value((const char *)vectors.data(), |
346 | vectors.size() * sizeof(float)); |
347 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
348 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
349 | meta->set_data_type(DataTypes::VECTOR_FP16); |
350 | uint32_t dimension = 6; |
351 | meta->set_dimension(dimension); |
352 | proto::WriteRequest::IndexColumnMeta proto_meta; |
353 | proto_meta.set_dimension(dimension); |
354 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_INT16); |
355 | std::string output_value; |
356 | int ret = ProtoConverter::ParseBytesIndexColumnValue( |
357 | index_value, *meta, proto_meta, &output_value); |
358 | ASSERT_EQ(ret, ErrorCode_MismatchedDataType); |
359 | } |
360 | |
361 | TEST_F(ProtoConverterTest, TestCopyBytesIndexColumnValueFp32) { |
362 | std::vector<float> vectors = {1, 2, 3, 4, 5, 6}; |
363 | std::string index_value((const char *)vectors.data(), |
364 | vectors.size() * sizeof(float)); |
365 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
366 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
367 | meta->set_data_type(DataTypes::VECTOR_FP32); |
368 | uint32_t dimension = 6; |
369 | meta->set_dimension(dimension); |
370 | std::string output_value; |
371 | int ret = ProtoConverter::CopyBytesIndexColumnValue(index_value, *meta, |
372 | &output_value); |
373 | ASSERT_EQ(ret, 0); |
374 | const float *data = (const float *)(&(output_value[0])); |
375 | for (uint32_t i = 1; i <= dimension; ++i) { |
376 | ASSERT_FLOAT_EQ(1.0f * i, data[i - 1]); |
377 | } |
378 | } |
379 | |
380 | TEST_F(ProtoConverterTest, TestCopyBytesIndexColumnValueaFp16) { |
381 | std::vector<float> vectors = {1, 2, 3, 4, 5, 6}; |
382 | std::string index_value; |
383 | index_value.resize(12); |
384 | ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(vectors.data()), |
385 | vectors.size(), |
386 | reinterpret_cast<uint16_t *>(&index_value[0])); |
387 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
388 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
389 | meta->set_data_type(DataTypes::VECTOR_FP16); |
390 | uint32_t dimension = 6; |
391 | meta->set_dimension(dimension); |
392 | std::string output_value; |
393 | int ret = ProtoConverter::CopyBytesIndexColumnValue(index_value, *meta, |
394 | &output_value); |
395 | ASSERT_EQ(ret, 0); |
396 | const uint16_t *data = (const uint16_t *)(&(output_value[0])); |
397 | for (uint32_t i = 1; i <= dimension; ++i) { |
398 | ASSERT_FLOAT_EQ(1.0f * i, ailego::FloatHelper::ToFP32(data[i - 1])); |
399 | } |
400 | } |
401 | |
402 | TEST_F(ProtoConverterTest, TestCopyBytesIndexColumnValueaInt16) { |
403 | std::vector<int16_t> vectors = {1, 2, 3, 4, 5, 6}; |
404 | std::string index_value((const char *)vectors.data(), |
405 | vectors.size() * sizeof(int16_t)); |
406 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
407 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
408 | meta->set_data_type(DataTypes::VECTOR_INT16); |
409 | uint32_t dimension = 6; |
410 | meta->set_dimension(dimension); |
411 | std::string output_value; |
412 | int ret = ProtoConverter::CopyBytesIndexColumnValue(index_value, *meta, |
413 | &output_value); |
414 | ASSERT_EQ(ret, 0); |
415 | const int16_t *data = (const int16_t *)(&(output_value[0])); |
416 | for (uint32_t i = 1; i <= dimension; ++i) { |
417 | ASSERT_FLOAT_EQ((int16_t)i, data[i - 1]); |
418 | } |
419 | } |
420 | |
421 | TEST_F(ProtoConverterTest, TestCopyBytesIndexColumnValueaInt8) { |
422 | std::vector<int8_t> vectors = {1, 2, 3, 4, 5, 6}; |
423 | std::string index_value((const char *)vectors.data(), |
424 | vectors.size() * sizeof(int8_t)); |
425 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
426 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
427 | meta->set_data_type(DataTypes::VECTOR_INT8); |
428 | uint32_t dimension = 6; |
429 | meta->set_dimension(dimension); |
430 | std::string output_value; |
431 | int ret = ProtoConverter::CopyBytesIndexColumnValue(index_value, *meta, |
432 | &output_value); |
433 | ASSERT_EQ(ret, 0); |
434 | const int8_t *data = (const int8_t *)(&(output_value[0])); |
435 | for (uint32_t i = 1; i <= dimension; ++i) { |
436 | ASSERT_FLOAT_EQ((int8_t)i, data[i - 1]); |
437 | } |
438 | } |
439 | |
440 | TEST_F(ProtoConverterTest, TestCopyBytesIndexColumnValueaInt4) { |
441 | std::vector<int8_t> vectors = {1, 2, 3, 4, 5, 6}; |
442 | std::string index_value; |
443 | index_value.resize(3); |
444 | uint8_t *out = reinterpret_cast<uint8_t *>(&index_value[0]); |
445 | for (size_t i = 0; i < vectors.size(); i += 2) { |
446 | out[i / 2] = (static_cast<uint8_t>(vectors[i + 1]) << 4) | |
447 | (static_cast<uint8_t>(vectors[i]) & 0xF); |
448 | } |
449 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
450 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
451 | meta->set_data_type(DataTypes::VECTOR_INT4); |
452 | uint32_t dimension = 6; |
453 | meta->set_dimension(dimension); |
454 | std::string output_value; |
455 | int ret = ProtoConverter::CopyBytesIndexColumnValue(index_value, *meta, |
456 | &output_value); |
457 | ASSERT_EQ(ret, 0); |
458 | const uint8_t *data = (const uint8_t *)(&(output_value[0])); |
459 | for (uint32_t i = 1; i <= dimension / 2; ++i) { |
460 | ASSERT_FLOAT_EQ((int8_t)(2 * i - 1), (int8_t)(data[i - 1] & 0xf)); |
461 | ASSERT_FLOAT_EQ((uint8_t)(2 * i), (int8_t)(data[i - 1] >> 4)); |
462 | } |
463 | } |
464 | |
465 | TEST_F(ProtoConverterTest, TestCopyBytesIndexColumnValueaBinary32) { |
466 | std::vector<uint32_t> vectors = {1, 2, 3, 4, 5, 6, 7, 8}; |
467 | std::string index_value((const char *)vectors.data(), |
468 | vectors.size() * sizeof(uint32_t)); |
469 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
470 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
471 | meta->set_data_type(DataTypes::VECTOR_BINARY32); |
472 | uint32_t dimension = 256; |
473 | meta->set_dimension(dimension); |
474 | std::string output_value; |
475 | int ret = ProtoConverter::CopyBytesIndexColumnValue(index_value, *meta, |
476 | &output_value); |
477 | ASSERT_EQ(ret, 0); |
478 | const uint32_t *data = (const uint32_t *)(&(output_value[0])); |
479 | for (uint32_t i = 1; i <= dimension / 32; ++i) { |
480 | ASSERT_FLOAT_EQ((uint32_t)i, data[i - 1]); |
481 | } |
482 | } |
483 | |
484 | TEST_F(ProtoConverterTest, TestCopyBytesIndexColumnValueaBinary64) { |
485 | std::vector<uint64_t> vectors = {1, 2, 3, 4, 5, 6, 7, 8}; |
486 | std::string index_value((const char *)vectors.data(), |
487 | vectors.size() * sizeof(uint64_t)); |
488 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
489 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
490 | meta->set_data_type(DataTypes::VECTOR_BINARY64); |
491 | uint32_t dimension = 512; |
492 | meta->set_dimension(dimension); |
493 | std::string output_value; |
494 | int ret = ProtoConverter::CopyBytesIndexColumnValue(index_value, *meta, |
495 | &output_value); |
496 | ASSERT_EQ(ret, 0); |
497 | const uint64_t *data = (const uint64_t *)(&(output_value[0])); |
498 | for (uint32_t i = 1; i <= dimension / 64; ++i) { |
499 | ASSERT_FLOAT_EQ((uint64_t)i, data[i - 1]); |
500 | } |
501 | } |
502 | |
503 | TEST_F(ProtoConverterTest, TestParseIndexColumnFailedWithParseJsonVector) { |
504 | std::string index_value("[1,2,3,4,5,6,7,8" ); |
505 | meta::ColumnMetaPtr meta = std::make_shared<meta::ColumnMeta>(); |
506 | meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
507 | meta->set_data_type(DataTypes::VECTOR_BINARY64); |
508 | uint32_t dimension = 512; |
509 | meta->set_dimension(dimension); |
510 | proto::WriteRequest::IndexColumnMeta proto_meta; |
511 | proto_meta.set_dimension(dimension); |
512 | proto_meta.set_data_type(proto::DataType::DT_VECTOR_BINARY64); |
513 | std::string output_value; |
514 | int ret = ProtoConverter::ParseJsonIndexColumnValue( |
515 | index_value, *meta, proto_meta, &output_value); |
516 | ASSERT_EQ(ret, ErrorCode_MismatchedDimension); |
517 | } |
518 | |
519 | // TEST_F(ProtoConverterTest, TestCompare) { |
520 | // std::vector<std::string> vectors; |
521 | // size_t count = 100000; |
522 | // size_t dimension = 512; |
523 | |
524 | // std::cout << "Begin generate data: " << std::endl; |
525 | // for (size_t i = 0; i < count; ++i) { |
526 | // std::ostringstream oss; |
527 | // oss << 1.0 / (i + 1); |
528 | // for (size_t j = 1; j < dimension; ++j) { |
529 | // oss << "," << 1.0 / (i + 1 + j); |
530 | // } |
531 | // vectors.emplace_back(oss.str()); |
532 | // } |
533 | // std::cout << "End generate data: " << std::endl; |
534 | |
535 | // std::cout << "Begin process data: " << std::endl; |
536 | // uint64_t start = ailego::Monotime::MilliSeconds(); |
537 | // for (size_t i = 0; i < count; ++i) { |
538 | // std::vector<float> vector; |
539 | // ailego::StringHelper::Split(vectors[i], ",", &vector); |
540 | // if (vector.empty()) { |
541 | // std::cout << "Failed " << i << std::endl; |
542 | // continue; |
543 | // } |
544 | // } |
545 | // uint64_t end = ailego::Monotime::MilliSeconds(); |
546 | // std::cout << "End process data: " << std::endl; |
547 | |
548 | // float qps = count * 1000.0 / (end - start); |
549 | // std::cout << "total cost:" << (end - start) / 1000.0 << "s, qps: " << qps |
550 | // << std::endl; |
551 | // } |
552 | |
553 | // TEST_F(ProtoConverterTest, TestCompare1) { |
554 | // std::vector<std::string> vectors; |
555 | // size_t count = 100000; |
556 | // size_t dimension = 512; |
557 | |
558 | // std::cout << "Begin generate data: " << std::endl; |
559 | // for (size_t i = 0; i < count; ++i) { |
560 | // std::ostringstream oss; |
561 | // oss << "[" << 1.0 / (i + 1); |
562 | // for (size_t j = 1; j < dimension; ++j) { |
563 | // oss << "," << 1.0 / (i + 1 + j); |
564 | // } |
565 | // oss << "]"; |
566 | // vectors.emplace_back(oss.str()); |
567 | // } |
568 | // std::cout << "End generate data: " << std::endl; |
569 | |
570 | // std::cout << "Begin process data: " << std::endl; |
571 | // uint64_t start = ailego::Monotime::MilliSeconds(); |
572 | // for (size_t i = 0; i < count; ++i) { |
573 | // std::vector<float> vector; |
574 | // ailego::JsonValue root_node; |
575 | // if (!root_node.parse(vectors[i].c_str())) { |
576 | // std::cout << "Parse failed " << i << std::endl; |
577 | // continue; |
578 | // } |
579 | // ailego::JsonArray &array = root_node.as_array(); |
580 | // for (auto it = array.begin(); it != array.end(); ++it) { |
581 | // vector.emplace_back(it->as_float()); |
582 | // } |
583 | // if (vector.size() != dimension) { |
584 | // std::cout << "Failed " << i << std::endl; |
585 | // continue; |
586 | // } |
587 | // } |
588 | // uint64_t end = ailego::Monotime::MilliSeconds(); |
589 | // std::cout << "End process data: " << std::endl; |
590 | |
591 | // float qps = count * 1000.0 / (end - start); |
592 | // std::cout << "total cost:" << (end - start) / 1000.0 << "s, qps: " << qps |
593 | // << std::endl; |
594 | // } |
595 | |