1/**
2 * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
 * File : column_reader_test.cc
 * Author: haichao.chc
 * Date : 2021-01-02
 * Brief : Unittest of ColumnReader
20 */
21
22#include "index/column/column_reader.h"
23#include <aitheta2/index_factory.h>
24#include <gtest/gtest.h>
25#include "index/column/column_indexer.h"
26
27using namespace proxima::be;
28using namespace proxima::be::index;
29
30class ColumnReaderTest : public testing::Test {
31 protected:
32 void SetUp() {
33 FileHelper::RemoveFile("./data.pxa.test_column.0");
34 FileHelper::RemoveFile("data.seg.0");
35 }
36
37 void TearDown() {}
38};
39
40TEST_F(ColumnReaderTest, TestGeneral) {
41 auto column_indexer =
42 ColumnIndexer::Create("test_collection", "./", 0, "test_column",
43 IndexTypes::PROXIMA_GRAPH_INDEX);
44
45 meta::ColumnMeta meta;
46 meta.set_name("test_column");
47 meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
48 meta.set_data_type(DataTypes::VECTOR_FP32);
49 meta.set_dimension(16);
50
51 ReadOptions read_options;
52 read_options.use_mmap = true;
53 read_options.create_new = true;
54 int ret = column_indexer->open(meta, read_options);
55 ASSERT_EQ(ret, 0);
56
57 for (size_t i = 0; i < 1000; i++) {
58 std::vector<float> fvec(16U);
59 for (size_t j = 0; j < 16U; j++) {
60 fvec[j] = i * 1.0f;
61 }
62
63 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
64 ColumnData column_data;
65 column_data.column_name = "test_column";
66 column_data.data_type = DataTypes::VECTOR_FP32;
67 column_data.dimension = 16;
68 column_data.data = vector;
69 ret = column_indexer->insert(i, column_data);
70 ASSERT_EQ(ret, 0);
71 }
72
73 auto dumper = aitheta2::IndexFactory::CreateDumper("FileDumper");
74 ASSERT_NE(dumper, nullptr);
75
76 ret = dumper->create("data.seg.0");
77 ASSERT_EQ(ret, 0);
78
79 IndexDumperPtr column_dumper = std::make_shared<IndexSegmentDumper>(
80 dumper, COLUMN_DUMP_BLOCK + "test_column");
81
82 ret = column_indexer->dump(column_dumper);
83 ASSERT_EQ(ret, 0);
84
85 column_dumper->close();
86 dumper->close();
87 ret = column_indexer->close();
88 ASSERT_EQ(ret, 0);
89
90 auto column_reader =
91 ColumnReader::Create("test_collection", "./", 0, "test_column",
92 IndexTypes::PROXIMA_GRAPH_INDEX);
93 ASSERT_NE(column_reader, nullptr);
94
95 column_reader->set_concurrency(10);
96 read_options.create_new = false;
97 ret = column_reader->open(meta, read_options);
98 ASSERT_EQ(ret, 0);
99
100 // knn search
101 for (size_t i = 0; i < 1000; i++) {
102 std::vector<float> fvec(16U);
103 for (size_t j = 0; j < 16U; j++) {
104 fvec[j] = i * 1.0f;
105 }
106
107 IndexDocumentList result_list;
108 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
109 QueryParams query_params;
110 query_params.topk = 10;
111 ret = column_reader->search(query, query_params, nullptr, &result_list);
112 ASSERT_EQ(ret, 0);
113 ASSERT_EQ(result_list[0].score(), 0.0f);
114 ASSERT_EQ(result_list[0].key(), i);
115 }
116
117 // linear search
118 for (size_t i = 0; i < 1000; i++) {
119 std::vector<float> fvec(16U);
120 for (size_t j = 0; j < 16U; j++) {
121 fvec[j] = i * 1.0f;
122 }
123
124 IndexDocumentList result_list;
125 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
126 QueryParams query_params;
127 query_params.topk = 10;
128 query_params.is_linear = true;
129 ret = column_reader->search(query, query_params, nullptr, &result_list);
130 ASSERT_EQ(ret, 0);
131 ASSERT_EQ(result_list[0].score(), 0.0f);
132 ASSERT_EQ(result_list[0].key(), i);
133 }
134
135 column_reader->close();
136}
137
138TEST_F(ColumnReaderTest, TestQuantizeINT8InnerProduct) {
139 auto column_indexer =
140 ColumnIndexer::Create("test_collection_int8_ip", "./", 0, "test_column",
141 IndexTypes::PROXIMA_GRAPH_INDEX);
142 meta::ColumnMeta meta;
143 meta.set_name("test_column");
144 meta.set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
145 meta.set_data_type(DataTypes::VECTOR_FP32);
146 meta.set_dimension(16);
147 meta.mutable_parameters()->set("quantize_type", "DT_VECTOR_INT8");
148 meta.mutable_parameters()->set("metric_type", "InnerProduct");
149 ReadOptions read_options;
150 read_options.use_mmap = true;
151 read_options.create_new = true;
152 int ret = column_indexer->open(meta, read_options);
153 ASSERT_EQ(ret, 0);
154 for (size_t i = 0; i <= 1000; i++) {
155 std::vector<float> fvec(16U);
156 for (size_t j = 0; j < 16U; j++) {
157 fvec[j] = i * 0.001f;
158 }
159 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
160 ColumnData column_data;
161 column_data.column_name = "test_column";
162 column_data.data_type = DataTypes::VECTOR_FP32;
163 column_data.dimension = 16;
164 column_data.data = vector;
165 ret = column_indexer->insert(i, column_data);
166 ASSERT_EQ(ret, 0);
167 }
168 auto dumper = aitheta2::IndexFactory::CreateDumper("FileDumper");
169 ASSERT_NE(dumper, nullptr);
170 ret = dumper->create("data.seg.0");
171 ASSERT_EQ(ret, 0);
172 IndexDumperPtr column_dumper = std::make_shared<IndexSegmentDumper>(
173 dumper, COLUMN_DUMP_BLOCK + "test_column");
174 ret = column_indexer->dump(column_dumper);
175 ASSERT_EQ(ret, 0);
176 column_dumper->close();
177 dumper->close();
178 ret = column_indexer->close();
179 ASSERT_EQ(ret, 0);
180 auto column_reader =
181 ColumnReader::Create("test_collection", "./", 0, "test_column",
182 IndexTypes::PROXIMA_GRAPH_INDEX);
183 ASSERT_NE(column_reader, nullptr);
184 column_reader->set_concurrency(10);
185 read_options.create_new = false;
186 ret = column_reader->open(meta, read_options);
187 ASSERT_EQ(ret, 0);
188 std::vector<float> fvec(16U);
189 for (size_t j = 0; j < 16U; j++) {
190 fvec[j] = 1.0f;
191 }
192 IndexDocumentList result_list;
193 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
194 QueryParams query_params;
195 query_params.topk = 10;
196 ret = column_reader->search(query, query_params, nullptr, &result_list);
197 ASSERT_EQ(ret, 0);
198 LOG_DEBUG("%f", result_list[0].score());
199 ASSERT_EQ(1000, result_list[0].key());
200 ASSERT_NEAR(result_list[0].score(), 16.0f, 0.1f);
201 column_reader->close();
202}