1 | /** |
2 | * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "index/segment/persist_segment.h" |
18 | #include <gtest/gtest.h> |
19 | #include "index/segment/memory_segment.h" |
20 | |
21 | using namespace proxima::be; |
22 | using namespace proxima::be::index; |
23 | |
24 | class PersistSegmentTest : public testing::Test { |
25 | protected: |
26 | void SetUp() { |
27 | char cmd_buf[100]; |
28 | snprintf(cmd_buf, 100, "rm -rf ./teachers/" ); |
29 | system(cmd_buf); |
30 | FillSchema(); |
31 | } |
32 | |
33 | void TearDown() {} |
34 | |
35 | void FillSchema() { |
36 | schema_ = std::make_shared<meta::CollectionMeta>(); |
37 | meta::ColumnMetaPtr column_meta = std::make_shared<meta::ColumnMeta>(); |
38 | column_meta->set_name("face" ); |
39 | column_meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX); |
40 | column_meta->set_data_type(DataTypes::VECTOR_FP32); |
41 | column_meta->set_dimension(16); |
42 | column_meta->mutable_parameters()->set("metric_type" , "SquaredEuclidean" ); |
43 | schema_->append(column_meta); |
44 | schema_->set_name("teachers" ); |
45 | } |
46 | |
47 | protected: |
48 | meta::CollectionMetaPtr schema_{}; |
49 | }; |
50 | |
51 | TEST_F(PersistSegmentTest, TestGeneral) { |
52 | DeleteStore delete_store("teachers" , "./teachers/" ); |
53 | ReadOptions read_options; |
54 | read_options.use_mmap = true; |
55 | read_options.create_new = true; |
56 | int ret = delete_store.open(read_options); |
57 | ASSERT_EQ(ret, 0); |
58 | |
59 | IDMap id_map("teachers" , "./teachers/" ); |
60 | ret = id_map.open(read_options); |
61 | ASSERT_EQ(ret, 0); |
62 | |
63 | SegmentMeta segment_meta; |
64 | segment_meta.segment_id = 0; |
65 | |
66 | MemorySegmentPtr memory_segment = |
67 | MemorySegment::Create("teachers" , "./teachers/" , segment_meta, |
68 | schema_.get(), &delete_store, &id_map, 5); |
69 | ASSERT_TRUE(memory_segment != nullptr); |
70 | |
71 | ret = memory_segment->open(read_options); |
72 | ASSERT_EQ(ret, 0); |
73 | |
74 | for (size_t i = 0; i < 1000; i++) { |
75 | Record record; |
76 | record.primary_key = i; |
77 | record.lsn = i; |
78 | record.forward_data = "hello" ; |
79 | |
80 | CollectionDataset::ColumnData new_column; |
81 | new_column.column_name = "face" ; |
82 | new_column.data_type = DataTypes::VECTOR_FP32; |
83 | new_column.dimension = 16U; |
84 | |
85 | std::vector<float> fvec(16U); |
86 | for (size_t j = 0; j < 16U; j++) { |
87 | fvec[j] = i * 1.0f; |
88 | } |
89 | std::string vector((char *)fvec.data(), fvec.size() * sizeof(float)); |
90 | new_column.data = vector; |
91 | record.column_datas.emplace_back(new_column); |
92 | |
93 | idx_t doc_id; |
94 | ret = memory_segment->insert(record, &doc_id); |
95 | ASSERT_EQ(ret, 0); |
96 | ASSERT_EQ(doc_id, i); |
97 | id_map.insert(record.primary_key, doc_id); |
98 | } |
99 | |
100 | ret = memory_segment->dump(); |
101 | ASSERT_EQ(ret, 0); |
102 | |
103 | PersistSegmentPtr persist_segment = PersistSegment::Create( |
104 | "teachers" , "./teachers/" , memory_segment->segment_meta(), schema_.get(), |
105 | &delete_store, &id_map, 5); |
106 | ASSERT_NE(persist_segment, nullptr); |
107 | |
108 | read_options.create_new = false; |
109 | ret = persist_segment->load(read_options); |
110 | ASSERT_EQ(ret, 0); |
111 | |
112 | auto &meta0 = memory_segment->segment_meta(); |
113 | auto &meta1 = persist_segment->segment_meta(); |
114 | ASSERT_EQ(meta0.segment_id, meta1.segment_id); |
115 | ASSERT_EQ(meta0.state, meta1.state); |
116 | ASSERT_EQ(meta0.index_file_size, meta1.index_file_size); |
117 | ASSERT_EQ(meta0.min_doc_id, meta1.min_doc_id); |
118 | ASSERT_EQ(meta0.max_doc_id, meta1.max_doc_id); |
119 | ASSERT_EQ(meta0.min_primary_key, meta1.min_primary_key); |
120 | ASSERT_EQ(meta0.max_primary_key, meta1.max_primary_key); |
121 | ASSERT_EQ(meta0.min_timestamp, meta1.min_timestamp); |
122 | ASSERT_EQ(meta0.max_timestamp, meta1.max_timestamp); |
123 | |
124 | for (size_t i = 0; i < 1000; i++) { |
125 | std::vector<float> fvec(16U); |
126 | for (size_t j = 0; j < 16U; j++) { |
127 | fvec[j] = i * 1.0f; |
128 | } |
129 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
130 | QueryParams query_params; |
131 | query_params.topk = 10; |
132 | query_params.data_type = DataTypes::VECTOR_FP32; |
133 | query_params.dimension = 16; |
134 | |
135 | QueryResultList result_list; |
136 | ret = |
137 | persist_segment->knn_search("face" , query, query_params, &result_list); |
138 | ASSERT_EQ(ret, 0); |
139 | |
140 | ASSERT_EQ(result_list[0].primary_key, i); |
141 | ASSERT_EQ(result_list[0].score, 0.0f); |
142 | ASSERT_EQ(result_list[0].lsn, i); |
143 | ASSERT_EQ(result_list[0].forward_data, "hello" ); |
144 | } |
145 | |
146 | for (size_t i = 0; i < 1000; i++) { |
147 | std::vector<float> fvec(16U); |
148 | for (size_t j = 0; j < 16U; j++) { |
149 | fvec[j] = i * 1.0f; |
150 | } |
151 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
152 | QueryParams query_params; |
153 | query_params.topk = 10; |
154 | query_params.data_type = DataTypes::VECTOR_FP32; |
155 | query_params.dimension = 16; |
156 | query_params.radius = 0.1f; |
157 | |
158 | QueryResultList result_list; |
159 | ret = |
160 | persist_segment->knn_search("face" , query, query_params, &result_list); |
161 | ASSERT_EQ(ret, 0); |
162 | |
163 | ASSERT_EQ(result_list.size(), 1); |
164 | ASSERT_EQ(result_list[0].primary_key, i); |
165 | ASSERT_EQ(result_list[0].score, 0.0f); |
166 | ASSERT_EQ(result_list[0].lsn, i); |
167 | ASSERT_EQ(result_list[0].forward_data, "hello" ); |
168 | } |
169 | |
170 | for (size_t i = 0; i < 1000; i++) { |
171 | QueryResult result; |
172 | ret = persist_segment->kv_search(i, &result); |
173 | ASSERT_EQ(ret, 0); |
174 | ASSERT_EQ(result.primary_key, i); |
175 | ASSERT_EQ(result.score, 0.0f); |
176 | ASSERT_EQ(result.lsn, i); |
177 | ASSERT_EQ(result.forward_data, "hello" ); |
178 | } |
179 | |
180 | for (size_t i = 0; i < 1000; i++) { |
181 | ret = delete_store.insert(i); |
182 | ASSERT_EQ(ret, 0); |
183 | } |
184 | |
185 | for (size_t i = 0; i < 1000; i++) { |
186 | std::vector<float> fvec(16U); |
187 | for (size_t j = 0; j < 16U; j++) { |
188 | fvec[j] = i * 1.0f; |
189 | } |
190 | std::string query((char *)fvec.data(), fvec.size() * sizeof(float)); |
191 | QueryParams query_params; |
192 | query_params.topk = 10; |
193 | query_params.data_type = DataTypes::VECTOR_FP32; |
194 | query_params.dimension = 16; |
195 | |
196 | QueryResultList result_list; |
197 | ret = |
198 | persist_segment->knn_search("face" , query, query_params, &result_list); |
199 | ASSERT_EQ(ret, 0); |
200 | ASSERT_EQ(result_list.size(), 0); |
201 | } |
202 | |
203 | for (size_t i = 0; i < 1000; i++) { |
204 | QueryResult result; |
205 | ret = persist_segment->kv_search(i, &result); |
206 | ASSERT_EQ(ret, 0); |
207 | ASSERT_EQ(result.primary_key, INVALID_KEY); |
208 | } |
209 | } |
210 | |