1/**
2 * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "index/segment/persist_segment.h"
18#include <gtest/gtest.h>
19#include "index/segment/memory_segment.h"
20
21using namespace proxima::be;
22using namespace proxima::be::index;
23
24class PersistSegmentTest : public testing::Test {
25 protected:
26 void SetUp() {
27 char cmd_buf[100];
28 snprintf(cmd_buf, 100, "rm -rf ./teachers/");
29 system(cmd_buf);
30 FillSchema();
31 }
32
33 void TearDown() {}
34
35 void FillSchema() {
36 schema_ = std::make_shared<meta::CollectionMeta>();
37 meta::ColumnMetaPtr column_meta = std::make_shared<meta::ColumnMeta>();
38 column_meta->set_name("face");
39 column_meta->set_index_type(IndexTypes::PROXIMA_GRAPH_INDEX);
40 column_meta->set_data_type(DataTypes::VECTOR_FP32);
41 column_meta->set_dimension(16);
42 column_meta->mutable_parameters()->set("metric_type", "SquaredEuclidean");
43 schema_->append(column_meta);
44 schema_->set_name("teachers");
45 }
46
47 protected:
48 meta::CollectionMetaPtr schema_{};
49};
50
51TEST_F(PersistSegmentTest, TestGeneral) {
52 DeleteStore delete_store("teachers", "./teachers/");
53 ReadOptions read_options;
54 read_options.use_mmap = true;
55 read_options.create_new = true;
56 int ret = delete_store.open(read_options);
57 ASSERT_EQ(ret, 0);
58
59 IDMap id_map("teachers", "./teachers/");
60 ret = id_map.open(read_options);
61 ASSERT_EQ(ret, 0);
62
63 SegmentMeta segment_meta;
64 segment_meta.segment_id = 0;
65
66 MemorySegmentPtr memory_segment =
67 MemorySegment::Create("teachers", "./teachers/", segment_meta,
68 schema_.get(), &delete_store, &id_map, 5);
69 ASSERT_TRUE(memory_segment != nullptr);
70
71 ret = memory_segment->open(read_options);
72 ASSERT_EQ(ret, 0);
73
74 for (size_t i = 0; i < 1000; i++) {
75 Record record;
76 record.primary_key = i;
77 record.lsn = i;
78 record.forward_data = "hello";
79
80 CollectionDataset::ColumnData new_column;
81 new_column.column_name = "face";
82 new_column.data_type = DataTypes::VECTOR_FP32;
83 new_column.dimension = 16U;
84
85 std::vector<float> fvec(16U);
86 for (size_t j = 0; j < 16U; j++) {
87 fvec[j] = i * 1.0f;
88 }
89 std::string vector((char *)fvec.data(), fvec.size() * sizeof(float));
90 new_column.data = vector;
91 record.column_datas.emplace_back(new_column);
92
93 idx_t doc_id;
94 ret = memory_segment->insert(record, &doc_id);
95 ASSERT_EQ(ret, 0);
96 ASSERT_EQ(doc_id, i);
97 id_map.insert(record.primary_key, doc_id);
98 }
99
100 ret = memory_segment->dump();
101 ASSERT_EQ(ret, 0);
102
103 PersistSegmentPtr persist_segment = PersistSegment::Create(
104 "teachers", "./teachers/", memory_segment->segment_meta(), schema_.get(),
105 &delete_store, &id_map, 5);
106 ASSERT_NE(persist_segment, nullptr);
107
108 read_options.create_new = false;
109 ret = persist_segment->load(read_options);
110 ASSERT_EQ(ret, 0);
111
112 auto &meta0 = memory_segment->segment_meta();
113 auto &meta1 = persist_segment->segment_meta();
114 ASSERT_EQ(meta0.segment_id, meta1.segment_id);
115 ASSERT_EQ(meta0.state, meta1.state);
116 ASSERT_EQ(meta0.index_file_size, meta1.index_file_size);
117 ASSERT_EQ(meta0.min_doc_id, meta1.min_doc_id);
118 ASSERT_EQ(meta0.max_doc_id, meta1.max_doc_id);
119 ASSERT_EQ(meta0.min_primary_key, meta1.min_primary_key);
120 ASSERT_EQ(meta0.max_primary_key, meta1.max_primary_key);
121 ASSERT_EQ(meta0.min_timestamp, meta1.min_timestamp);
122 ASSERT_EQ(meta0.max_timestamp, meta1.max_timestamp);
123
124 for (size_t i = 0; i < 1000; i++) {
125 std::vector<float> fvec(16U);
126 for (size_t j = 0; j < 16U; j++) {
127 fvec[j] = i * 1.0f;
128 }
129 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
130 QueryParams query_params;
131 query_params.topk = 10;
132 query_params.data_type = DataTypes::VECTOR_FP32;
133 query_params.dimension = 16;
134
135 QueryResultList result_list;
136 ret =
137 persist_segment->knn_search("face", query, query_params, &result_list);
138 ASSERT_EQ(ret, 0);
139
140 ASSERT_EQ(result_list[0].primary_key, i);
141 ASSERT_EQ(result_list[0].score, 0.0f);
142 ASSERT_EQ(result_list[0].lsn, i);
143 ASSERT_EQ(result_list[0].forward_data, "hello");
144 }
145
146 for (size_t i = 0; i < 1000; i++) {
147 std::vector<float> fvec(16U);
148 for (size_t j = 0; j < 16U; j++) {
149 fvec[j] = i * 1.0f;
150 }
151 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
152 QueryParams query_params;
153 query_params.topk = 10;
154 query_params.data_type = DataTypes::VECTOR_FP32;
155 query_params.dimension = 16;
156 query_params.radius = 0.1f;
157
158 QueryResultList result_list;
159 ret =
160 persist_segment->knn_search("face", query, query_params, &result_list);
161 ASSERT_EQ(ret, 0);
162
163 ASSERT_EQ(result_list.size(), 1);
164 ASSERT_EQ(result_list[0].primary_key, i);
165 ASSERT_EQ(result_list[0].score, 0.0f);
166 ASSERT_EQ(result_list[0].lsn, i);
167 ASSERT_EQ(result_list[0].forward_data, "hello");
168 }
169
170 for (size_t i = 0; i < 1000; i++) {
171 QueryResult result;
172 ret = persist_segment->kv_search(i, &result);
173 ASSERT_EQ(ret, 0);
174 ASSERT_EQ(result.primary_key, i);
175 ASSERT_EQ(result.score, 0.0f);
176 ASSERT_EQ(result.lsn, i);
177 ASSERT_EQ(result.forward_data, "hello");
178 }
179
180 for (size_t i = 0; i < 1000; i++) {
181 ret = delete_store.insert(i);
182 ASSERT_EQ(ret, 0);
183 }
184
185 for (size_t i = 0; i < 1000; i++) {
186 std::vector<float> fvec(16U);
187 for (size_t j = 0; j < 16U; j++) {
188 fvec[j] = i * 1.0f;
189 }
190 std::string query((char *)fvec.data(), fvec.size() * sizeof(float));
191 QueryParams query_params;
192 query_params.topk = 10;
193 query_params.data_type = DataTypes::VECTOR_FP32;
194 query_params.dimension = 16;
195
196 QueryResultList result_list;
197 ret =
198 persist_segment->knn_search("face", query, query_params, &result_list);
199 ASSERT_EQ(ret, 0);
200 ASSERT_EQ(result_list.size(), 0);
201 }
202
203 for (size_t i = 0; i < 1000; i++) {
204 QueryResult result;
205 ret = persist_segment->kv_search(i, &result);
206 ASSERT_EQ(ret, 0);
207 ASSERT_EQ(result.primary_key, INVALID_KEY);
208 }
209}
210