1 | /** |
2 | * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | |
16 | * \author Hechong.xyf |
17 | * \date Oct 2019 |
18 | * \brief Interface of AiTheta Index Realtime Streamer |
19 | */ |
20 | |
21 | #ifndef __AITHETA2_INDEX_STREAMER_H__ |
22 | #define __AITHETA2_INDEX_STREAMER_H__ |
23 | |
24 | #include "index_context.h" |
25 | #include "index_helper.h" |
26 | #include "index_provider.h" |
27 | #include "index_stats.h" |
28 | #include "index_threads.h" |
29 | |
30 | namespace aitheta2 { |
31 | |
32 | /*! Index Streamer |
33 | */ |
34 | class IndexStreamer : public IndexModule { |
35 | public: |
36 | //! Index Streamer Pointer |
37 | typedef std::shared_ptr<IndexStreamer> Pointer; |
38 | |
39 | /*! Index Streamer Stats |
40 | */ |
41 | class Stats : public IndexStats { |
42 | public: |
43 | //! Set revision id |
44 | void set_revision_id(size_t rev) { |
45 | revision_id_ = rev; |
46 | } |
47 | |
48 | //! Set count of documents loaded |
49 | void set_loaded_count(size_t count) { |
50 | loaded_count_ = count; |
51 | } |
52 | |
53 | //! Set count of documents added |
54 | void set_added_count(size_t count) { |
55 | added_count_ = count; |
56 | } |
57 | |
58 | //! Set count of documents discarded |
59 | void set_discarded_count(size_t count) { |
60 | discarded_count_ = count; |
61 | } |
62 | |
63 | //! Set count of documents updated |
64 | void set_updated_count(size_t count) { |
65 | updated_count_ = count; |
66 | } |
67 | |
68 | //! Set count of documents deleted |
69 | void set_deleted_count(size_t count) { |
70 | deleted_count_ = count; |
71 | } |
72 | |
73 | //! Set size of index |
74 | void set_index_size(size_t count) { |
75 | index_size_ = count; |
76 | } |
77 | |
78 | //! Set size of index dumped |
79 | void set_dumped_size(size_t count) { |
80 | dumped_size_ = count; |
81 | } |
82 | |
83 | //! Retrieve create time |
84 | void set_create_time(uint64_t val) { |
85 | create_time_ = val; |
86 | } |
87 | |
88 | //! Retrieve update time |
89 | void set_update_time(uint64_t val) { |
90 | update_time_ = val; |
91 | } |
92 | |
93 | //! Retrieve revision id |
94 | size_t revision_id(void) const { |
95 | return revision_id_; |
96 | } |
97 | |
98 | //! Retrieve count of documents loaded |
99 | size_t loaded_count(void) const { |
100 | return loaded_count_; |
101 | } |
102 | |
103 | //! Retrieve count of documents added |
104 | size_t added_count(void) const { |
105 | return added_count_; |
106 | } |
107 | |
108 | //! Retrieve count of documents discarded |
109 | size_t discarded_count(void) const { |
110 | return discarded_count_; |
111 | } |
112 | |
113 | //! Retrieve count of documents updated |
114 | size_t updated_count(void) const { |
115 | return updated_count_; |
116 | } |
117 | |
118 | //! Retrieve count of documents deleted |
119 | size_t deleted_count(void) const { |
120 | return deleted_count_; |
121 | } |
122 | |
123 | //! Retrieve size of index |
124 | size_t index_size(void) const { |
125 | return index_size_; |
126 | } |
127 | |
128 | //! Retrieve size of index dumped |
129 | size_t dumped_size(void) const { |
130 | return dumped_size_; |
131 | } |
132 | |
133 | //! Retrieve check point of index |
134 | uint64_t check_point(void) const { |
135 | return check_point_; |
136 | } |
137 | |
138 | //! Retrieve create time of index |
139 | uint64_t create_time(void) const { |
140 | return create_time_; |
141 | } |
142 | |
143 | //! Retrieve update time of index |
144 | uint64_t update_time(void) const { |
145 | return update_time_; |
146 | } |
147 | |
148 | //! Retrieve count of documents loaded (mutable) |
149 | size_t *mutable_loaded_count(void) { |
150 | return &loaded_count_; |
151 | } |
152 | |
153 | //! Retrieve count of documents added (mutable) |
154 | size_t *mutable_added_count(void) { |
155 | return &added_count_; |
156 | } |
157 | |
158 | //! Retrieve count of documents discarded (mutable) |
159 | size_t *mutable_discarded_count(void) { |
160 | return &discarded_count_; |
161 | } |
162 | |
163 | //! Retrieve count of documents updated (mutable) |
164 | size_t *mutable_updated_count(void) { |
165 | return &updated_count_; |
166 | } |
167 | |
168 | //! Retrieve count of documents deleted (mutable) |
169 | size_t *mutable_deleted_count(void) { |
170 | return &deleted_count_; |
171 | } |
172 | |
173 | //! Retrieve size of index (mutable) |
174 | size_t *mutable_index_size(void) { |
175 | return &index_size_; |
176 | } |
177 | |
178 | //! Retrieve size of index dumped (mutable) |
179 | size_t *mutable_dumped_size(void) { |
180 | return &dumped_size_; |
181 | } |
182 | |
183 | //! Retrieve check point of index (mutable) |
184 | uint64_t *mutable_check_point(void) { |
185 | return &check_point_; |
186 | } |
187 | |
188 | //! Retrieve create time of index (mutable) |
189 | uint64_t *mutable_create_time(void) { |
190 | return &create_time_; |
191 | } |
192 | |
193 | //! Retrieve update time of index (mutable) |
194 | uint64_t *mutable_update_time(void) { |
195 | return &update_time_; |
196 | } |
197 | |
198 | private: |
199 | //! Members |
200 | size_t revision_id_{0u}; |
201 | size_t loaded_count_{0u}; |
202 | size_t added_count_{0u}; |
203 | size_t discarded_count_{0u}; |
204 | size_t updated_count_{0u}; |
205 | size_t deleted_count_{0u}; |
206 | size_t index_size_{0u}; |
207 | size_t dumped_size_{0u}; |
208 | uint64_t check_point_{0u}; |
209 | uint64_t create_time_{0u}; |
210 | uint64_t update_time_{0u}; |
211 | }; |
212 | |
213 | /*! Index Streamer Context |
214 | */ |
215 | struct Context : public IndexContext {}; |
216 | |
217 | /*! Index Streamer Provider |
218 | */ |
219 | struct Provider : public IndexProvider {}; |
220 | |
221 | //! Destructor |
222 | virtual ~IndexStreamer(void) {} |
223 | |
224 | //! Initialize the streamer |
225 | virtual int init(const IndexMeta &mt, const IndexParams ¶ms) = 0; |
226 | |
227 | //! Cleanup the streamer |
228 | virtual int cleanup(void) = 0; |
229 | |
230 | //! Create a context |
231 | virtual Context::Pointer create_context(void) const = 0; |
232 | |
233 | //! Similarity search |
234 | virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, |
235 | Context::Pointer &context) const = 0; |
236 | |
237 | //! Similarity search |
238 | virtual int search_impl(const void *query, const IndexQueryMeta &qmeta, |
239 | uint32_t count, Context::Pointer &context) const = 0; |
240 | |
241 | //! Similarity brute force search |
242 | virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, |
243 | Context::Pointer &context) const = 0; |
244 | |
245 | //! Similarity brute force search |
246 | virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta, |
247 | uint32_t count, |
248 | Context::Pointer &context) const = 0; |
249 | |
250 | //! Add a vector into index |
251 | virtual int add_impl(uint64_t key, const void *query, |
252 | const IndexQueryMeta &qmeta, |
253 | Context::Pointer &context) = 0; |
254 | |
255 | //! Update the vector in index |
256 | virtual int update_impl(uint64_t /*key*/, const void * /*query*/, |
257 | const IndexQueryMeta & /*qmeta*/, |
258 | Context::Pointer & /*context*/) { |
259 | return IndexError_NotImplemented; |
260 | } |
261 | |
262 | //! Delete the vector in index |
263 | virtual int remove_impl(uint64_t /*key*/, Context::Pointer & /*context*/) { |
264 | return IndexError_NotImplemented; |
265 | } |
266 | |
267 | //! Optimize the index |
268 | virtual int optimize_impl(aitheta2::IndexThreads::Pointer) { |
269 | return aitheta2::IndexError_NotImplemented; |
270 | } |
271 | |
272 | //! Open a index from storage |
273 | virtual int open(IndexStorage::Pointer stg) = 0; |
274 | |
275 | //! Flush index |
276 | virtual int flush(uint64_t check_point) = 0; |
277 | |
278 | //! Close index |
279 | virtual int close(void) = 0; |
280 | |
281 | //! Dump index into storage |
282 | virtual int dump(const IndexDumper::Pointer &dumper) = 0; |
283 | |
284 | //! Retrieve statistics |
285 | virtual const Stats &stats(void) const = 0; |
286 | |
287 | //! Retrieve meta of index |
288 | virtual const IndexMeta &meta(void) const = 0; |
289 | |
290 | //! Initialize the streamer with container |
291 | virtual int init(IndexContainer::Pointer cntr, const IndexParams ¶ms) { |
292 | IndexMeta mt; |
293 | int ret = IndexHelper::DeserializeFromContainer(cntr.get(), &mt); |
294 | if (ret == 0) { |
295 | ret = this->init(mt, params); |
296 | } |
297 | return ret; |
298 | } |
299 | |
300 | //! Create a streamer provider |
301 | virtual Provider::Pointer create_provider(void) const { |
302 | return Provider::Pointer(); |
303 | } |
304 | |
305 | //! Similarity search (FP16) |
306 | template <IndexMeta::FeatureTypes FT, |
307 | typename = typename std::enable_if<FT == IndexMeta::FT_FP16>::type> |
308 | int search_bf(const ailego::Float16 *vec, size_t dim, |
309 | Context::Pointer &context) const { |
310 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), context); |
311 | } |
312 | |
313 | //! Similarity search (FP32) |
314 | template <IndexMeta::FeatureTypes FT, |
315 | typename = typename std::enable_if<FT == IndexMeta::FT_FP32>::type> |
316 | int search_bf(const float *vec, size_t dim, Context::Pointer &context) const { |
317 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), context); |
318 | } |
319 | |
320 | //! Similarity search (INT8) |
321 | template <IndexMeta::FeatureTypes FT, |
322 | typename = typename std::enable_if<FT == IndexMeta::FT_INT8>::type> |
323 | int search_bf(const int8_t *vec, size_t dim, |
324 | Context::Pointer &context) const { |
325 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), context); |
326 | } |
327 | |
328 | //! Similarity search (INT4) |
329 | template <IndexMeta::FeatureTypes FT, |
330 | typename = typename std::enable_if<FT == IndexMeta::FT_INT4>::type> |
331 | int search_bf(const uint8_t *vec, size_t dim, |
332 | Context::Pointer &context) const { |
333 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), context); |
334 | } |
335 | |
336 | //! Similarity search (BINARY) |
337 | template <IndexMeta::FeatureTypes FT, typename = typename std::enable_if< |
338 | FT == IndexMeta::FT_BINARY32>::type> |
339 | int search_bf(const uint32_t *vec, size_t dim, |
340 | Context::Pointer &context) const { |
341 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), context); |
342 | } |
343 | |
344 | //! Similarity search in batch (FP16) |
345 | template <IndexMeta::FeatureTypes FT, |
346 | typename = typename std::enable_if<FT == IndexMeta::FT_FP16>::type> |
347 | int search_bf(const ailego::Float16 *vec, size_t dim, size_t rows, |
348 | Context::Pointer &context) const { |
349 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
350 | } |
351 | |
352 | //! Similarity search in batch (FP32) |
353 | template <IndexMeta::FeatureTypes FT, |
354 | typename = typename std::enable_if<FT == IndexMeta::FT_FP32>::type> |
355 | int search_bf(const float *vec, size_t dim, size_t rows, |
356 | Context::Pointer &context) const { |
357 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
358 | } |
359 | |
360 | //! Similarity search in batch (INT8) |
361 | template <IndexMeta::FeatureTypes FT, |
362 | typename = typename std::enable_if<FT == IndexMeta::FT_INT8>::type> |
363 | int search_bf(const int8_t *vec, size_t dim, size_t rows, |
364 | Context::Pointer &context) const { |
365 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
366 | } |
367 | |
368 | //! Similarity Search in batch (INT4) |
369 | template <IndexMeta::FeatureTypes FT, |
370 | typename = typename std::enable_if<FT == IndexMeta::FT_INT4>::type> |
371 | int search_bf(const uint8_t *vec, size_t dim, size_t rows, |
372 | Context::Pointer &context) const { |
373 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
374 | } |
375 | |
376 | //! Similarity Search in batch (BINARY) |
377 | template <IndexMeta::FeatureTypes FT, typename = typename std::enable_if< |
378 | FT == IndexMeta::FT_BINARY32>::type> |
379 | int search_bf(const uint32_t *vec, size_t dim, size_t rows, |
380 | Context::Pointer &context) const { |
381 | return this->search_bf_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
382 | } |
383 | |
384 | //! Similarity search (FP16) |
385 | template <IndexMeta::FeatureTypes FT, |
386 | typename = typename std::enable_if<FT == IndexMeta::FT_FP16>::type> |
387 | int search(const ailego::Float16 *vec, size_t dim, |
388 | Context::Pointer &context) const { |
389 | return this->search_impl(vec, IndexQueryMeta(FT, dim), context); |
390 | } |
391 | |
392 | //! Similarity search (FP32) |
393 | template <IndexMeta::FeatureTypes FT, |
394 | typename = typename std::enable_if<FT == IndexMeta::FT_FP32>::type> |
395 | int search(const float *vec, size_t dim, Context::Pointer &context) const { |
396 | return this->search_impl(vec, IndexQueryMeta(FT, dim), context); |
397 | } |
398 | |
399 | //! Similarity search (INT8) |
400 | template <IndexMeta::FeatureTypes FT, |
401 | typename = typename std::enable_if<FT == IndexMeta::FT_INT8>::type> |
402 | int search(const int8_t *vec, size_t dim, Context::Pointer &context) const { |
403 | return this->search_impl(vec, IndexQueryMeta(FT, dim), context); |
404 | } |
405 | |
406 | //! Similarity search (INT4) |
407 | template <IndexMeta::FeatureTypes FT, |
408 | typename = typename std::enable_if<FT == IndexMeta::FT_INT4>::type> |
409 | int search(const uint8_t *vec, size_t dim, Context::Pointer &context) const { |
410 | return this->search_impl(vec, IndexQueryMeta(FT, dim), context); |
411 | } |
412 | |
413 | //! Similarity search (BINARY32) |
414 | template <IndexMeta::FeatureTypes FT, typename = typename std::enable_if< |
415 | FT == IndexMeta::FT_BINARY32>::type> |
416 | int search(const uint32_t *vec, size_t dim, Context::Pointer &context) const { |
417 | return this->search_impl(vec, IndexQueryMeta(FT, dim), context); |
418 | } |
419 | |
420 | //! Similarity search in batch (FP16) |
421 | template <IndexMeta::FeatureTypes FT, |
422 | typename = typename std::enable_if<FT == IndexMeta::FT_FP16>::type> |
423 | int search(const ailego::Float16 *vec, size_t dim, size_t rows, |
424 | Context::Pointer &context) const { |
425 | return this->search_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
426 | } |
427 | |
428 | //! Similarity search in batch (FP32) |
429 | template <IndexMeta::FeatureTypes FT, |
430 | typename = typename std::enable_if<FT == IndexMeta::FT_FP32>::type> |
431 | int search(const float *vec, size_t dim, size_t rows, |
432 | Context::Pointer &context) const { |
433 | return this->search_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
434 | } |
435 | |
436 | //! Similarity search in batch (INT8) |
437 | template <IndexMeta::FeatureTypes FT, |
438 | typename = typename std::enable_if<FT == IndexMeta::FT_INT8>::type> |
439 | int search(const int8_t *vec, size_t dim, size_t rows, |
440 | Context::Pointer &context) const { |
441 | return this->search_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
442 | } |
443 | |
444 | //! Similarity Search in batch (INT4) |
445 | template <IndexMeta::FeatureTypes FT, |
446 | typename = typename std::enable_if<FT == IndexMeta::FT_INT4>::type> |
447 | int search(const uint8_t *vec, size_t dim, size_t rows, |
448 | Context::Pointer &context) const { |
449 | return this->search_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
450 | } |
451 | |
452 | //! Similarity Search in batch (BINARY) |
453 | template <IndexMeta::FeatureTypes FT, typename = typename std::enable_if< |
454 | FT == IndexMeta::FT_BINARY32>::type> |
455 | int search(const uint32_t *vec, size_t dim, size_t rows, |
456 | Context::Pointer &context) const { |
457 | return this->search_impl(vec, IndexQueryMeta(FT, dim), rows, context); |
458 | } |
459 | |
460 | //! Add a vector into index (FP16) |
461 | template <IndexMeta::FeatureTypes FT, |
462 | typename = typename std::enable_if<FT == IndexMeta::FT_FP16>::type> |
463 | int add(uint64_t key, const ailego::Float16 *vec, size_t dim, |
464 | Context::Pointer &context) { |
465 | return this->add_impl(key, vec, IndexQueryMeta(FT, dim), context); |
466 | } |
467 | |
468 | //! Add a vector into index (FP32) |
469 | template <IndexMeta::FeatureTypes FT, |
470 | typename = typename std::enable_if<FT == IndexMeta::FT_FP32>::type> |
471 | int add(uint64_t key, const float *vec, size_t dim, |
472 | Context::Pointer &context) { |
473 | return this->add_impl(key, vec, IndexQueryMeta(FT, dim), context); |
474 | } |
475 | |
476 | //! Add a vector into index (INT8) |
477 | template <IndexMeta::FeatureTypes FT, |
478 | typename = typename std::enable_if<FT == IndexMeta::FT_INT8>::type> |
479 | int add(uint64_t key, const int8_t *vec, size_t dim, |
480 | Context::Pointer &context) { |
481 | return this->add_impl(key, vec, IndexQueryMeta(FT, dim), context); |
482 | } |
483 | |
484 | //! Add a vector into index (INT4) |
485 | template <IndexMeta::FeatureTypes FT, |
486 | typename = typename std::enable_if<FT == IndexMeta::FT_INT4>::type> |
487 | int add(uint64_t key, const uint8_t *vec, size_t dim, |
488 | Context::Pointer &context) { |
489 | return this->add_impl(key, vec, IndexQueryMeta(FT, dim), context); |
490 | } |
491 | |
492 | //! Add a vector into index (BINARY) |
493 | template <IndexMeta::FeatureTypes FT, typename = typename std::enable_if< |
494 | FT == IndexMeta::FT_BINARY32>::type> |
495 | int add(uint64_t key, const uint32_t *vec, size_t dim, |
496 | Context::Pointer &context) { |
497 | return this->add_impl(key, vec, IndexQueryMeta(FT, dim), context); |
498 | } |
499 | |
500 | //! Update the vector to index (FP16) |
501 | template <IndexMeta::FeatureTypes FT, |
502 | typename = typename std::enable_if<FT == IndexMeta::FT_FP16>::type> |
503 | int update(uint64_t key, const ailego::Float16 *vec, size_t dim, |
504 | Context::Pointer &context) { |
505 | return this->update_impl(key, vec, IndexQueryMeta(FT, dim), context); |
506 | } |
507 | |
508 | //! Update the vector to index (FP32) |
509 | template <IndexMeta::FeatureTypes FT, |
510 | typename = typename std::enable_if<FT == IndexMeta::FT_FP32>::type> |
511 | int update(uint64_t key, const float *vec, size_t dim, |
512 | Context::Pointer &context) { |
513 | return this->update_impl(key, vec, IndexQueryMeta(FT, dim), context); |
514 | } |
515 | |
516 | //! Update the vector to index (INT8) |
517 | template <IndexMeta::FeatureTypes FT, |
518 | typename = typename std::enable_if<FT == IndexMeta::FT_INT8>::type> |
519 | int update(uint64_t key, const int8_t *vec, size_t dim, |
520 | Context::Pointer &context) { |
521 | return this->update_impl(key, vec, IndexQueryMeta(FT, dim), context); |
522 | } |
523 | |
524 | //! Update the vector in index (INT4) |
525 | template <IndexMeta::FeatureTypes FT, |
526 | typename = typename std::enable_if<FT == IndexMeta::FT_INT4>::type> |
527 | int update(uint64_t key, const uint8_t *vec, size_t dim, |
528 | Context::Pointer &context) { |
529 | return this->update_impl(key, vec, IndexQueryMeta(FT, dim), context); |
530 | } |
531 | |
532 | //! Update the vector in index (BINARY) |
533 | template <IndexMeta::FeatureTypes FT, typename = typename std::enable_if< |
534 | FT == IndexMeta::FT_BINARY32>::type> |
535 | int update(uint64_t key, const uint32_t *vec, size_t dim, |
536 | Context::Pointer &context) { |
537 | return this->update_impl(key, vec, IndexQueryMeta(FT, dim), context); |
538 | } |
539 | |
540 | //! Delete the vector in index |
541 | int remove(uint64_t key, Context::Pointer &context) { |
542 | return this->remove_impl(key, context); |
543 | } |
544 | |
545 | //! Optimize the index |
546 | int optimize(aitheta2::IndexThreads::Pointer threads) { |
547 | return this->optimize_impl(threads); |
548 | } |
549 | }; |
550 | |
551 | } // namespace aitheta2 |
552 | |
553 | #endif // __AITHETA2_INDEX_STREAMER_H__ |
554 | |