1/**
2 * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15
16 * \author Haichao.chc
17 * \date Oct 2020
 * \brief Abstract classes describing the Proxima client interface and its
 *        actions. See the examples for more detailed usage.
20 */
21
22#pragma once
23
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
27
28namespace proxima {
29namespace be {
30
// Forward declarations of the plain data structures used by the client API.
struct Status;
struct ChannelOptions;
struct CollectionConfig;
struct CollectionInfo;
struct CollectionStats;

// Forward declarations of the abstract request, response and document
// interfaces.
class WriteRequest;
class QueryRequest;
class QueryResponse;
class GetDocumentRequest;
class GetDocumentResponse;
class Document;

// Forward declaration of the client interface and its shared-pointer alias.
class ProximaSearchClient;
using ProximaSearchClientPtr = std::shared_ptr<ProximaSearchClient>;
45
46/**
47 * ProximaSearchClient wrappers the operations used to call proxima search
48 * engine's service. Server may be running on another machines. It shields
49 * implementation of communication protocol and rpc details, and provide
50 * extremely high bench performance.
51 *
52 * Usage exp:
53 * auto client = ProximaSearchClient::Create();
54 * if (client != nullptr) {
55 * client->connect(ChannelOptions("127.0.0.1:16000"));
56 * ...
57 * client->create_collection();
58 * client->close();
59 * }
60 *
61 * Please read the examples/client_example.cc for more details.
62 *
63 * Note: the functions of this class are sync call.
64 */
65class ProximaSearchClient {
66 public:
67 //! Destructor
68 virtual ~ProximaSearchClient() = default;
69
70 /// @brief Create a client instance and return its shared ptr.
71 ///
72 /// @param type Client type, support "GrpcClient" and "HttpClient" now.
73 /// @return Shared ptr pointed to client impl.
74 ///
75 /// @note If input type is wrong, it may return nullptr
76 static ProximaSearchClientPtr Create(const std::string &type);
77
78 //! Create a shared ptr of client with default type
79 static ProximaSearchClientPtr Create();
80
81 /// @brief Try to connect remote server and establish connection.
82 ///
83 /// @param options Socket connection relative configs.
84 /// @return Status.code 0 means success, other means fail
85 ///
86 /// @note This function will try to send a list collections command
87 /// to test if the server alive.
88 virtual Status connect(const ChannelOptions &options) = 0;
89
90 //! Close connection to remote server and cleanup self
91 virtual Status close() = 0;
92
93 /// @brief Create a collection with specific config.
94 ///
95 /// @param config Collection config
96 /// @return Status.code 0 means success, other means fail
97 virtual Status create_collection(const CollectionConfig &config) = 0;
98
99 /// @brief Drop a collection with specific name.
100 ///
101 /// @param collection_name Collection name
102 /// @return Status.code 0 means success, other means fail
103 virtual Status drop_collection(const std::string &collection_name) = 0;
104
105 /// @brief Show the detailed information of collection.
106 ///
107 /// @param[in] collection_name Collection name
108 /// @param[out] collection_info Collection information
109 /// @return Status.code 0 means success, other means fail
110 virtual Status describe_collection(const std::string &collection_name,
111 CollectionInfo *collection_info) = 0;
112
113 /// @brief Get collection statics.
114 ///
115 /// @param[in] collection_name Collection name
116 /// @param[out] stats Collection statistics struct
117 /// @return Status.code 0 means success, other means fail.
118 virtual Status stats_collection(const std::string &collection_name,
119 CollectionStats *stats) = 0;
120
121 /// @brief List all collections.
122 ///
123 /// @param[out] collections Collection infomations
124 /// @return Status.code 0 means success, other means fail
125 virtual Status list_collections(std::vector<CollectionInfo> *collections) = 0;
126
127 /// @brief Insert/Update/Delete records.
128 ///
129 /// @param request Write request
130 /// @return Status.code means success, other means fail
131 virtual Status write(const WriteRequest &request) = 0;
132
133 /// @brief Knn query similar results
134 ///
135 /// @param[in] request Query request
136 /// @param[out] respnose Query response
137 /// @return Status.code means success, other means fail
138 virtual Status query(const QueryRequest &request,
139 QueryResponse *response) = 0;
140
141 /// @brief Get document by primary key
142 ///
143 /// @param[in] request Get document request
144 /// @param[out] response Get document response
145 /// @return Status.code means success, other means fail
146 virtual Status get_document_by_key(const GetDocumentRequest &request,
147 GetDocumentResponse *response) = 0;
148};
149
/**
 * Index type of an index column. PROXIMA_GRAPH_INDEX, used for vector
 * columns, is the only supported index type.
 */
enum class IndexType : uint32_t {
  UNDEFINED = 0,
  PROXIMA_GRAPH_INDEX = 1,
};
154
/**
 * Supported input data types. The numeric values are fixed explicitly;
 * vector types start at 20.
 */
enum class DataType : uint32_t {
  UNDEFINED = 0,

  // Non-vector types.
  BINARY = 1,
  STRING = 2,
  BOOL = 3,
  INT32 = 4,
  INT64 = 5,
  UINT32 = 6,
  UINT64 = 7,
  FLOAT = 8,
  DOUBLE = 9,

  // Vector types.
  VECTOR_BINARY32 = 20,
  VECTOR_BINARY64 = 21,
  VECTOR_FP16 = 22,
  VECTOR_FP32 = 23,
  VECTOR_FP64 = 24,
  VECTOR_INT4 = 25,
  VECTOR_INT8 = 26,
  VECTOR_INT16 = 27,
};
179
/**
 * Operation applied to a record of a write request.
 */
enum class OperationType : uint32_t {
  INSERT = 0,
  UPDATE = 1,
  DELETE = 2,
};
184
/**
 * Status wraps the outcome of a call to the remote server.
 */
struct Status {
  /// Response error code: 0 means success, any non-zero value means error.
  /// Initialised with a signed literal to match the signed field type.
  int code{0};

  /// Response message; "Success" by default.
  std::string reason{"Success"};
};
197
/**
 * ChannelOptions represents the connection config.
 */
struct ChannelOptions {
  /// Host name of the proxima be server, for example "127.0.0.1:16000".
  /// Required field.
  std::string host{};

  /// Maximum duration of one rpc, in milliseconds.
  /// Optional field, default 1000.
  uint32_t timeout_ms{1000U};

  /// Maximum number of retries when an rpc fails.
  /// Optional field, default 3.
  uint32_t max_retry{3U};

  /// Connection pool count.
  /// Optional field, default 1.
  uint32_t connection_count{1U};

  /// @brief Build options for the given server address; every other field
  ///        keeps its default.
  ///
  /// @param val  Server address, stored in `host`. Taken by value and moved
  ///             into place so that temporaries are not copied.
  ///
  /// @note Deliberately left non-explicit so that existing implicit
  ///       conversions from std::string keep compiling.
  ChannelOptions(std::string val) : host(std::move(val)) {}
};
221
/**
 * Generic key-value pair of strings; both members are empty by default.
 */
struct KVPair {
  /// Key of the pair.
  std::string key{};

  /// Value associated with the key.
  std::string value{};
};
229
230/**
231 * IndexColumnParam represents the index config of index column.
232 */
233struct IndexColumnParam {
234 /// Column name
235 /// Required field
236 std::string column_name{};
237
238 /// Column index type
239 /// Optional field, default IndexType::PROXIMA_GRAPH_INDEX
240 IndexType index_type{IndexType::PROXIMA_GRAPH_INDEX};
241
242 /// Stored data type
243 /// Optional filed, default DataType::VECTOR_FP32
244 DataType data_type{DataType::VECTOR_FP32};
245
246 /// Stored data dimension
247 /// Optional filed, default 0
248 uint32_t dimension{0U};
249
250 /// Extra params for column index
251 /// Optional field
252 /// For example:
253 /// {"ef_construction": "400", "ef_search": "300"}
254 std::vector<KVPair> extra_params{};
255
256 IndexColumnParam() = default;
257
258 IndexColumnParam(const std::string &val1, DataType val2, uint32_t val3)
259 : column_name(val1), data_type(val2), dimension(val3) {}
260};
261
/**
 * DatabaseRepository describes the database that stores the source data;
 * it is comparable to an ETL config.
 */
struct DatabaseRepository {
  /// Repository name; it must be unique.
  /// Required field.
  std::string repository_name{};

  /// Database connection uri, in a JDBC-like string format.
  /// Required field.
  std::string connection_uri{};

  /// Name of the table in the database.
  /// Required field.
  std::string table_name{};

  /// User name used to connect to the database.
  /// Optional field, default empty.
  std::string user{};

  /// Password that belongs to the user name.
  /// Optional field, default empty.
  std::string password{};
};
287
288/**
289 * CollectionConfig describes the config options of collection.
290 * It includes description of index columns and forward columns.
291 * Index columns means that this column data is for knn searching.
292 * Forward columns means that this column data is just for display,
293 * which is not anticipating in search process.
294 */
295struct CollectionConfig {
296 /// Collection name, it should be unique
297 /// Required field
298 std::string collection_name{};
299
300 /// Collection will split into serveral segments
301 /// This param means max doc limits in one segment
302 /// Optional field, default 0, means no limit
303 uint32_t max_docs_per_segment{0U};
304
305 /// Forward column names
306 /// Optional field
307 std::vector<std::string> forward_columns{};
308
309 /// Index column infos
310 /// Required filed
311 std::vector<IndexColumnParam> index_columns{};
312
313 /// Database repository config
314 /// Optional field, default empty
315 DatabaseRepository database_repository{};
316};
317
318/**
319 * CollectionInfo describes the detailed information of collection,
320 * which is ProximaSE server returned.
321 */
322struct CollectionInfo {
323 enum class CollectionStatus : uint32_t {
324 INITIALIZED = 0,
325 SERVING = 1,
326 DRPPED = 2
327 };
328
329 //! Collection name
330 std::string collection_name{};
331
332 //! Collection status
333 CollectionStatus collection_status{CollectionStatus::INITIALIZED};
334
335 //! Unique uuid to a collection
336 std::string collection_uuid{};
337
338 //! Latest record's log sequence number
339 uint64_t latest_lsn{0U};
340
341 //! Latest record's lsn context
342 std::string latest_lsn_context{};
343
344 //! Server magic number, generally is server started timestamp
345 uint64_t magic_number{0U};
346
347 //! Collection's config max doc number per segment
348 uint32_t max_docs_per_segment{0U};
349
350 //! Collection's forward column names
351 std::vector<std::string> forward_columns{};
352
353 //! Collection's index column params
354 std::vector<IndexColumnParam> index_columns{};
355
356 //! Collection's database repository information
357 DatabaseRepository database_repository{};
358};
359
360
/**
 * CollectionStats describes the detailed statistics of a collection.
 */
struct CollectionStats {
  /**
   * State of a segment.
   */
  enum class SegmentState : uint32_t {
    CREATED = 0,
    WRITING = 1,
    DUMPING = 2,
    COMPACTING = 3,
    PERSIST = 4,
  };

  /**
   * SegmentStats describes the detailed statistics of one segment.
   */
  struct SegmentStats {
    /// Unique id of the segment
    uint64_t segment_id{0U};

    /// State of the segment
    SegmentState segment_state{SegmentState::CREATED};

    /// Number of documents in the segment
    uint64_t doc_count{0U};

    /// Number of index files of the segment
    uint64_t index_file_count{0U};

    /// Total index file size
    uint64_t index_file_size{0U};

    /// Minimum document id
    uint64_t min_doc_id{0U};

    /// Maximum document id
    uint64_t max_doc_id{0U};

    /// Minimum primary key value of the segment
    uint64_t min_primary_key{0U};

    /// Maximum primary key value of the segment
    uint64_t max_primary_key{0U};

    /// Timestamp of the earliest record
    uint64_t min_timestamp{0U};

    /// Timestamp of the last record
    uint64_t max_timestamp{0U};

    /// Minimum log sequence number
    uint64_t min_lsn{0U};

    /// Maximum log sequence number
    uint64_t max_lsn{0U};
  };

  /// Collection name
  std::string collection_name{};

  /// Total number of documents in the collection
  uint64_t total_doc_count{0U};

  /// Total number of segments in the collection
  uint64_t total_segment_count{0U};

  /// Total number of index files
  uint64_t total_index_file_count{0U};

  /// Total index file size
  uint64_t total_index_file_size{0U};

  /// Detailed statistics of each segment
  std::vector<SegmentStats> segment_stats{};
};
438
439using WriteRequestPtr = std::shared_ptr<WriteRequest>;
440/**
441 * WriteRequest shows how to wrapper write request data fields.
442 *
443 * Usage exp:
444 * WriteRequestPtr request = WriteRequest::Create();
445 * request->set_collection_name("test_collection");
446 * request->set_row_meta({"test_column"}, {});
447 * auto row = request->add_row();
448 * row->set_primary_key = 123;
449 * row->set_operation_type(OperationType::OP_INSERT);
450 * row->add_index_value({0.1, 0.2, 0.3});
451 * ...
452 * client->write(*request);
453 */
454class WriteRequest {
455 public:
456 /**
457 * A row describes the format of one record
458 */
459 class Row {
460 public:
461 //! Destructor
462 virtual ~Row() = default;
463
464 //! Set primary key, required
465 virtual void set_primary_key(uint64_t val) = 0;
466
467 //! Set operation type, optional, default DataType::INSERT
468 virtual void set_operation_type(OperationType op_type) = 0;
469
470 //! Set lsn, optional, default 0
471 virtual void set_lsn(uint64_t lsn) = 0;
472
473 //! Set lsn context, optional, default ""
474 virtual void set_lsn_context(const std::string &lsn_context) = 0;
475
476 /// @brief Add forward value with string type
477 ///
478 /// @note Add forward value sort must match configured
479 /// forward columns in CollectionConfig
480 virtual void add_forward_value(const std::string &val) = 0;
481
482 //! Add forward value with bool type
483 virtual void add_forward_value(bool val) = 0;
484
485 //! Add forward value with int32 type
486 virtual void add_forward_value(int32_t val) = 0;
487
488 //! Add forward value with int64 type
489 virtual void add_forward_value(int64_t val) = 0;
490
491 //! Add forward value with uint32 type
492 virtual void add_forward_value(uint32_t val) = 0;
493
494 //! Add forward value with uint64 type
495 virtual void add_forward_value(uint64_t val) = 0;
496
497 //! Add forward value with float type
498 virtual void add_forward_value(float val) = 0;
499
500 //! Add forward value with double type
501 virtual void add_forward_value(double val) = 0;
502
503 /// @brief Add index value, vector bytes type
504 ///
505 /// @note Add index value sort must match configured
506 /// index columns in CollectionConfig
507 virtual void add_index_value(const void *val, size_t val_len) = 0;
508
509 //! Add index value, vector array type
510 virtual void add_index_value(const std::vector<float> &val) = 0;
511
512 /// Add index value by json format
513 /// Two json format:
514 /// "[0.1, 0.2, 0.3, 0.4]"
515 /// "[[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
516 virtual void add_index_value_by_json(const std::string &json_val) = 0;
517 };
518 using RowPtr = std::shared_ptr<Row>;
519
520 public:
521 //! Constructor
522 static WriteRequestPtr Create();
523
524 //! Destructor
525 virtual ~WriteRequest() = default;
526
527 //! Set collection name, required, must be unique
528 virtual void set_collection_name(const std::string &val) = 0;
529
530 /// @brief Add forward column in row meta
531 /// @note Forward column names' sort must match configured
532 /// forward columns in CollectionConfig
533 virtual void add_forward_column(const std::string &column_name) = 0;
534
535 /// @brief Add forward columns in row meta
536 /// @note Forward column names' sort must match configured
537 /// forward columns in CollectionConfig
538 virtual void add_forward_columns(
539 const std::vector<std::string> &column_names) = 0;
540
541 /// @brief Add index column in row meta
542 ///
543 /// @param column_name Column name
544 /// @param data_type Send data type
545 /// @param dimension Send data dimension
546 ///
547 /// @note Index column names' sort must match configured
548 /// index columns in CollectionConfig
549 virtual void add_index_column(const std::string &column_name,
550 DataType data_type, uint32_t dimension) = 0;
551
552 //! Add row data, required, can't send empty request
553 virtual WriteRequest::RowPtr add_row() = 0;
554
555 //! Set request id for tracelog, optional
556 virtual void set_request_id(const std::string &request_id) = 0;
557
558 //! Set magic number for validation, optional
559 virtual void set_magic_number(uint64_t magic_number) = 0;
560};
561
562
563using QueryRequestPtr = std::shared_ptr<QueryRequest>;
564/**
565 * QueryRequest shows how to wrapper query data fields.
566 *
567 * Usage exp:
568 * QueryRequestPtr request = QueryRequest::Create();
569 * request->set_collection_name("test_colletion");
570 * auto knn_param = request->add_knn_query_param();
571 * knn_param->set_column_name("test_column");
572 * knn_param->set_features({0.1, 0.2, 0.3, 0.4});
573 * knn_param->set_batch_count(1);
574 * knn_param->set_dimension(4);
575 * knn_param->set_data_type(DT_VECTOR_FP32);
576 * ...
577 *
578 */
579class QueryRequest {
580 public:
581 /**
582 * KnnQueryParam describes the options of knn query
583 */
584 class KnnQueryParam {
585 public:
586 // Destructor
587 virtual ~KnnQueryParam() = default;
588
589 //! Set column name, required
590 virtual void set_column_name(const std::string &val) = 0;
591
592 //! Set topk, required
593 virtual void set_topk(uint32_t val) = 0;
594
595 /// Set query vector with bytes format by single
596 /// Required set
597 virtual void set_features(const void *val, size_t val_len) = 0;
598
599 //! Set features with vector array format by single
600 virtual void set_features(const std::vector<float> &val) = 0;
601
602 //! Set query vector with bytes format by batch
603 virtual void set_features(const void *val, size_t val_len,
604 uint32_t batch) = 0;
605
606 /// Set features by json format
607 /// Two json format:
608 /// "[0.1, 0.2, 0.3, 0.4]"
609 /// "[[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
610 virtual void set_features_by_json(const std::string &json_val) = 0;
611
612 //! Set features by json format and by batch
613 virtual void set_features_by_json(const std::string &json_val,
614 uint32_t batch) = 0;
615
616 //! Set vector data dimension, required
617 virtual void set_dimension(uint32_t val) = 0;
618
619 //! Set vector data type, required
620 virtual void set_data_type(DataType val) = 0;
621
622 //! Set search radius, optional, default 0.0f not open
623 virtual void set_radius(float val) = 0;
624
625 //! Set if use linear search, optional, default false
626 virtual void set_linear(bool val) = 0;
627
628 //! Add extra params, like ef_search ..etc, optional
629 virtual void add_extra_param(const std::string &key,
630 const std::string &val) = 0;
631 };
632 using KnnQueryParamPtr = std::shared_ptr<KnnQueryParam>;
633
634 public:
635 //! Constructor
636 static QueryRequestPtr Create();
637
638 //! Destructor
639 virtual ~QueryRequest() = default;
640
641 //! Set collection name, required
642 virtual void set_collection_name(const std::string &val) = 0;
643
644 //! Set knn query param, required
645 virtual QueryRequest::KnnQueryParamPtr add_knn_query_param() = 0;
646
647 //! Set debug mode, optional, default false
648 virtual void set_debug_mode(bool val) = 0;
649};
650
651using DocumentPtr = std::shared_ptr<Document>;
652/**
653 * Document shows the format of knn query response
654 */
655class Document {
656 public:
657 //! Destructor
658 virtual ~Document() = default;
659
660 //! Return document primary key
661 virtual uint64_t primary_key() const = 0;
662
663 //! Return calculated knn distance score
664 virtual float score() const = 0;
665
666 //! Return forward values count
667 virtual size_t forward_count() const = 0;
668
669 //! Get forward names
670 virtual void get_forward_names(
671 std::vector<std::string> *forward_names) const = 0;
672
673 //! Get forward value with string type
674 virtual void get_forward_value(const std::string &key,
675 std::string *val) const = 0;
676
677 //! Get forward value with bool type
678 virtual void get_forward_value(const std::string &key, bool *val) const = 0;
679
680 //! Get forward value with int32 type
681 virtual void get_forward_value(const std::string &key,
682 int32_t *val) const = 0;
683
684 //! Get forward value with int64 type
685 virtual void get_forward_value(const std::string &key,
686 int64_t *val) const = 0;
687
688 //! Get forward value with uint32 type
689 virtual void get_forward_value(const std::string &key,
690 uint32_t *val) const = 0;
691
692 //! Get forward value with uint64 type
693 virtual void get_forward_value(const std::string &key,
694 uint64_t *val) const = 0;
695
696 //! Get forward value with float type
697 virtual void get_forward_value(const std::string &key, float *val) const = 0;
698
699 //! Get forward value with double type
700 virtual void get_forward_value(const std::string &key, double *val) const = 0;
701};
702
703
704using QueryResponsePtr = std::shared_ptr<QueryResponse>;
705/**
706 * QueryResponse shows the format of query response.
707 */
708class QueryResponse {
709 public:
710 /**
711 * Result represents a knn query's result
712 */
713 class Result {
714 public:
715 //! Destructor
716 virtual ~Result() = default;
717
718 //! Return document count
719 virtual size_t document_count() const = 0;
720
721 //! Return document pointer of specific pos
722 virtual DocumentPtr document(int index) const = 0;
723 };
724 using ResultPtr = std::shared_ptr<Result>;
725
726 public:
727 //! Constructor
728 static QueryResponsePtr Create();
729
730 //! Destructor
731 virtual ~QueryResponse() = default;
732
733 //! Return debug info
734 virtual const std::string &debug_info() const = 0;
735
736 //! Return query latency, microseconds
737 virtual uint64_t latency_us() const = 0;
738
739 //! Return batch result count
740 virtual size_t result_count() const = 0;
741
742 //! Return result pointer of specific batch pos
743 virtual QueryResponse::ResultPtr result(int index) const = 0;
744};
745
746
747using GetDocumentRequestPtr = std::shared_ptr<GetDocumentRequest>;
748/*
749 * GetDocumentRequest shows the format of get document request.
750 *
751 * Usage exp:
752 * GetDocumentRequestPtr request = GetDocumentRequest::Create();
753 * request->set_collection_name("test_collection");
754 * request->set_primary_key(123);
755 * ...
756 */
757class GetDocumentRequest {
758 public:
759 //! Constructor
760 static GetDocumentRequestPtr Create();
761
762 //! Destructor
763 virtual ~GetDocumentRequest() = default;
764
765 //! Set collection name, required
766 virtual void set_collection_name(const std::string &val) = 0;
767
768 //! Set primary key, required
769 virtual void set_primary_key(uint64_t val) = 0;
770
771 //! Set debug mode, optional, default false
772 virtual void set_debug_mode(bool val) = 0;
773};
774
775
776using GetDocumentResponsePtr = std::shared_ptr<GetDocumentResponse>;
777/*
778 * GetDocumentResponse shows the format of get document response
779 */
780class GetDocumentResponse {
781 public:
782 //! Constructor
783 static GetDocumentResponsePtr Create();
784
785 //! Destructor
786 virtual ~GetDocumentResponse() = default;
787
788 //! Return debug info
789 virtual const std::string &debug_info() const = 0;
790
791 //! Return document that found
792 virtual DocumentPtr document() const = 0;
793};
794
795
796} // end namespace be
797} // end namespace proxima
798