1 | /** |
2 | * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | |
16 | * \author Hongqing.hu |
17 | * \date Oct 2020 |
18 | * \brief Proto converter interface definition for bilin engine |
19 | */ |
20 | |
21 | #pragma once |
22 | |
23 | #include <ailego/encoding/json.h> |
24 | #include <ailego/utility/float_helper.h> |
25 | #include "common/error_code.h" |
26 | #include "common/logger.h" |
27 | #include "common/transformer.h" |
28 | #include "index/collection_dataset.h" |
29 | #include "meta/meta.h" |
30 | #include "proto/proxima_be.pb.h" |
31 | |
32 | namespace proxima { |
33 | namespace be { |
34 | namespace server { |
35 | |
36 | /*! ProtoConverter |
37 | */ |
38 | class ProtoConverter { |
39 | public: |
40 | //! Convert Index Data |
41 | static int ConvertIndexData( |
42 | const std::string &index_value, const meta::ColumnMeta &column_meta, |
43 | const proto::WriteRequest::IndexColumnMeta &proto_meta, bool is_bytes, |
44 | index::ColumnData *column_value); |
45 | |
46 | private: |
47 | //! Parse single index column value |
48 | static int ParseJsonIndexColumnValue( |
49 | const std::string &column_value, const meta::ColumnMeta &meta, |
50 | const proto::WriteRequest::IndexColumnMeta &proto_meta, |
51 | std::string *serialized_value); |
52 | |
53 | //! Parse single index column value |
54 | static int ParseBytesIndexColumnValue( |
55 | const std::string &column_value, const meta::ColumnMeta &meta, |
56 | const proto::WriteRequest::IndexColumnMeta &proto_meta, |
57 | std::string *serialized_value); |
58 | |
59 | //! Copy bytes index column value |
60 | static int CopyBytesIndexColumnValue(const std::string &column_value, |
61 | const meta::ColumnMeta &meta, |
62 | std::string *serialized_value); |
63 | |
64 | //! Parse single typed index column value |
65 | template <typename T> |
66 | static int ParseTypedIndexColumnValue(const std::string &column_value, |
67 | const meta::ColumnMeta &meta, |
68 | std::string *serialized_value); |
69 | |
70 | //! Parse single typed index column value |
71 | static int SetTypedIndexColumnValue(const std::string &column_value, |
72 | std::string *serialized_value); |
73 | |
74 | //! Validate single typed index column value |
75 | template <typename T> |
76 | static int ValidateTypedIndexColumnValue(const std::string &column_value, |
77 | const meta::ColumnMeta &meta); |
78 | }; |
79 | |
80 | template <typename T> |
81 | int ProtoConverter::ParseTypedIndexColumnValue(const std::string &column_value, |
82 | const meta::ColumnMeta &meta, |
83 | std::string *serialized_value) { |
84 | std::vector<T> values; |
85 | DataTypes data_type = meta.data_type(); |
86 | uint32_t dimension = meta.dimension(); |
87 | Transformer::Transform(column_value, nullptr, &values); |
88 | if (data_type == DataTypes::VECTOR_BINARY32) { |
89 | dimension /= 32; |
90 | } else if (data_type == DataTypes::VECTOR_BINARY64) { |
91 | dimension /= 64; |
92 | } |
93 | if (values.size() != dimension) { |
94 | LOG_ERROR("Vector dimension mismatched. expected[%u], actual[%zu]" , |
95 | dimension, values.size()); |
96 | return ErrorCode_MismatchedDimension; |
97 | } |
98 | |
99 | |
100 | if (data_type == DataTypes::VECTOR_INT4) { |
101 | Primary2Bytes::Bytes<T, DataTypes::VECTOR_INT4>(values, serialized_value); |
102 | } else if (data_type == DataTypes::VECTOR_FP16) { |
103 | Primary2Bytes::Bytes<T, DataTypes::VECTOR_FP16>(values, serialized_value); |
104 | } else { |
105 | size_t vector_size = dimension * sizeof(T); |
106 | serialized_value->resize(vector_size); |
107 | memcpy(&((*serialized_value)[0]), values.data(), vector_size); |
108 | } |
109 | |
110 | return 0; |
111 | } |
112 | |
113 | template <typename T> |
114 | int ProtoConverter::ValidateTypedIndexColumnValue( |
115 | const std::string &column_value, const meta::ColumnMeta &meta) { |
116 | DataTypes data_type = meta.data_type(); |
117 | uint32_t dimension = meta.dimension(); |
118 | |
119 | if (data_type == DataTypes::VECTOR_BINARY32) { |
120 | dimension /= 32; |
121 | } else if (data_type == DataTypes::VECTOR_BINARY64) { |
122 | dimension /= 64; |
123 | } |
124 | |
125 | size_t except_size = 0; |
126 | if (data_type == DataTypes::VECTOR_INT4 || |
127 | data_type == DataTypes::VECTOR_FP16) { |
128 | except_size = dimension * sizeof(T) / 2; |
129 | } else { |
130 | except_size = dimension * sizeof(T); |
131 | } |
132 | |
133 | if (except_size != column_value.size()) { |
134 | LOG_ERROR("Vector size mismatched. expected[%zu], actual[%zu]" , except_size, |
135 | column_value.size()); |
136 | return ErrorCode_MismatchedDimension; |
137 | } |
138 | |
139 | return 0; |
140 | } |
141 | |
142 | } // end namespace server |
143 | } // namespace be |
144 | } // end namespace proxima |
145 | |