1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | /// \brief SQLite extension for Snappy compression |
17 | /// |
/// Snappy is a compression library that trades compression ratio for speed,
/// running almost a tenth as fast as memcpy().
20 | /// |
21 | /// FUNCTIONS |
22 | /// |
23 | /// - snap(value: BLOB|TEXT) -> BLOB |
24 | /// - snap(value: NULL|INT|REAL) -> value |
25 | /// |
26 | /// Applies Snappy compression. If value is TEXT or BLOB, then it is |
27 | /// compressed and a BLOB is returned with a byte prepended to indicate the |
28 | /// original type. Other types are returned as-is. |
29 | /// |
30 | /// - unsnap(value: BLOB) -> TEXT|BLOB |
31 | /// - unsnap(value: TEXT) -> SQLITE_MISMATCH |
32 | /// - unsnap(value: NULL|INT|REAL) -> value |
33 | /// |
34 | /// Decompresses value created by snap(). If value is empty, then an empty |
35 | /// blob is returned. Otherwise the original type is restored from the first |
36 | /// byte and the remaining ones are decompressed. TEXT is not allowed as an |
37 | /// input type. Remaining types are returned as-is. |
38 | /// |
39 | /// PERFORMANCE CONSIDERATIONS |
40 | /// |
41 | /// These functions are deterministic. This means SQLite ≥3.8.3 will factor |
42 | /// them out of inner loops when constant arguments are provided. In SQLite |
43 | /// ≥3.15.0 they can be used in the WHERE clause of partial indexes. Currently |
44 | /// there is no support for common sub-expression elimination. |
45 | /// |
/// SQLite environments that aren't universally UTF8 will work, but will incur
/// superfluous charset transcodings, because this implementation encodes only
/// UTF8 TEXT for the sake of simplicity. Contributions are welcome that
/// register multiple sister functions for the various charsets, which use the
/// higher order bits of the type byte to indicate encoding.
51 | /// |
52 | /// SUPPORT MATRIX |
53 | /// |
/// - 3.20.0 (2017-08-01) What FOSS TensorFlow uses
55 | /// - 3.13.0 (2016-05-18) What Google uses c. 2017-12 |
56 | /// - 3.8.2 (2013-12-06) Used by Ubuntu 14.04 |
57 | /// |
58 | /// MANUAL COMPILATION |
59 | /// |
60 | /// $ sudo apt-get install libsqlite3-dev libsnappy-dev |
61 | /// $ c++ -shared --std=c++11 -o libsnapfn.so -fPIC snapfn.cc -lsnappy |
62 | /// |
63 | /// $ sqlite3 |
64 | /// sqlite> .load libsnapfn.so |
65 | /// sqlite> select hex(snap('aaaaaaaaaaaaaaaaa')); |
66 | /// 031100613E0100 |
67 | /// sqlite> select unsnap(x'031100613E0100'); |
68 | /// aaaaaaaaaaaaaaaaa |
69 | /// |
70 | /// $ python |
71 | /// >>> import sqlite3 |
72 | /// >>> db = sqlite3.connect(':memory:') |
73 | /// >>> db.enable_load_extension(True) |
74 | /// >>> db.execute('select load_extension("libsnapfn.so")') |
75 | /// >>> db.enable_load_extension(False) |
76 | /// >>> db.execute('select hex(snap("aaaaaaaaaaaaaaaaa"))').fetchone()[0] |
77 | /// u'031100613E0100' |
78 | |
79 | #include "sqlite3ext.h" |
80 | #include "snappy.h" |
81 | |
82 | SQLITE_EXTENSION_INIT1 |
83 | |
84 | static void snap(sqlite3_context* ctx, int /*argc*/, sqlite3_value** argv) { |
85 | const char* data; |
86 | int type = sqlite3_value_type(argv[0]); |
87 | switch (type) { |
88 | case SQLITE_NULL: |
89 | return; |
90 | case SQLITE_INTEGER: |
91 | sqlite3_result_int64(ctx, sqlite3_value_int64(argv[0])); |
92 | return; |
93 | case SQLITE_FLOAT: |
94 | sqlite3_result_double(ctx, sqlite3_value_double(argv[0])); |
95 | return; |
96 | case SQLITE_BLOB: |
97 | data = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0])); |
98 | break; |
99 | case SQLITE_TEXT: |
100 | data = reinterpret_cast<const char*>(sqlite3_value_text(argv[0])); |
101 | break; |
102 | default: |
103 | sqlite3_result_error(ctx, "snap() invalid type" , -1); |
104 | sqlite3_result_error_code(ctx, SQLITE_MISMATCH); |
105 | return; |
106 | } |
107 | int size = sqlite3_value_bytes(argv[0]); |
108 | if (size <= 0) { |
109 | char result[] = {static_cast<char>(type)}; |
110 | sqlite3_result_blob(ctx, result, sizeof(result), SQLITE_TRANSIENT); |
111 | return; |
112 | } |
113 | size_t output_size = |
114 | snappy::MaxCompressedLength(static_cast<size_t>(size)) + 1; |
115 | if (output_size > |
116 | static_cast<size_t>(sqlite3_limit(sqlite3_context_db_handle(ctx), |
117 | SQLITE_LIMIT_LENGTH, -1))) { |
118 | sqlite3_result_error_toobig(ctx); |
119 | return; |
120 | } |
121 | auto output = |
122 | static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size))); |
123 | if (output == nullptr) { |
124 | sqlite3_result_error_nomem(ctx); |
125 | return; |
126 | } |
127 | *output++ = static_cast<char>(type), --output_size; |
128 | snappy::RawCompress(data, static_cast<size_t>(size), output, &output_size); |
129 | sqlite3_result_blob(ctx, output - 1, static_cast<int>(output_size + 1), |
130 | sqlite3_free); |
131 | } |
132 | |
133 | static void unsnap(sqlite3_context* ctx, int /*argc*/, sqlite3_value** argv) { |
134 | int type = sqlite3_value_type(argv[0]); |
135 | switch (type) { |
136 | case SQLITE_NULL: |
137 | return; |
138 | case SQLITE_INTEGER: |
139 | sqlite3_result_int64(ctx, sqlite3_value_int64(argv[0])); |
140 | return; |
141 | case SQLITE_FLOAT: |
142 | sqlite3_result_double(ctx, sqlite3_value_double(argv[0])); |
143 | return; |
144 | case SQLITE_BLOB: |
145 | break; |
146 | default: |
147 | sqlite3_result_error(ctx, "unsnap() invalid type" , -1); |
148 | sqlite3_result_error_code(ctx, SQLITE_MISMATCH); |
149 | return; |
150 | } |
151 | int size = sqlite3_value_bytes(argv[0]); |
152 | auto blob = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0])); |
153 | if (size <= 0) { |
154 | sqlite3_result_zeroblob(ctx, 0); |
155 | return; |
156 | } |
157 | type = static_cast<int>(*blob++), --size; |
158 | if (type != SQLITE_BLOB && type != SQLITE_TEXT) { |
159 | sqlite3_result_error(ctx, "unsnap() first byte is invalid type" , -1); |
160 | sqlite3_result_error_code(ctx, SQLITE_CORRUPT); |
161 | return; |
162 | } |
163 | if (size == 0) { |
164 | if (type == SQLITE_TEXT) { |
165 | sqlite3_result_text(ctx, "" , 0, SQLITE_STATIC); |
166 | } else { |
167 | sqlite3_result_zeroblob(ctx, 0); |
168 | } |
169 | return; |
170 | } |
171 | size_t output_size; |
172 | if (!snappy::GetUncompressedLength(blob, static_cast<size_t>(size), |
173 | &output_size)) { |
174 | sqlite3_result_error(ctx, "snappy parse error" , -1); |
175 | sqlite3_result_error_code(ctx, SQLITE_CORRUPT); |
176 | return; |
177 | } |
178 | if (output_size > |
179 | static_cast<size_t>(sqlite3_limit(sqlite3_context_db_handle(ctx), |
180 | SQLITE_LIMIT_LENGTH, -1))) { |
181 | sqlite3_result_error_toobig(ctx); |
182 | return; |
183 | } |
184 | auto output = |
185 | static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size))); |
186 | if (output == nullptr) { |
187 | sqlite3_result_error_nomem(ctx); |
188 | return; |
189 | } |
190 | if (!snappy::RawUncompress(blob, static_cast<size_t>(size), output)) { |
191 | sqlite3_result_error(ctx, "snappy message corruption" , -1); |
192 | sqlite3_result_error_code(ctx, SQLITE_CORRUPT); |
193 | sqlite3_free(output); |
194 | return; |
195 | } |
196 | if (type == SQLITE_TEXT) { |
197 | sqlite3_result_text(ctx, output, static_cast<int>(output_size), |
198 | sqlite3_free); |
199 | } else { |
200 | sqlite3_result_blob(ctx, output, static_cast<int>(output_size), |
201 | sqlite3_free); |
202 | } |
203 | } |
204 | |
205 | extern "C" { |
206 | |
207 | #ifndef SQLITE_DETERMINISTIC |
208 | #define SQLITE_DETERMINISTIC 0 |
209 | #endif |
210 | |
211 | #ifndef SQLITE_CALLBACK |
212 | #define SQLITE_CALLBACK |
213 | #endif |
214 | |
215 | SQLITE_CALLBACK int sqlite3_snapfn_init(sqlite3* db, const char** /*pzErrMsg*/, |
216 | const sqlite3_api_routines* pApi) { |
217 | SQLITE_EXTENSION_INIT2(pApi); |
218 | int rc; |
219 | |
220 | rc = sqlite3_create_function_v2( |
221 | db, |
222 | "snap" , // zFunctionName |
223 | 1, // nArg |
224 | SQLITE_UTF8 | SQLITE_DETERMINISTIC, // eTextRep |
225 | nullptr, // pApp |
226 | snap, // xFunc |
227 | nullptr, // xStep |
228 | nullptr, // xFinal |
229 | nullptr // xDestroy |
230 | ); |
231 | if (rc != SQLITE_OK) { |
232 | return rc; |
233 | } |
234 | |
235 | rc = sqlite3_create_function_v2( |
236 | db, |
237 | "unsnap" , // zFunctionName |
238 | 1, // nArg |
239 | SQLITE_UTF8 | SQLITE_DETERMINISTIC, // eTextRep |
240 | nullptr, // pApp |
241 | unsnap, // xFunc |
242 | nullptr, // xStep |
243 | nullptr, // xFinal |
244 | nullptr // xDestroy |
245 | ); |
246 | if (rc != SQLITE_OK) { |
247 | return rc; |
248 | } |
249 | |
250 | return SQLITE_OK; |
251 | } |
252 | |
253 | } // extern "C" |
254 | |