1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16/// \brief SQLite extension for Snappy compression
17///
/// Snappy is a compression library that trades compression ratio for speed,
/// running almost a tenth as fast as memcpy().
20///
21/// FUNCTIONS
22///
23/// - snap(value: BLOB|TEXT) -> BLOB
24/// - snap(value: NULL|INT|REAL) -> value
25///
26/// Applies Snappy compression. If value is TEXT or BLOB, then it is
27/// compressed and a BLOB is returned with a byte prepended to indicate the
28/// original type. Other types are returned as-is.
29///
30/// - unsnap(value: BLOB) -> TEXT|BLOB
31/// - unsnap(value: TEXT) -> SQLITE_MISMATCH
32/// - unsnap(value: NULL|INT|REAL) -> value
33///
34/// Decompresses value created by snap(). If value is empty, then an empty
35/// blob is returned. Otherwise the original type is restored from the first
36/// byte and the remaining ones are decompressed. TEXT is not allowed as an
37/// input type. Remaining types are returned as-is.
38///
39/// PERFORMANCE CONSIDERATIONS
40///
41/// These functions are deterministic. This means SQLite ≥3.8.3 will factor
42/// them out of inner loops when constant arguments are provided. In SQLite
43/// ≥3.15.0 they can be used in the WHERE clause of partial indexes. Currently
44/// there is no support for common sub-expression elimination.
45///
/// SQLite environments that aren't universally UTF8 will work, but will incur
/// superfluous charset transcodings, because this implementation encodes only
/// UTF8 TEXT for the sake of simplicity. Contributions are welcome that
/// register multiple sister functions for the various charsets, which use the
/// higher order bits of the type byte to indicate encoding.
51///
52/// SUPPORT MATRIX
53///
/// - 3.20.0 (2017-08-01) What FOSS TensorFlow uses
55/// - 3.13.0 (2016-05-18) What Google uses c. 2017-12
56/// - 3.8.2 (2013-12-06) Used by Ubuntu 14.04
57///
58/// MANUAL COMPILATION
59///
60/// $ sudo apt-get install libsqlite3-dev libsnappy-dev
61/// $ c++ -shared --std=c++11 -o libsnapfn.so -fPIC snapfn.cc -lsnappy
62///
63/// $ sqlite3
64/// sqlite> .load libsnapfn.so
65/// sqlite> select hex(snap('aaaaaaaaaaaaaaaaa'));
66/// 031100613E0100
67/// sqlite> select unsnap(x'031100613E0100');
68/// aaaaaaaaaaaaaaaaa
69///
70/// $ python
71/// >>> import sqlite3
72/// >>> db = sqlite3.connect(':memory:')
73/// >>> db.enable_load_extension(True)
74/// >>> db.execute('select load_extension("libsnapfn.so")')
75/// >>> db.enable_load_extension(False)
76/// >>> db.execute('select hex(snap("aaaaaaaaaaaaaaaaa"))').fetchone()[0]
77/// u'031100613E0100'
78
79#include "sqlite3ext.h"
80#include "snappy.h"
81
82SQLITE_EXTENSION_INIT1
83
84static void snap(sqlite3_context* ctx, int /*argc*/, sqlite3_value** argv) {
85 const char* data;
86 int type = sqlite3_value_type(argv[0]);
87 switch (type) {
88 case SQLITE_NULL:
89 return;
90 case SQLITE_INTEGER:
91 sqlite3_result_int64(ctx, sqlite3_value_int64(argv[0]));
92 return;
93 case SQLITE_FLOAT:
94 sqlite3_result_double(ctx, sqlite3_value_double(argv[0]));
95 return;
96 case SQLITE_BLOB:
97 data = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
98 break;
99 case SQLITE_TEXT:
100 data = reinterpret_cast<const char*>(sqlite3_value_text(argv[0]));
101 break;
102 default:
103 sqlite3_result_error(ctx, "snap() invalid type", -1);
104 sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
105 return;
106 }
107 int size = sqlite3_value_bytes(argv[0]);
108 if (size <= 0) {
109 char result[] = {static_cast<char>(type)};
110 sqlite3_result_blob(ctx, result, sizeof(result), SQLITE_TRANSIENT);
111 return;
112 }
113 size_t output_size =
114 snappy::MaxCompressedLength(static_cast<size_t>(size)) + 1;
115 if (output_size >
116 static_cast<size_t>(sqlite3_limit(sqlite3_context_db_handle(ctx),
117 SQLITE_LIMIT_LENGTH, -1))) {
118 sqlite3_result_error_toobig(ctx);
119 return;
120 }
121 auto output =
122 static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
123 if (output == nullptr) {
124 sqlite3_result_error_nomem(ctx);
125 return;
126 }
127 *output++ = static_cast<char>(type), --output_size;
128 snappy::RawCompress(data, static_cast<size_t>(size), output, &output_size);
129 sqlite3_result_blob(ctx, output - 1, static_cast<int>(output_size + 1),
130 sqlite3_free);
131}
132
133static void unsnap(sqlite3_context* ctx, int /*argc*/, sqlite3_value** argv) {
134 int type = sqlite3_value_type(argv[0]);
135 switch (type) {
136 case SQLITE_NULL:
137 return;
138 case SQLITE_INTEGER:
139 sqlite3_result_int64(ctx, sqlite3_value_int64(argv[0]));
140 return;
141 case SQLITE_FLOAT:
142 sqlite3_result_double(ctx, sqlite3_value_double(argv[0]));
143 return;
144 case SQLITE_BLOB:
145 break;
146 default:
147 sqlite3_result_error(ctx, "unsnap() invalid type", -1);
148 sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
149 return;
150 }
151 int size = sqlite3_value_bytes(argv[0]);
152 auto blob = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
153 if (size <= 0) {
154 sqlite3_result_zeroblob(ctx, 0);
155 return;
156 }
157 type = static_cast<int>(*blob++), --size;
158 if (type != SQLITE_BLOB && type != SQLITE_TEXT) {
159 sqlite3_result_error(ctx, "unsnap() first byte is invalid type", -1);
160 sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
161 return;
162 }
163 if (size == 0) {
164 if (type == SQLITE_TEXT) {
165 sqlite3_result_text(ctx, "", 0, SQLITE_STATIC);
166 } else {
167 sqlite3_result_zeroblob(ctx, 0);
168 }
169 return;
170 }
171 size_t output_size;
172 if (!snappy::GetUncompressedLength(blob, static_cast<size_t>(size),
173 &output_size)) {
174 sqlite3_result_error(ctx, "snappy parse error", -1);
175 sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
176 return;
177 }
178 if (output_size >
179 static_cast<size_t>(sqlite3_limit(sqlite3_context_db_handle(ctx),
180 SQLITE_LIMIT_LENGTH, -1))) {
181 sqlite3_result_error_toobig(ctx);
182 return;
183 }
184 auto output =
185 static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
186 if (output == nullptr) {
187 sqlite3_result_error_nomem(ctx);
188 return;
189 }
190 if (!snappy::RawUncompress(blob, static_cast<size_t>(size), output)) {
191 sqlite3_result_error(ctx, "snappy message corruption", -1);
192 sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
193 sqlite3_free(output);
194 return;
195 }
196 if (type == SQLITE_TEXT) {
197 sqlite3_result_text(ctx, output, static_cast<int>(output_size),
198 sqlite3_free);
199 } else {
200 sqlite3_result_blob(ctx, output, static_cast<int>(output_size),
201 sqlite3_free);
202 }
203}
204
205extern "C" {
206
207#ifndef SQLITE_DETERMINISTIC
208#define SQLITE_DETERMINISTIC 0
209#endif
210
211#ifndef SQLITE_CALLBACK
212#define SQLITE_CALLBACK
213#endif
214
215SQLITE_CALLBACK int sqlite3_snapfn_init(sqlite3* db, const char** /*pzErrMsg*/,
216 const sqlite3_api_routines* pApi) {
217 SQLITE_EXTENSION_INIT2(pApi);
218 int rc;
219
220 rc = sqlite3_create_function_v2(
221 db,
222 "snap", // zFunctionName
223 1, // nArg
224 SQLITE_UTF8 | SQLITE_DETERMINISTIC, // eTextRep
225 nullptr, // pApp
226 snap, // xFunc
227 nullptr, // xStep
228 nullptr, // xFinal
229 nullptr // xDestroy
230 );
231 if (rc != SQLITE_OK) {
232 return rc;
233 }
234
235 rc = sqlite3_create_function_v2(
236 db,
237 "unsnap", // zFunctionName
238 1, // nArg
239 SQLITE_UTF8 | SQLITE_DETERMINISTIC, // eTextRep
240 nullptr, // pApp
241 unsnap, // xFunc
242 nullptr, // xStep
243 nullptr, // xFinal
244 nullptr // xDestroy
245 );
246 if (rc != SQLITE_OK) {
247 return rc;
248 }
249
250 return SQLITE_OK;
251}
252
253} // extern "C"
254