1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #include "tensorflow/core/summary/schema.h" |
16 | |
17 | #include "tensorflow/core/lib/core/errors.h" |
18 | |
19 | namespace tensorflow { |
20 | namespace { |
21 | |
22 | Status Run(Sqlite* db, const char* sql) { |
23 | SqliteStatement stmt; |
24 | TF_RETURN_IF_ERROR(db->Prepare(sql, &stmt)); |
25 | return stmt.StepAndReset(); |
26 | } |
27 | |
28 | } // namespace |
29 | |
30 | Status SetupTensorboardSqliteDb(Sqlite* db) { |
31 | // Note: GCC raw strings macros are broken. |
32 | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55971 |
33 | TF_RETURN_IF_ERROR( |
34 | db->PrepareOrDie(strings::StrCat("PRAGMA application_id=" , |
35 | kTensorboardSqliteApplicationId)) |
36 | .StepAndReset()); |
37 | db->PrepareOrDie("PRAGMA user_version=0" ).StepAndResetOrDie(); |
38 | Status s; |
39 | |
40 | // Ids identify resources. |
41 | // |
42 | // This table can be used to efficiently generate Permanent IDs in |
43 | // conjunction with a random number generator. Unlike rowids these |
44 | // IDs safe to use in URLs and unique across tables. |
45 | // |
46 | // Within any given system, there can't be any foo_id == bar_id for |
47 | // all rows of any two (Foos, Bars) tables. A row should only be |
48 | // deleted from this table if there's a very high level of confidence |
49 | // it exists nowhere else in the system. |
50 | // |
51 | // Fields: |
52 | // id: The system-wide ID. This must be in the range [1,2**47). 0 |
53 | // is assigned the same meaning as NULL and shouldn't be stored |
54 | // and all other int64 values are reserved for future use. Please |
55 | // note that id is also the rowid. |
56 | s.Update(Run(db, R"sql( |
57 | CREATE TABLE IF NOT EXISTS Ids ( |
58 | id INTEGER PRIMARY KEY |
59 | ) |
60 | )sql" )); |
61 | |
62 | // Descriptions are Markdown text that can be associated with any |
63 | // resource that has a Permanent ID. |
64 | // |
65 | // Fields: |
66 | // id: The foo_id of the associated row in Foos. |
67 | // description: Arbitrary NUL-terminated Markdown text. |
68 | s.Update(Run(db, R"sql( |
69 | CREATE TABLE IF NOT EXISTS Descriptions ( |
70 | id INTEGER PRIMARY KEY, |
71 | description TEXT |
72 | ) |
73 | )sql" )); |
74 | |
75 | // Tensors are 0..n-dimensional numbers or strings. |
76 | // |
77 | // Fields: |
78 | // rowid: Ephemeral b-tree ID. |
79 | // series: The Permanent ID of a different resource, e.g. tag_id. A |
80 | // tensor will be vacuumed if no series == foo_id exists for all |
81 | // rows of all Foos. When series is NULL this tensor may serve |
82 | // undefined purposes. This field should be set on placeholders. |
83 | // step: Arbitrary number to uniquely order tensors within series. |
84 | // The meaning of step is undefined when series is NULL. This may |
85 | // be set on placeholders to prepopulate index pages. |
86 | // computed_time: Float UNIX timestamp with microsecond precision. |
87 | // In the old summaries system that uses FileWriter, this is the |
88 | // wall time around when tf.Session.run finished. In the new |
89 | // summaries system, it is the wall time of when the tensor was |
90 | // computed. On systems with monotonic clocks, it is calculated |
91 | // by adding the monotonic run duration to Run.started_time. |
92 | // dtype: The tensorflow::DataType ID. For example, DT_INT64 is 9. |
93 | // When NULL or 0 this must be treated as a placeholder row that |
94 | // does not officially exist. |
95 | // shape: A comma-delimited list of int64 >=0 values representing |
96 | // length of each dimension in the tensor. This must be a valid |
97 | // shape. That means no -1 values and, in the case of numeric |
98 | // tensors, length(data) == product(shape) * sizeof(dtype). Empty |
99 | // means this is a scalar a.k.a. 0-dimensional tensor. |
100 | // data: Little-endian raw tensor memory. If dtype is DT_STRING and |
101 | // shape is empty, the nullness of this field indicates whether or |
102 | // not it contains the tensor contents; otherwise TensorStrings |
103 | // must be queried. If dtype is NULL then ZEROBLOB can be used on |
104 | // this field to reserve row space to be updated later. |
105 | s.Update(Run(db, R"sql( |
106 | CREATE TABLE IF NOT EXISTS Tensors ( |
107 | rowid INTEGER PRIMARY KEY, |
108 | series INTEGER, |
109 | step INTEGER, |
110 | dtype INTEGER, |
111 | computed_time REAL, |
112 | shape TEXT, |
113 | data BLOB |
114 | ) |
115 | )sql" )); |
116 | |
117 | s.Update(Run(db, R"sql( |
118 | CREATE UNIQUE INDEX IF NOT EXISTS |
119 | TensorSeriesStepIndex |
120 | ON |
121 | Tensors (series, step) |
122 | WHERE |
123 | series IS NOT NULL |
124 | AND step IS NOT NULL |
125 | )sql" )); |
126 | |
127 | // TensorStrings are the flat contents of 1..n dimensional DT_STRING |
128 | // Tensors. |
129 | // |
130 | // The number of rows associated with a Tensor must be equal to the |
131 | // product of its Tensors.shape. |
132 | // |
133 | // Fields: |
134 | // rowid: Ephemeral b-tree ID. |
135 | // tensor_rowid: References Tensors.rowid. |
136 | // idx: Index in flattened tensor, starting at 0. |
137 | // data: The string value at a particular index. NUL characters are |
138 | // permitted. |
139 | s.Update(Run(db, R"sql( |
140 | CREATE TABLE IF NOT EXISTS TensorStrings ( |
141 | rowid INTEGER PRIMARY KEY, |
142 | tensor_rowid INTEGER NOT NULL, |
143 | idx INTEGER NOT NULL, |
144 | data BLOB |
145 | ) |
146 | )sql" )); |
147 | |
148 | s.Update(Run(db, R"sql( |
149 | CREATE UNIQUE INDEX IF NOT EXISTS TensorStringIndex |
150 | ON TensorStrings (tensor_rowid, idx) |
151 | )sql" )); |
152 | |
153 | // Tags are series of Tensors. |
154 | // |
155 | // Fields: |
156 | // rowid: Ephemeral b-tree ID. |
157 | // tag_id: The Permanent ID of the Tag. |
158 | // run_id: Optional ID of associated Run. |
159 | // inserted_time: Float UNIX timestamp with µs precision. This is |
160 | // always the wall time of when the row was inserted into the |
161 | // DB. It may be used as a hint for an archival job. |
162 | // tag_name: The tag field in summary.proto, unique across Run. |
163 | // display_name: Optional for GUI and defaults to tag_name. |
164 | // plugin_name: Arbitrary TensorBoard plugin name for dispatch. |
165 | // plugin_data: Arbitrary data that plugin wants. |
166 | // |
167 | // TODO(jart): Maybe there should be a Plugins table? |
168 | s.Update(Run(db, R"sql( |
169 | CREATE TABLE IF NOT EXISTS Tags ( |
170 | rowid INTEGER PRIMARY KEY, |
171 | run_id INTEGER, |
172 | tag_id INTEGER NOT NULL, |
173 | inserted_time DOUBLE, |
174 | tag_name TEXT, |
175 | display_name TEXT, |
176 | plugin_name TEXT, |
177 | plugin_data BLOB |
178 | ) |
179 | )sql" )); |
180 | |
181 | s.Update(Run(db, R"sql( |
182 | CREATE UNIQUE INDEX IF NOT EXISTS TagIdIndex |
183 | ON Tags (tag_id) |
184 | )sql" )); |
185 | |
186 | s.Update(Run(db, R"sql( |
187 | CREATE UNIQUE INDEX IF NOT EXISTS |
188 | TagRunNameIndex |
189 | ON |
190 | Tags (run_id, tag_name) |
191 | WHERE |
192 | run_id IS NOT NULL |
193 | AND tag_name IS NOT NULL |
194 | )sql" )); |
195 | |
196 | // Runs are groups of Tags. |
197 | // |
198 | // Each Run usually represents a single attempt at training or testing |
199 | // a TensorFlow model, with a given set of hyper-parameters, whose |
200 | // summaries are written out to a single event logs directory with a |
201 | // monotonic step counter. |
202 | // |
203 | // Fields: |
204 | // rowid: Ephemeral b-tree ID. |
205 | // run_id: The Permanent ID of the Run. This has a 1:1 mapping |
206 | // with a SummaryWriter instance. If two writers spawn for a |
207 | // given (user_name, run_name, run_name) then each should |
208 | // allocate its own run_id and whichever writer puts it in the |
209 | // database last wins. The Tags / Tensors associated with the |
210 | // previous invocations will then enter limbo, where they may be |
211 | // accessible for certain operations, but should be garbage |
212 | // collected eventually. |
213 | // run_name: User-supplied string, unique across Experiment. |
214 | // experiment_id: Optional ID of associated Experiment. |
215 | // inserted_time: Float UNIX timestamp with µs precision. This is |
216 | // always the time the row was inserted into the database. It |
217 | // does not change. |
218 | // started_time: Float UNIX timestamp with µs precision. In the |
219 | // old summaries system that uses FileWriter, this is |
220 | // approximated as the first tf.Event.wall_time. In the new |
221 | // summaries system, it is the wall time of when summary writing |
222 | // started, from the perspective of whichever machine talks to |
223 | // the database. This field will be mutated if the run is |
224 | // restarted. |
225 | // finished_time: Float UNIX timestamp with µs precision of when |
226 | // SummaryWriter resource that created this run was destroyed. |
227 | // Once this value becomes non-NULL a Run and its Tags and |
228 | // Tensors should be regarded as immutable. |
229 | s.Update(Run(db, R"sql( |
230 | CREATE TABLE IF NOT EXISTS Runs ( |
231 | rowid INTEGER PRIMARY KEY, |
232 | experiment_id INTEGER, |
233 | run_id INTEGER NOT NULL, |
234 | inserted_time REAL, |
235 | started_time REAL, |
236 | finished_time REAL, |
237 | run_name TEXT |
238 | ) |
239 | )sql" )); |
240 | |
241 | s.Update(Run(db, R"sql( |
242 | CREATE UNIQUE INDEX IF NOT EXISTS RunIdIndex |
243 | ON Runs (run_id) |
244 | )sql" )); |
245 | |
246 | s.Update(Run(db, R"sql( |
247 | CREATE UNIQUE INDEX IF NOT EXISTS RunNameIndex |
248 | ON Runs (experiment_id, run_name) |
249 | WHERE run_name IS NOT NULL |
250 | )sql" )); |
251 | |
252 | // Experiments are groups of Runs. |
253 | // |
254 | // Fields: |
255 | // rowid: Ephemeral b-tree ID. |
256 | // user_id: Optional ID of associated User. |
257 | // experiment_id: The Permanent ID of the Experiment. |
258 | // experiment_name: User-supplied string, unique across User. |
259 | // inserted_time: Float UNIX timestamp with µs precision. This is |
260 | // always the time the row was inserted into the database. It |
261 | // does not change. |
262 | // started_time: Float UNIX timestamp with µs precision. This is |
263 | // the MIN(experiment.started_time, run.started_time) of each |
264 | // Run added to the database, including Runs which have since |
265 | // been overwritten. |
266 | // is_watching: A boolean indicating if someone is actively |
267 | // looking at this Experiment in the TensorBoard GUI. Tensor |
268 | // writers that do reservoir sampling can query this value to |
269 | // decide if they want the "keep last" behavior. This improves |
270 | // the performance of long running training while allowing low |
271 | // latency feedback in TensorBoard. |
272 | s.Update(Run(db, R"sql( |
273 | CREATE TABLE IF NOT EXISTS Experiments ( |
274 | rowid INTEGER PRIMARY KEY, |
275 | user_id INTEGER, |
276 | experiment_id INTEGER NOT NULL, |
277 | inserted_time REAL, |
278 | started_time REAL, |
279 | is_watching INTEGER, |
280 | experiment_name TEXT |
281 | ) |
282 | )sql" )); |
283 | |
284 | s.Update(Run(db, R"sql( |
285 | CREATE UNIQUE INDEX IF NOT EXISTS ExperimentIdIndex |
286 | ON Experiments (experiment_id) |
287 | )sql" )); |
288 | |
289 | s.Update(Run(db, R"sql( |
290 | CREATE UNIQUE INDEX IF NOT EXISTS ExperimentNameIndex |
291 | ON Experiments (user_id, experiment_name) |
292 | WHERE experiment_name IS NOT NULL |
293 | )sql" )); |
294 | |
295 | // Users are people who love TensorBoard. |
296 | // |
297 | // Fields: |
298 | // rowid: Ephemeral b-tree ID. |
299 | // user_id: The Permanent ID of the User. |
300 | // user_name: Unique user name. |
301 | // email: Optional unique email address. |
302 | // inserted_time: Float UNIX timestamp with µs precision. This is |
303 | // always the time the row was inserted into the database. It |
304 | // does not change. |
305 | s.Update(Run(db, R"sql( |
306 | CREATE TABLE IF NOT EXISTS Users ( |
307 | rowid INTEGER PRIMARY KEY, |
308 | user_id INTEGER NOT NULL, |
309 | inserted_time REAL, |
310 | user_name TEXT, |
311 | email TEXT |
312 | ) |
313 | )sql" )); |
314 | |
315 | s.Update(Run(db, R"sql( |
316 | CREATE UNIQUE INDEX IF NOT EXISTS UserIdIndex |
317 | ON Users (user_id) |
318 | )sql" )); |
319 | |
320 | s.Update(Run(db, R"sql( |
321 | CREATE UNIQUE INDEX IF NOT EXISTS UserNameIndex |
322 | ON Users (user_name) |
323 | WHERE user_name IS NOT NULL |
324 | )sql" )); |
325 | |
326 | s.Update(Run(db, R"sql( |
327 | CREATE UNIQUE INDEX IF NOT EXISTS UserEmailIndex |
328 | ON Users (email) |
329 | WHERE email IS NOT NULL |
330 | )sql" )); |
331 | |
332 | // Graphs define how Tensors flowed in Runs. |
333 | // |
334 | // Fields: |
335 | // rowid: Ephemeral b-tree ID. |
336 | // run_id: The Permanent ID of the associated Run. Only one Graph |
337 | // can be associated with a Run. |
338 | // graph_id: The Permanent ID of the Graph. |
339 | // inserted_time: Float UNIX timestamp with µs precision. This is |
340 | // always the wall time of when the row was inserted into the |
341 | // DB. It may be used as a hint for an archival job. |
342 | // graph_def: Contains the tf.GraphDef proto parts leftover which |
343 | // haven't been defined in SQL yet. |
344 | s.Update(Run(db, R"sql( |
345 | CREATE TABLE IF NOT EXISTS Graphs ( |
346 | rowid INTEGER PRIMARY KEY, |
347 | run_id INTEGER, |
348 | graph_id INTEGER NOT NULL, |
349 | inserted_time REAL, |
350 | graph_def BLOB |
351 | ) |
352 | )sql" )); |
353 | |
354 | s.Update(Run(db, R"sql( |
355 | CREATE UNIQUE INDEX IF NOT EXISTS GraphIdIndex |
356 | ON Graphs (graph_id) |
357 | )sql" )); |
358 | |
359 | s.Update(Run(db, R"sql( |
360 | CREATE UNIQUE INDEX IF NOT EXISTS GraphRunIndex |
361 | ON Graphs (run_id) |
362 | WHERE run_id IS NOT NULL |
363 | )sql" )); |
364 | |
365 | // Nodes are the vertices in Graphs. |
366 | // |
367 | // Fields: |
368 | // rowid: Ephemeral b-tree ID. |
369 | // graph_id: The Permanent ID of the associated Graph. |
370 | // node_id: ID for this node. This is more like a 0-index within |
371 | // the Graph. Please note indexes are allowed to be removed. |
372 | // node_name: Unique name for this Node within Graph. This is |
373 | // copied from the proto so it can be indexed. This is allowed |
374 | // to be NULL to save space on the index, in which case the |
375 | // node_def.name proto field must not be cleared. |
376 | // op: Copied from tf.NodeDef proto. |
377 | // device: Copied from tf.NodeDef proto. |
378 | // node_def: Contains the tf.NodeDef proto parts leftover which |
379 | // haven't been defined in SQL yet. |
380 | // |
381 | // TODO(jart): Make separate tables for op and device strings. |
382 | s.Update(Run(db, R"sql( |
383 | CREATE TABLE IF NOT EXISTS Nodes ( |
384 | rowid INTEGER PRIMARY KEY, |
385 | graph_id INTEGER NOT NULL, |
386 | node_id INTEGER NOT NULL, |
387 | node_name TEXT, |
388 | op TEXT, |
389 | device TEXT, |
390 | node_def BLOB |
391 | ) |
392 | )sql" )); |
393 | |
394 | s.Update(Run(db, R"sql( |
395 | CREATE UNIQUE INDEX IF NOT EXISTS NodeIdIndex |
396 | ON Nodes (graph_id, node_id) |
397 | )sql" )); |
398 | |
399 | s.Update(Run(db, R"sql( |
400 | CREATE UNIQUE INDEX IF NOT EXISTS NodeNameIndex |
401 | ON Nodes (graph_id, node_name) |
402 | WHERE node_name IS NOT NULL |
403 | )sql" )); |
404 | |
405 | // NodeInputs are directed edges between Nodes in Graphs. |
406 | // |
407 | // Fields: |
408 | // rowid: Ephemeral b-tree ID. |
409 | // graph_id: The Permanent ID of the associated Graph. |
410 | // node_id: Index of Node in question. This can be considered the |
411 | // 'to' vertex. |
412 | // idx: Used for ordering inputs on a given Node. |
413 | // input_node_id: Nodes.node_id of the corresponding input node. |
414 | // This can be considered the 'from' vertex. |
415 | // input_node_idx: Since a Node can output multiple Tensors, this |
416 | // is the integer index of which of those outputs is our input. |
417 | // NULL is treated as 0. |
418 | // is_control: If non-zero, indicates this input is a controlled |
419 | // dependency, which means this isn't an edge through which |
420 | // tensors flow. NULL means 0. |
421 | // |
422 | // TODO(jart): Rename to NodeEdges. |
423 | s.Update(Run(db, R"sql( |
424 | CREATE TABLE IF NOT EXISTS NodeInputs ( |
425 | rowid INTEGER PRIMARY KEY, |
426 | graph_id INTEGER NOT NULL, |
427 | node_id INTEGER NOT NULL, |
428 | idx INTEGER NOT NULL, |
429 | input_node_id INTEGER NOT NULL, |
430 | input_node_idx INTEGER, |
431 | is_control INTEGER |
432 | ) |
433 | )sql" )); |
434 | |
435 | s.Update(Run(db, R"sql( |
436 | CREATE UNIQUE INDEX IF NOT EXISTS NodeInputsIndex |
437 | ON NodeInputs (graph_id, node_id, idx) |
438 | )sql" )); |
439 | |
440 | return s; |
441 | } |
442 | |
443 | } // namespace tensorflow |
444 | |