1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_DEBUG_DEBUG_GRAPH_UTILS_H_ |
17 | #define TENSORFLOW_CORE_DEBUG_DEBUG_GRAPH_UTILS_H_ |
18 | |
19 | #include <unordered_map> |
20 | #include <vector> |
21 | |
22 | #include "tensorflow/core/common_runtime/debugger_state_interface.h" |
23 | #include "tensorflow/core/common_runtime/device.h" |
24 | #include "tensorflow/core/graph/graph.h" |
25 | #include "tensorflow/core/lib/core/status.h" |
26 | #include "tensorflow/core/platform/protobuf.h" |
27 | #include "tensorflow/core/protobuf/debug.pb.h" |
28 | |
29 | namespace tensorflow { |
30 | |
31 | class DebugNodeInserter { |
32 | public: |
33 | // EXPERIMENTAL: Insert special debug ops (e.g., DebugIdentity) to graph for |
34 | // debugging. Currently, such ops need to take exactly one input and has the |
35 | // string attribute "tensor_name" to indicate what tensor it watches. |
36 | // For example, before the node insertion, the graph may look like: |
37 | // |
38 | // A:0 -----------1----------> B |
39 | // | |
40 | // ---------2-----------> C |
41 | // |
42 | // wherein the output slot 0 of node A feeds as the input to nodes B through |
43 | // edge 1 and to node C through edge 2. |
44 | // After the node insertion, assuming both B and C have non-Ref input, the |
45 | // graph becomes: |
46 | // A:0 ---3---> Copy -----------4----------> B |
47 | // | |
48 | // ---------5--------> C |
49 | // | |
50 | // ---------6--------> X |
51 | // |
52 | // If a node (e.g., B) has Ref input, the graph becomes: |
53 | // |
54 | // --------------------------------> B |
55 | // | |
56 | // A:0 ---3-----> Copy -----------4----------> C |
57 | // | |
58 | // -----------5--------> X |
59 | // |
60 | // In other words, we do not feed Refs to deep-copies to downstream nodes. |
61 | // |
62 | // Copy is the inserted deep-copy node that copies the input tensor on-device |
63 | // (e.g., CPU-to-CPU or GPU-to-GPU deep copy) that reduces the likelihood of |
64 | // racy updates during the debug watches. X is the newly created debug node |
65 | // that transforms the input (copy of the watched tensor) into a debug signal. |
66 | // |
67 | // DebugIdentity is the simplest debugging paradigm, in which the debug signal |
68 | // (i.e., X:0) equals the tensor itself. More sophisticated debug ops can be |
69 | // used to transform the tensor into other debug signals. An example is the |
70 | // DebugNanCounter op. |
71 | // |
72 | // If the nodes (A, B and C) are located on GPU and the edges from A to B or C |
73 | // is HOST_MEMORY, then the CopyHost op will be used instead of the Copy op. |
74 | static Status InsertNodes( |
75 | const protobuf::RepeatedPtrField<DebugTensorWatch>& watches, Graph* graph, |
76 | Device* device); |
77 | |
78 | // Set the parallel_iterations attribute of TensorFlow while loops |
79 | // (specifically the nodes for which IsEnter() returns true) to 1 to prevent |
80 | // any node from being executed multiple times concurrently and |
81 | // generating temporally-overlapping debug Tensor dumps. |
82 | static void DeparallelizeWhileLoops(Graph* graph, Device* device); |
83 | |
84 | // Get canonical name of a copy node. |
85 | static const string GetCopyNodeName(const string& node_name, |
86 | const int output_slot); |
87 | |
88 | // Get canonical name of a debug node. |
89 | static const string GetDebugNodeName(const string& tensor_name, |
90 | const int debug_op_num, |
91 | const string& debug_op_name); |
92 | |
93 | private: |
94 | static Status CreateCopyNode( |
95 | Graph* graph, const DeviceType device_type, const bool is_host_memory, |
96 | const string& src_node_name, const int src_output, const DataType src_dt, |
97 | const string& tensor_name, const std::vector<string>& debug_ops, |
98 | const std::vector<string>& debug_urls, Node** copy_node); |
99 | |
100 | // Parse the debug_op_name string to extract proper op name and attributes. |
101 | // debug_op_name can be the proper op name only, e.g., "DebugNumericSummary". |
102 | // It can also contain customizable keys and values. Each key-value pair is |
103 | // connected with an equal sign ("="). Multiple key-value pairs are separated |
104 | // with semicolons (";"), which optional whitespace in between, e.g., |
105 | // "DebugNumericSummary(mute_if_healthy=true, lower_bound=-100.0)". |
106 | static Status ParseDebugOpName( |
107 | const string& debug_op_name, string* debug_op_name_proper, |
108 | std::unordered_map<string, string>* attributes); |
109 | |
110 | static Status SetDebugNodeAttributes( |
111 | Node* debug_node, const std::unordered_map<string, string>& attributes); |
112 | |
113 | static Status CreateDebugNode(Graph* graph, const Device& device, |
114 | const string& src_copy_node_name, |
115 | const DataType src_dt, |
116 | const string& tensor_name, |
117 | const std::vector<string>& debug_urls, |
118 | const int debug_op_num, |
119 | const string& debug_op_name, Node** debug_node); |
120 | // TODO(cais): Cut down the number of args to this method. |
121 | |
122 | friend class DebugGraphUtilsTest; |
123 | }; |
124 | } // namespace tensorflow |
125 | |
126 | #endif // TENSORFLOW_CORE_DEBUG_DEBUG_GRAPH_UTILS_H_ |
127 | |