1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "NetworkComparator.h" |
18 | |
19 | #include "glow/Graph/Graph.h" |
20 | #include "glow/Graph/Node.h" |
21 | |
22 | #include "glow/ExecutionEngine/ExecutionEngine.h" |
23 | #include "glow/Graph/Hook.h" |
24 | #include "glow/Graph/Utils.h" |
25 | #include "glow/Support/Debug.h" |
26 | #include "llvm/ADT/DenseMap.h" |
27 | #include "llvm/Support/FormatVariadic.h" |
28 | |
29 | #define DEBUG_TYPE "verifier" |
30 | |
31 | using namespace glow; |
32 | |
void NetworkComparatorBase::dumpTensors(
    std::unordered_map<std::string, Tensor *> tensors,
    const std::string &layerName, const std::string &prefix) {
  // TODO: Need to add different flavours of dumping.
  // Each tensor is written to a file named "<prefix>_<tensorName>_<layerName>"
  // in the current working directory.
  for (const auto &namedTensor : tensors) {
    std::error_code EC;
    DCHECK(namedTensor.second) << "Tensor is null\n";
    llvm::raw_fd_ostream fs(prefix + "_" + namedTensor.first + "_" + layerName,
                            EC);
    namedTensor.second->dump(fs, std::numeric_limits<unsigned>::max());
    fs.close();
  }
}
bool NetworkComparatorBase::checkTensors(
    std::unordered_map<std::string, Tensor *> &refOuts,
    std::unordered_map<std::string, Tensor *> &checkOuts) {
  if (refOuts.size() != checkOuts.size()) {
    LOG(ERROR) << "Backends produced a different number of results\n";
    return false;
  }
  // Match tensors by name: iterating two unordered_maps in parallel would
  // compare whatever happens to share an iteration position, since the
  // iteration order of an unordered_map is unspecified.
  for (auto &ref : refOuts) {
    auto itCheck = checkOuts.find(ref.first);
    if (itCheck == checkOuts.end()) {
      LOG(ERROR) << "Result " << ref.first
                 << " is missing from the test backend\n";
      return false;
    }
    if (!ref.second->isEqual(*itCheck->second, numericCmpThreshold_,
                             /* verbose */ true)) {
      LOG(ERROR) << "Error: " << ref.first << "\n";
      return false;
    }
  }
  return true;
}
69 | |
70 | NetworkComparatorBase::NetworkComparatorBase( |
71 | Module &mod, const std::string &referenceBackend, |
72 | const std::string &testBackend, float numericCmpThreshold, |
73 | bool dumpTensorsForBadLayer) |
74 | : numericCmpThreshold_(numericCmpThreshold), inputModule_(&mod), |
75 | dumpTensorsForBadLayer_(dumpTensorsForBadLayer) { |
  DCHECK(mod.getFunctions().size() == 1)
      << "Module must have exactly one function";
78 | EERefNet_.setBackendName(referenceBackend); |
79 | mod.clone(&EERefNet_.getModule()); |
80 | EETestNet_.setBackendName(testBackend); |
81 | mod.clone(&EETestNet_.getModule()); |
82 | } |
83 | |
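// Clones the input module into a fresh ExecutionEngine for the requested
// backend, hooks the inputs and outputs of the layer `layerName` with Save
// nodes, runs inference, and returns the hooked tensors.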
84 | NetworkComparatorBase::InOutTensors RecursiveLayerComparator::hookAndRun( |
85 | llvm::StringRef layerName, PlaceholderBindings *bindings, bool isRef) { |
86 | ExecutionEngine execEngine; |
87 | execEngine.setBackendName(isRef ? EERefNet_.getBackendName() |
88 | : EETestNet_.getBackendName()); |
89 | inputModule_->clone(&execEngine.getModule()); |
90 | HookedFunction hook = hookNode(execEngine.getSingleFunctionFromModule(), |
91 | layerName, /* hookInputs */ true); |
92 | PlaceholderBindings &inferBindings = |
93 | isRef ? inferBindingsRef_ : inferBindingsCheck_; |
94 | inferBindings.allocate(execEngine.getModule().getPlaceholders()); |
  // Copy the tensors from the input bindings to the inference bindings that
  // are fed to the temporary network.
97 | for (auto &PH : bindings->pairs()) { |
98 | auto iPH = inferBindings.getPlaceholderByNameSlow(PH.first->getName()); |
99 | inferBindings.get(iPH)->assign(&PH.second); |
100 | } |
101 | InOutTensors inOutTensors; |
102 | for (const auto &P : hook.outputs) { |
103 | Tensor *t = inferBindings.get(P); |
104 | std::string str = P->getName().str(); |
105 | DCHECK(t) << "Placeholder not " << str << " found in the bindings\n" ; |
106 | inOutTensors.outputs[str] = t; |
107 | } |
108 | for (const auto &P : hook.inputs) { |
109 | Tensor *t = inferBindings.get(P); |
110 | std::string str = P->getName().str(); |
111 | DCHECK(t) << "Placeholder " << str << " not found in the bindings\n" ; |
112 | inOutTensors.inputs[str] = t; |
113 | } |
114 | auto fName = hook.function->getName(); |
115 | execEngine.compile(CompilationMode::Infer); |
116 | execEngine.run(inferBindings, fName); |
117 | return inOutTensors; |
118 | } |
119 | |
120 | bool RecursiveLayerComparator::verify(PlaceholderBindings *bindings) { |
121 | bool allPassed = true; |
122 | // Sort the nodes in topological order to test nodes in that order. |
123 | // Tensors flow through the network in topological order, testing the layers |
124 | // in that order will allow us to see how errors propagate. |
125 | GraphPostOrderVisitor visitor(*EERefNet_.getSingleFunctionFromModule()); |
126 | llvm::ArrayRef<Node *> order = visitor.getPostOrder(); |
  for (auto const *nodePtr : order) {
    DCHECK(nodePtr) << "Node is null!";
    auto &node = *nodePtr;
130 | if (llvm::isa<SaveNode>(&node) || llvm::isa<Constant>(&node) || |
131 | llvm::isa<Placeholder>(&node)) { |
132 | continue; |
133 | } |
134 | llvm::StringRef layerName = node.getName(); |
135 | llvm::StringRef kName = node.getKindName(); |
136 | LOG(INFO) << "Verifying layer: " << layerName.data() |
137 | << "\tType: " << kName.data() << "\n" ; |
138 | auto refOuts = hookAndRun(layerName, bindings, /*isRef*/ true); |
139 | auto checkOuts = hookAndRun(layerName, bindings, /*isRef*/ false); |
140 | if (!checkTensors(refOuts.outputs, checkOuts.outputs)) { |
141 | LOG(ERROR) << "\tResults differ\n" ; |
142 | LOG(ERROR) << "\tDumping tensors\n" ; |
143 | dumpTensors(refOuts.outputs, layerName.data(), "ref_output" ); |
144 | dumpTensors(refOuts.inputs, layerName.data(), "input" ); |
145 | brokenLayers_.push_back(layerName.str()); |
146 | allPassed = false; |
147 | } |
148 | inferBindingsCheck_.clear(); |
149 | inferBindingsRef_.clear(); |
150 | LOG(INFO) << "DONE Verifying layer: " << layerName.data() << "\n" ; |
151 | } |
152 | return allPassed; |
153 | } |

RecursiveLayerComparator::RecursiveLayerComparator(
155 | Module &mod, const std::string &referenceBackend, |
156 | const std::string &testBackend, float numericCmpThreshold, |
157 | bool dumpTensorsForBadLayer) |
158 | : NetworkComparatorBase(mod, referenceBackend, testBackend, |
159 | numericCmpThreshold, dumpTensorsForBadLayer) {} |
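
// Example usage (hypothetical driver code): compare the module's function
// layer by layer between two backends.
//   RecursiveLayerComparator cmp(mod, "Interpreter", "CPU",
//                                /* numericCmpThreshold */ 1e-5,
//                                /* dumpTensorsForBadLayer */ true);
//   bool allMatch = cmp.verify(&bindings); // bindings hold the input tensors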
160 | |
161 | void IntermediateLayerComparator::hookSingleNodeInPlace( |
162 | Node &node, std::list<SaveNode *> &saveNodes, |
163 | std::list<Placeholder *> &hookPlaceholders) { |
164 | std::string layerName = node.getName().str(); |
165 | for (unsigned i = 0; i < node.getNumResults(); ++i) { |
166 | std::string saveName = node.getOutputName(i).str(); |
    saveName += "_" + layerName + "_hook";
168 | SaveNode *save = |
169 | node.getParent()->createSave(saveName, node.getNthResult(i)); |
170 | saveNodes.emplace_back(save); |
171 | hookPlaceholders.emplace_back(save->getPlaceholder()); |
172 | } |
173 | } |
174 | void IntermediateLayerComparator::hookNodesInPlace( |
175 | Function *func, std::list<SaveNode *> &saveNodes, |
176 | std::list<Placeholder *> &hookPlaceholders) { |
177 | DEBUG_GLOW(LOG(INFO) << "Before hooking the function: " << func->dumpDAG()); |
178 | for (Node &node : func->getNodes()) { |
179 | if (llvm::isa<SaveNode>(&node) || llvm::isa<Constant>(&node) || |
180 | llvm::isa<Placeholder>(&node)) { |
181 | continue; |
182 | } |
183 | hookSingleNodeInPlace(node, saveNodes, hookPlaceholders); |
184 | } |
185 | DEBUG_GLOW(LOG(INFO) << "After hooking the function: " << func->dumpDAG()); |
186 | } |
187 | |
void IntermediateLayerComparator::copyInputBindingsToHookedBindings(
    PlaceholderBindings &hookedBindings, PlaceholderBindings &inputBindings) {
  // Copy tensors from the input bindings to the bindings used for inference.
  for (auto &PH : inputBindings.pairs()) {
    auto iPH = hookedBindings.getPlaceholderByNameSlow(PH.first->getName());
    hookedBindings.get(iPH)->assign(&PH.second);
  }
}
196 | |
void IntermediateLayerComparator::getIntermediateResults(
    ExecutionEngine &networkExecEngine, PlaceholderBindings *inputBindings,
    PlaceholderBindings &hookedBindings) {
  std::list<SaveNode *> saveNodes;
  std::list<Placeholder *> hookPlaceholders;
  Function *func = networkExecEngine.getSingleFunctionFromModule();
  std::string newName = func->getName().str();
  Function *hookedFunction = func->clone(newName + "_hooked");
  // Instrument all the nodes in hookedFunction by inserting Save nodes.
  hookNodesInPlace(hookedFunction, saveNodes, hookPlaceholders);
  hookedBindings.allocate(networkExecEngine.getModule().getPlaceholders());
  // Copy values from inputBindings to the allocated hookedBindings.
  copyInputBindingsToHookedBindings(hookedBindings, *inputBindings);
  networkExecEngine.compile(CompilationMode::Infer);
  networkExecEngine.run(hookedBindings, hookedFunction->getName());
  DEBUG_GLOW(LOG(INFO) << "Network has " << hookPlaceholders.size()
                       << " hooks inserted and "
                       << hookedBindings.pairs().size() << " total bindings");
  DEBUG_GLOW(LOG(INFO) << "Network after compiling and running the function: "
                       << hookedFunction->dumpDAG());
  hookedFunction->getParent()->eraseFunction(hookedFunction);
}
219 | |
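// For example, if the node under test is a ReluNode whose input is the
// "Result" output of "conv1", the loop below looks up the hooked placeholder
// for that output in refHookedBindings_ and feeds its tensor into the
// one-layer network, so the layer under test runs on known-good reference
// inputs.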
220 | void IntermediateLayerComparator::fillSingleLayerInputs( |
221 | const Node &originalNode, Node *singleLayerNode, Module &singleLayerMod, |
222 | std::unordered_map<std::string, Tensor *> &singleLayerInputMap, |
223 | PlaceholderBindings &singleLayerBindings) { |
224 | // Copy the types used in the node to the singleLayerModule. |
225 | for (unsigned idx = 0, e = singleLayerNode->getNumResults(); idx < e; ++idx) { |
226 | singleLayerNode->setType( |
227 | idx, singleLayerMod.uniqueType(*singleLayerNode->getType(idx))); |
228 | } |
229 | |
230 | for (size_t idx = 0; idx < originalNode.getNumInputs(); idx++) { |
231 | size_t resNo = originalNode.getNthInput(idx).getResNo(); |
232 | Node *inputNode = originalNode.getNthInput(idx).getNode(); |
233 | std::string hookedPlaceholderName = inputNode->getName().str(); |
234 | Node *inputToFeed = nullptr; |
    DCHECK(!llvm::isa<SaveNode>(inputNode))
        << "SaveNode as an input was not hooked!";
    if (llvm::isa<Constant>(inputNode)) {
      // Constants live in the module, not the function; fetch them from the
      // original input module, since the per-run cloned modules are deleted
      // after running.
240 | DEBUG_GLOW(LOG(INFO) << "\t\tInput name: " << hookedPlaceholderName |
241 | << " NodeType: " << inputNode->getKindName() |
242 | << "\n" ); |
243 | Constant *constNode = |
244 | inputModule_->getConstantByName(hookedPlaceholderName); |
      DCHECK(constNode) << "Constant not found\n";
246 | Tensor &payLoad = constNode->getPayloadMutable(); |
247 | inputToFeed = singleLayerMod.createConstant(constNode->getName(), payLoad, |
248 | constNode->getLayout()); |
249 | singleLayerInputMap[originalNode.getInputName(idx)] = &payLoad; |
    } else {
      if (!llvm::isa<Placeholder>(inputNode)) {
        // A hooked node saves its output under a mangled name, so rebuild
        // that name here. Placeholder names stay the same since placeholders
        // don't get hooked.
        std::string outputName = inputNode->getOutputName(resNo).str();
        hookedPlaceholderName =
            outputName + "_" + hookedPlaceholderName + "_hook";
      }
258 | DEBUG_GLOW(LOG(INFO) << "\t\tInput name: " << hookedPlaceholderName |
259 | << " NodeType: " << inputNode->getKindName() |
260 | << "\n" ); |
261 | Placeholder *PH = |
262 | refHookedBindings_.getPlaceholderByNameSlow(hookedPlaceholderName); |
      DCHECK(PH) << "Placeholder not found in the hooked bindings";
264 | Tensor *payloadTensor = refHookedBindings_.get(PH); |
265 | Placeholder *singleLayerPH = singleLayerMod.createPlaceholder( |
266 | PH->getType(), PH->getName(), PH->isTraining(), PH->getLayout()); |
267 | singleLayerBindings.allocate(singleLayerPH); |
268 | singleLayerBindings.get(singleLayerPH)->assign(payloadTensor); |
269 | inputToFeed = singleLayerPH; |
270 | singleLayerInputMap[originalNode.getInputName(idx)] = payloadTensor; |
271 | } |
272 | singleLayerNode->setNthInput(idx, inputToFeed); |
273 | } |
274 | } |
275 | |
276 | void IntermediateLayerComparator::runAndGetoutputSingleLayer( |
277 | ExecutionEngine &singleLayerExecEng, |
278 | PlaceholderBindings &singleLayerBindings, Node *singleLayerNode, |
279 | std::unordered_map<std::string, Tensor *> &singleLayerOutputs, |
280 | std::unordered_map<std::string, Tensor *> &refOutputs) { |
281 | std::list<SaveNode *> singleLayerSaveNodes; |
282 | std::list<Placeholder *> singleLayerOutputPHs; |
283 | hookSingleNodeInPlace(*singleLayerNode, singleLayerSaveNodes, |
284 | singleLayerOutputPHs); |
285 | singleLayerBindings.allocate(singleLayerOutputPHs); |
286 | DEBUG_GLOW(LOG(INFO) << "\t\tSingle layer network" |
287 | << singleLayerNode->getParent()->dumpDAG()); |
288 | singleLayerExecEng.compile(CompilationMode::Infer); |
289 | singleLayerExecEng.run(singleLayerBindings); |
290 | for (Placeholder *PH : singleLayerOutputPHs) { |
291 | singleLayerOutputs[PH->getName().str()] = singleLayerBindings.get(PH); |
292 | Placeholder *refPH = |
293 | refHookedBindings_.getPlaceholderByNameSlow(PH->getName()); |
294 | refOutputs[PH->getName().str()] = refHookedBindings_.get(refPH); |
295 | } |
296 | } |
297 | |
298 | bool IntermediateLayerComparator::testSingleLayer(const Node *node) { |
299 | bool pass = true; |
300 | std::unordered_map<std::string, Tensor *> singleLayerInputMap; |
301 | ExecutionEngine singleLayerExecEng(EETestNet_.getBackendName()); |
302 | PlaceholderBindings singleLayerBindings; |
303 | Module &singleLayerMod = singleLayerExecEng.getModule(); |
304 | Function *singleLayerFunc = singleLayerMod.createFunction(node->getName()); |
305 | Node *singleLayerNode = node->clone(); |
306 | llvm::StringRef layerName = node->getName(); |
307 | llvm::StringRef kindName = node->getKindName(); |
308 | LOG(INFO) << "Verifying layer: " << layerName.data() |
309 | << "\tType: " << kindName.data() << "\n" ; |
310 | singleLayerFunc->addNode(singleLayerNode); |
311 | // 1) Dynamically build a net made out of one layer and feed the |
312 | // placeholders in. |
313 | fillSingleLayerInputs(*node, singleLayerNode, singleLayerMod, |
314 | singleLayerInputMap, singleLayerBindings); |
315 | // 2) Run the network and get outputs. |
316 | std::unordered_map<std::string, Tensor *> singleLayerOutputs; |
317 | std::unordered_map<std::string, Tensor *> refOutputs; |
318 | runAndGetoutputSingleLayer(singleLayerExecEng, singleLayerBindings, |
319 | singleLayerNode, singleLayerOutputs, refOutputs); |
320 | if (!checkTensors(refOutputs, singleLayerOutputs)) { |
321 | LOG(ERROR) << "\tResults differ\n" ; |
322 | LOG(ERROR) << "\tDumping tensors\n" ; |
323 | dumpTensors(refOutputs, layerName.str(), "ref_output" ); |
324 | dumpTensors(singleLayerInputMap, layerName.str(), "input" ); |
325 | brokenLayers_.push_back(layerName.str()); |
326 | pass = false; |
327 | } |
328 | LOG(INFO) << "DONE Verifying layer: " << layerName.data() << "\n" ; |
329 | return pass; |
330 | } |
331 | |
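// Verification strategy: run both networks once with every layer hooked. If
// all intermediate tensors already match, report success; otherwise replay
// each layer in isolation on the test backend, feeding it the reference
// backend's intermediate results, to localize the broken layers.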
332 | bool IntermediateLayerComparator::verify(PlaceholderBindings *bindings) { |
333 | bool allPassed = true; |
334 | // Instrument all the layers with Save nodes. |
335 | getIntermediateResults(EERefNet_, bindings, refHookedBindings_); |
336 | getIntermediateResults(EETestNet_, bindings, testHookedBindings_); |
  if (PlaceholderBindings::compare(&refHookedBindings_, &testHookedBindings_,
                                   numericCmpThreshold_)) {
    LOG(INFO) << "All intermediate results match.";
340 | return true; |
341 | } |
342 | // Sort the nodes in topological order to test nodes in that order. |
343 | // Tensors flow through the network in topological order, testing the layers |
344 | // in that order will allow us to see how errors propagate. |
345 | Function *func = *inputModule_->getFunctions().begin(); |
346 | GraphPostOrderVisitor visitor(*func); |
347 | llvm::ArrayRef<Node *> order = visitor.getPostOrder(); |
348 | for (auto const *nodePtr : order) { |
349 | if (llvm::isa<SaveNode>(nodePtr) || llvm::isa<Constant>(nodePtr) || |
350 | llvm::isa<Placeholder>(nodePtr)) { |
351 | continue; |
352 | } |
353 | allPassed &= testSingleLayer(nodePtr); |
354 | } |
355 | return allPassed; |
356 | } |
357 | |
358 | IntermediateLayerComparator::IntermediateLayerComparator( |
359 | Module &mod, const std::string &referenceBackend, |
360 | const std::string &testBackend, float numericCmpThreshold, |
361 | bool dumpTensorsForBadLayer) |
362 | : NetworkComparatorBase(mod, referenceBackend, testBackend, |
363 | numericCmpThreshold, dumpTensorsForBadLayer) {} |
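
// IntermediateLayerComparator is driven the same way (hypothetical sketch):
//   IntermediateLayerComparator cmp(mod, "Interpreter", "CPU", 1e-5,
//                                   /* dumpTensorsForBadLayer */ false);
//   bool allMatch = cmp.verify(&bindings);
// Unlike RecursiveLayerComparator, it compiles and runs each full network
// only once, re-running single layers only when a mismatch is detected.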
364 | |