1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "NetworkComparator.h"
18
19#include "glow/Graph/Graph.h"
20#include "glow/Graph/Node.h"
21
22#include "glow/ExecutionEngine/ExecutionEngine.h"
23#include "glow/Graph/Hook.h"
24#include "glow/Graph/Utils.h"
25#include "glow/Support/Debug.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/Support/FormatVariadic.h"
28
29#define DEBUG_TYPE "verifier"
30
31using namespace glow;
32
33void NetworkComparatorBase::dumpTensors(
34 std::unordered_map<std::string, Tensor *> tensors,
35 const std::string &layerName, const std::string &prefix) {
36 // TODO: Need to add different flavours of dumping.
37 auto it = tensors.begin();
38 while (it != tensors.end()) {
39 std::error_code EC;
40 DCHECK(it->second) << "Tensor is Null\n";
41 llvm::raw_fd_ostream fs(prefix + "_" + it->first + "_" + layerName, EC);
42 it->second->dump(fs, std::numeric_limits<unsigned>::max());
43 fs.close();
44 it++;
45 }
46}
47bool NetworkComparatorBase::checkTensors(
48 std::unordered_map<std::string, Tensor *> &refOuts,
49 std::unordered_map<std::string, Tensor *> &checkOuts) {
50 if (refOuts.size() != checkOuts.size()) {
51 LOG(ERROR) << "Backends produced different number of results\n";
52 return false;
53 }
54 auto itRef = refOuts.begin();
55 auto itCheck = checkOuts.begin();
56 auto endRef = refOuts.end();
57 auto endCheck = checkOuts.end();
58 while (itRef != endRef && itCheck != endCheck) {
59 if (!itRef->second->isEqual(*(itCheck->second), numericCmpThreshold_,
60 /* verbose */ true)) {
61 LOG(INFO) << "Error: " << itRef->first << "\n";
62 return false;
63 }
64 itRef++;
65 itCheck++;
66 }
67 return true;
68}
69
70NetworkComparatorBase::NetworkComparatorBase(
71 Module &mod, const std::string &referenceBackend,
72 const std::string &testBackend, float numericCmpThreshold,
73 bool dumpTensorsForBadLayer)
74 : numericCmpThreshold_(numericCmpThreshold), inputModule_(&mod),
75 dumpTensorsForBadLayer_(dumpTensorsForBadLayer) {
76 DCHECK(mod.getFunctions().size() == 1)
77 << "Module must have exactly one functions";
78 EERefNet_.setBackendName(referenceBackend);
79 mod.clone(&EERefNet_.getModule());
80 EETestNet_.setBackendName(testBackend);
81 mod.clone(&EETestNet_.getModule());
82}
83
84NetworkComparatorBase::InOutTensors RecursiveLayerComparator::hookAndRun(
85 llvm::StringRef layerName, PlaceholderBindings *bindings, bool isRef) {
86 ExecutionEngine execEngine;
87 execEngine.setBackendName(isRef ? EERefNet_.getBackendName()
88 : EETestNet_.getBackendName());
89 inputModule_->clone(&execEngine.getModule());
90 HookedFunction hook = hookNode(execEngine.getSingleFunctionFromModule(),
91 layerName, /* hookInputs */ true);
92 PlaceholderBindings &inferBindings =
93 isRef ? inferBindingsRef_ : inferBindingsCheck_;
94 inferBindings.allocate(execEngine.getModule().getPlaceholders());
95 // Copy the tensors from the bindings to the inference bindings
96 // we feed to the Temp network.
97 for (auto &PH : bindings->pairs()) {
98 auto iPH = inferBindings.getPlaceholderByNameSlow(PH.first->getName());
99 inferBindings.get(iPH)->assign(&PH.second);
100 }
101 InOutTensors inOutTensors;
102 for (const auto &P : hook.outputs) {
103 Tensor *t = inferBindings.get(P);
104 std::string str = P->getName().str();
105 DCHECK(t) << "Placeholder not " << str << " found in the bindings\n";
106 inOutTensors.outputs[str] = t;
107 }
108 for (const auto &P : hook.inputs) {
109 Tensor *t = inferBindings.get(P);
110 std::string str = P->getName().str();
111 DCHECK(t) << "Placeholder " << str << " not found in the bindings\n";
112 inOutTensors.inputs[str] = t;
113 }
114 auto fName = hook.function->getName();
115 execEngine.compile(CompilationMode::Infer);
116 execEngine.run(inferBindings, fName);
117 return inOutTensors;
118}
119
120bool RecursiveLayerComparator::verify(PlaceholderBindings *bindings) {
121 bool allPassed = true;
122 // Sort the nodes in topological order to test nodes in that order.
123 // Tensors flow through the network in topological order, testing the layers
124 // in that order will allow us to see how errors propagate.
125 GraphPostOrderVisitor visitor(*EERefNet_.getSingleFunctionFromModule());
126 llvm::ArrayRef<Node *> order = visitor.getPostOrder();
127 for (auto const *nodePtr : order) {
128 auto &node = *nodePtr;
129 DCHECK(nodePtr) << "Node is empty!";
130 if (llvm::isa<SaveNode>(&node) || llvm::isa<Constant>(&node) ||
131 llvm::isa<Placeholder>(&node)) {
132 continue;
133 }
134 llvm::StringRef layerName = node.getName();
135 llvm::StringRef kName = node.getKindName();
136 LOG(INFO) << "Verifying layer: " << layerName.data()
137 << "\tType: " << kName.data() << "\n";
138 auto refOuts = hookAndRun(layerName, bindings, /*isRef*/ true);
139 auto checkOuts = hookAndRun(layerName, bindings, /*isRef*/ false);
140 if (!checkTensors(refOuts.outputs, checkOuts.outputs)) {
141 LOG(ERROR) << "\tResults differ\n";
142 LOG(ERROR) << "\tDumping tensors\n";
143 dumpTensors(refOuts.outputs, layerName.data(), "ref_output");
144 dumpTensors(refOuts.inputs, layerName.data(), "input");
145 brokenLayers_.push_back(layerName.str());
146 allPassed = false;
147 }
148 inferBindingsCheck_.clear();
149 inferBindingsRef_.clear();
150 LOG(INFO) << "DONE Verifying layer: " << layerName.data() << "\n";
151 }
152 return allPassed;
153}
/// Constructs a RecursiveLayerComparator. All arguments are forwarded
/// unchanged to the NetworkComparatorBase constructor; no additional state is
/// initialized here.
RecursiveLayerComparator::RecursiveLayerComparator(
    Module &mod, const std::string &referenceBackend,
    const std::string &testBackend, float numericCmpThreshold,
    bool dumpTensorsForBadLayer)
    : NetworkComparatorBase(mod, referenceBackend, testBackend,
                            numericCmpThreshold, dumpTensorsForBadLayer) {}
160
161void IntermediateLayerComparator::hookSingleNodeInPlace(
162 Node &node, std::list<SaveNode *> &saveNodes,
163 std::list<Placeholder *> &hookPlaceholders) {
164 std::string layerName = node.getName().str();
165 for (unsigned i = 0; i < node.getNumResults(); ++i) {
166 std::string saveName = node.getOutputName(i).str();
167 saveName += "_" + layerName + "_hook";
168 SaveNode *save =
169 node.getParent()->createSave(saveName, node.getNthResult(i));
170 saveNodes.emplace_back(save);
171 hookPlaceholders.emplace_back(save->getPlaceholder());
172 }
173}
174void IntermediateLayerComparator::hookNodesInPlace(
175 Function *func, std::list<SaveNode *> &saveNodes,
176 std::list<Placeholder *> &hookPlaceholders) {
177 DEBUG_GLOW(LOG(INFO) << "Before hooking the function: " << func->dumpDAG());
178 for (Node &node : func->getNodes()) {
179 if (llvm::isa<SaveNode>(&node) || llvm::isa<Constant>(&node) ||
180 llvm::isa<Placeholder>(&node)) {
181 continue;
182 }
183 hookSingleNodeInPlace(node, saveNodes, hookPlaceholders);
184 }
185 DEBUG_GLOW(LOG(INFO) << "After hooking the function: " << func->dumpDAG());
186}
187
188void IntermediateLayerComparator::copyInputBindingsToHookedBindings(
189 PlaceholderBindings &hookedBindigs, PlaceholderBindings &inputBindings) {
190 // Copy tensors from input bindings to the bindings we use for Inference.
191 for (auto &PH : inputBindings.pairs()) {
192 auto iPH = hookedBindigs.getPlaceholderByNameSlow(PH.first->getName());
193 hookedBindigs.get(iPH)->assign(&PH.second);
194 }
195}
196
/// Runs an instrumented copy of the network held by \p networkExecEngine and
/// leaves the results in \p hookedBindigs.
///
/// \param networkExecEngine engine whose single function is cloned, hooked,
///                          compiled and run.
/// \param inputBindings     bindings with the network inputs; dereferenced
///                          without a null check, so it must not be null.
/// \param hookedBindigs     bindings that are allocated for all placeholders
///                          of the engine's module and used for the run.
///
/// The statement order below matters: hooks must be inserted before the
/// placeholders are allocated, and inputs must be copied before running.
void IntermediateLayerComparator::getIntermediateResults(
    ExecutionEngine &networkExecEngine, PlaceholderBindings *inputBindings,
    PlaceholderBindings &hookedBindigs) {
  std::list<SaveNode *> saveNodes;
  std::list<Placeholder *> hookPlaceholders;
  Function *func = networkExecEngine.getSingleFunctionFromModule();
  std::string newName = func->getName().str();
  // Work on a clone named "<function name>_hooked" so the original function
  // is not modified by the hooks.
  Function *hookedFunction = func->clone(newName + "_hooked");
  // Instrument all the nodes in hookedFunction by inserting Save nodes.
  hookNodesInPlace(hookedFunction, saveNodes, hookPlaceholders);
  // Allocate tensors for every placeholder of the module; at this point that
  // includes the placeholders created for the hooks above.
  hookedBindigs.allocate(networkExecEngine.getModule().getPlaceholders());
  // Copy values from inputBindings to the allocated hookedBindigs.
  copyInputBindingsToHookedBindings(hookedBindigs, *inputBindings);
  networkExecEngine.compile(CompilationMode::Infer);
  // Run only the hooked clone, selected by name.
  networkExecEngine.run(hookedBindigs, hookedFunction->getName());
  DEBUG_GLOW(LOG(INFO) << "Network has " << hookPlaceholders.size()
                       << " hooks inserted and " << hookedBindigs.pairs().size()
                       << " total bindings");
  DEBUG_GLOW(LOG(INFO) << "Network after running and compiling the function: "
                       << hookedFunction->dumpDAG());
  // Remove the instrumented clone from its module once the run is done.
  hookedFunction->getParent()->eraseFunction(hookedFunction);
}
219
220void IntermediateLayerComparator::fillSingleLayerInputs(
221 const Node &originalNode, Node *singleLayerNode, Module &singleLayerMod,
222 std::unordered_map<std::string, Tensor *> &singleLayerInputMap,
223 PlaceholderBindings &singleLayerBindings) {
224 // Copy the types used in the node to the singleLayerModule.
225 for (unsigned idx = 0, e = singleLayerNode->getNumResults(); idx < e; ++idx) {
226 singleLayerNode->setType(
227 idx, singleLayerMod.uniqueType(*singleLayerNode->getType(idx)));
228 }
229
230 for (size_t idx = 0; idx < originalNode.getNumInputs(); idx++) {
231 size_t resNo = originalNode.getNthInput(idx).getResNo();
232 Node *inputNode = originalNode.getNthInput(idx).getNode();
233 std::string hookedPlaceholderName = inputNode->getName().str();
234 Node *inputToFeed = nullptr;
235 DCHECK(!llvm::isa<SaveNode>(inputNode))
236 << "SaveNode as an input was not hooked!";
237 if (llvm::isa<Constant>(inputNode)) {
238 // Constants live in the module, getting them from the reloaded
239 // module. They get deleted after running.
240 DEBUG_GLOW(LOG(INFO) << "\t\tInput name: " << hookedPlaceholderName
241 << " NodeType: " << inputNode->getKindName()
242 << "\n");
243 Constant *constNode =
244 inputModule_->getConstantByName(hookedPlaceholderName);
245 DCHECK(constNode) << "Constant not found\n";
246 Tensor &payLoad = constNode->getPayloadMutable();
247 inputToFeed = singleLayerMod.createConstant(constNode->getName(), payLoad,
248 constNode->getLayout());
249 singleLayerInputMap[originalNode.getInputName(idx)] = &payLoad;
250 } else {
251 if (!llvm::isa<Placeholder>(inputNode)) {
252 // If the input is placeholder the name stays the
253 // same since these don't get hooked.
254 std::string outputName = inputNode->getOutputName(resNo).str();
255 hookedPlaceholderName =
256 outputName + "_" + hookedPlaceholderName + "_hook";
257 }
258 DEBUG_GLOW(LOG(INFO) << "\t\tInput name: " << hookedPlaceholderName
259 << " NodeType: " << inputNode->getKindName()
260 << "\n");
261 Placeholder *PH =
262 refHookedBindings_.getPlaceholderByNameSlow(hookedPlaceholderName);
263 DCHECK(PH) << "Placeholder not found in the hooked bindings";
264 Tensor *payloadTensor = refHookedBindings_.get(PH);
265 Placeholder *singleLayerPH = singleLayerMod.createPlaceholder(
266 PH->getType(), PH->getName(), PH->isTraining(), PH->getLayout());
267 singleLayerBindings.allocate(singleLayerPH);
268 singleLayerBindings.get(singleLayerPH)->assign(payloadTensor);
269 inputToFeed = singleLayerPH;
270 singleLayerInputMap[originalNode.getInputName(idx)] = payloadTensor;
271 }
272 singleLayerNode->setNthInput(idx, inputToFeed);
273 }
274}
275
276void IntermediateLayerComparator::runAndGetoutputSingleLayer(
277 ExecutionEngine &singleLayerExecEng,
278 PlaceholderBindings &singleLayerBindings, Node *singleLayerNode,
279 std::unordered_map<std::string, Tensor *> &singleLayerOutputs,
280 std::unordered_map<std::string, Tensor *> &refOutputs) {
281 std::list<SaveNode *> singleLayerSaveNodes;
282 std::list<Placeholder *> singleLayerOutputPHs;
283 hookSingleNodeInPlace(*singleLayerNode, singleLayerSaveNodes,
284 singleLayerOutputPHs);
285 singleLayerBindings.allocate(singleLayerOutputPHs);
286 DEBUG_GLOW(LOG(INFO) << "\t\tSingle layer network"
287 << singleLayerNode->getParent()->dumpDAG());
288 singleLayerExecEng.compile(CompilationMode::Infer);
289 singleLayerExecEng.run(singleLayerBindings);
290 for (Placeholder *PH : singleLayerOutputPHs) {
291 singleLayerOutputs[PH->getName().str()] = singleLayerBindings.get(PH);
292 Placeholder *refPH =
293 refHookedBindings_.getPlaceholderByNameSlow(PH->getName());
294 refOutputs[PH->getName().str()] = refHookedBindings_.get(refPH);
295 }
296}
297
298bool IntermediateLayerComparator::testSingleLayer(const Node *node) {
299 bool pass = true;
300 std::unordered_map<std::string, Tensor *> singleLayerInputMap;
301 ExecutionEngine singleLayerExecEng(EETestNet_.getBackendName());
302 PlaceholderBindings singleLayerBindings;
303 Module &singleLayerMod = singleLayerExecEng.getModule();
304 Function *singleLayerFunc = singleLayerMod.createFunction(node->getName());
305 Node *singleLayerNode = node->clone();
306 llvm::StringRef layerName = node->getName();
307 llvm::StringRef kindName = node->getKindName();
308 LOG(INFO) << "Verifying layer: " << layerName.data()
309 << "\tType: " << kindName.data() << "\n";
310 singleLayerFunc->addNode(singleLayerNode);
311 // 1) Dynamically build a net made out of one layer and feed the
312 // placeholders in.
313 fillSingleLayerInputs(*node, singleLayerNode, singleLayerMod,
314 singleLayerInputMap, singleLayerBindings);
315 // 2) Run the network and get outputs.
316 std::unordered_map<std::string, Tensor *> singleLayerOutputs;
317 std::unordered_map<std::string, Tensor *> refOutputs;
318 runAndGetoutputSingleLayer(singleLayerExecEng, singleLayerBindings,
319 singleLayerNode, singleLayerOutputs, refOutputs);
320 if (!checkTensors(refOutputs, singleLayerOutputs)) {
321 LOG(ERROR) << "\tResults differ\n";
322 LOG(ERROR) << "\tDumping tensors\n";
323 dumpTensors(refOutputs, layerName.str(), "ref_output");
324 dumpTensors(singleLayerInputMap, layerName.str(), "input");
325 brokenLayers_.push_back(layerName.str());
326 pass = false;
327 }
328 LOG(INFO) << "DONE Verifying layer: " << layerName.data() << "\n";
329 return pass;
330}
331
332bool IntermediateLayerComparator::verify(PlaceholderBindings *bindings) {
333 bool allPassed = true;
334 // Instrument all the layers with Save nodes.
335 getIntermediateResults(EERefNet_, bindings, refHookedBindings_);
336 getIntermediateResults(EETestNet_, bindings, testHookedBindings_);
337 if (refHookedBindings_.compare(&refHookedBindings_, &testHookedBindings_,
338 numericCmpThreshold_)) {
339 LOG(INFO) << "All intermediate results match.";
340 return true;
341 }
342 // Sort the nodes in topological order to test nodes in that order.
343 // Tensors flow through the network in topological order, testing the layers
344 // in that order will allow us to see how errors propagate.
345 Function *func = *inputModule_->getFunctions().begin();
346 GraphPostOrderVisitor visitor(*func);
347 llvm::ArrayRef<Node *> order = visitor.getPostOrder();
348 for (auto const *nodePtr : order) {
349 if (llvm::isa<SaveNode>(nodePtr) || llvm::isa<Constant>(nodePtr) ||
350 llvm::isa<Placeholder>(nodePtr)) {
351 continue;
352 }
353 allPassed &= testSingleLayer(nodePtr);
354 }
355 return allPassed;
356}
357
/// Constructs an IntermediateLayerComparator. All arguments are forwarded
/// unchanged to the NetworkComparatorBase constructor; no additional state is
/// initialized here.
IntermediateLayerComparator::IntermediateLayerComparator(
    Module &mod, const std::string &referenceBackend,
    const std::string &testBackend, float numericCmpThreshold,
    bool dumpTensorsForBadLayer)
    : NetworkComparatorBase(mod, referenceBackend, testBackend,
                            numericCmpThreshold, dumpTensorsForBadLayer) {}
364