/*
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "Base.h"

#include "glow/Exporter/ONNXModelWriter.h"
#include "glow/Flags/Flags.h"
#include "glow/Importer/ONNXIFIModelLoader.h"
#include "glow/Optimizer/GraphOptimizer/FunctionPasses.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Runtime/TraceExporter.h"

#include "llvm/Support/Format.h"
#include <glog/logging.h>

namespace glow {
namespace onnxifi {

extern bool GlowDumpDebugTraces;

namespace {
const char *compatibilityFunctionName = "check";

/// Get the width of \p dtype in bytes. If \p dtype is not recognized or is
/// undefined, return a width of 0.
unsigned getOnnxTensorDescriptorElementSize(unsigned dtype) {
  constexpr unsigned size = 17;
  const static std::array<unsigned, size> mapping{
      0u /* ONNXIFI_DATATYPE_UNDEFINED */,
      4u /* ONNXIFI_DATATYPE_FLOAT32 */,
      1u /* ONNXIFI_DATATYPE_UINT8 */,
      1u /* ONNXIFI_DATATYPE_INT8 */,
      2u /* ONNXIFI_DATATYPE_UINT16 */,
      2u /* ONNXIFI_DATATYPE_INT16 */,
      4u /* ONNXIFI_DATATYPE_INT32 */,
      8u /* ONNXIFI_DATATYPE_INT64 */,
      0u /* undefined */,
      0u /* undefined */,
      2u /* ONNXIFI_DATATYPE_FLOAT16 */,
      8u /* ONNXIFI_DATATYPE_FLOAT64 */,
      4u /* ONNXIFI_DATATYPE_UINT32 */,
      8u /* ONNXIFI_DATATYPE_UINT64 */,
      16u /* ONNXIFI_DATATYPE_COMPLEX64 */,
      32u /* ONNXIFI_DATATYPE_COMPLEX128 */,
      2u /* ONNXIFI_DATATYPE_BFLOAT16 */};
  return (dtype < size) ? mapping[dtype] : 0;
}

} // namespace

void saveOnnxifiModel(Function *F) {
  std::string fname = F->getName().str() + ".zip";
  LOG(INFO) << "Saving model to " << fname;
  Error err = Error::empty();
  constexpr size_t kIrVer = 7, kOpsetVer = 9;
  {
    ONNXModelWriter onnxWR(fname, *F, kIrVer, kOpsetVer, &err, false, true,
                           glow::flags::UseCustomOpsForExport);
  }
  if (ERR_TO_BOOL(std::move(err))) {
    LOG(ERROR) << "ONNXModelWriter failed to write model: " << fname;
  }
}

onnxStatus Backend::checkGraphCompatibility(const void *onnxModel,
                                            size_t onnxModelSize) {
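  // Load the model into a scratch Module/Function used only for this
  // compatibility check; nothing is kept once the check returns.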
  Module module;

  std::unique_ptr<ONNXIFIModelLoader> loader;
  // Note: Because we are not loading inputs as Placeholders, we must
  // explicitly disable constant folding in the loader, since the inputs will
  // be loaded as uninitialized Constants. We do this for now because backends
  // may require certain inputs of some ops to be Constants, such as a
  // Convolution's weights. In the future we should clean this up so that we
  // load Constants and Placeholders based on the actual eventual input graph.
  CompilationContext cctx;
  auto loaderOrErr = ONNXIFIModelLoader::parse(
      onnxModel, onnxModelSize, 0 /*weightCount*/,
      nullptr /*weightDescriptors*/, module, compatibilityFunctionName, cctx,
      /* staticPlaceholderTypes */ nullptr,
      /* loadInputsAsPlaceholdersForOnnx */ false, getUseOnnx(),
      /* constFoldInLoader */ false);
  if (loaderOrErr) {
    loader = std::move(*loaderOrErr);
  } else {
    // TODO: Use a more specific ONNXIFI error code here to denote what about
    // this operator is not supported (shape, type, etc).
    LOG(INFO)
        << "ONNXIFI checkGraphCompatibility incompatibility found when loading "
           "protobuf: "
        << ERR_TO_STRING(loaderOrErr.takeError(), /*warning*/ true);
    return ONNXIFI_STATUS_UNSUPPORTED_OPERATOR;
  }

  if (!glowBackend_) {
    return ONNXIFI_STATUS_INTERNAL_ERROR;
  }

  if (module.getFunctions().size() != 1) {
    LOG(ERROR) << "Should have exactly one Function in compatibility mode.";
    return ONNXIFI_STATUS_INTERNAL_ERROR;
  }
  Function *function = *module.getFunctions().begin();

  // Check whether the function verifies as valid for Glow/the backend -- if
  // not, conservatively return early as an unsupported operator.
  if (!function->verify(glowBackend_.get())) {
    LOG(INFO)
        << "ONNXIFI checkGraphCompatibility incompatibility: Glow function "
           "verification failed.";
    return ONNXIFI_STATUS_UNSUPPORTED_OPERATOR;
  }

  // Perform the normal optimization pipeline, returning an internal error if
  // we encounter an issue during optimization. Skip the backend support check
  // here because it is done below via acceptForExecution().
  cctx.optimizationOpts.skipBackendSupportCheck = true;
  auto optErr = glow::optimizeFunction(function, *glowBackend_, cctx);
  if (optErr) {
    LOG(ERROR) << "Error during glow::optimizeFunction():\n" +
                      ERR_TO_STRING(std::move(optErr));
    return ONNXIFI_STATUS_INTERNAL_ERROR;
  }

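  // Ask the backend whether it accepts each node that survived optimization;
  // a single rejected node makes the whole graph report as unsupported.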
  const auto &nodes = function->getNodes();
  for (const auto &node : nodes) {
    if (!glowBackend_->acceptForExecution(node)) {
      LOG(INFO) << "ONNXIFI checkGraphCompatibility incompatibility, op "
                   "rejected by backend: "
                << node.getDebugDesc();
      // TODO: Use a more specific ONNXIFI error code here to denote what
      // about this operator is not supported (shape, type, etc).
      return ONNXIFI_STATUS_UNSUPPORTED_OPERATOR;
    }
  }
  return ONNXIFI_STATUS_SUCCESS;
}

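// Fire the event at most once: record the status and mark the event fired
// under the lock, then wake all waiters. Returns false if the event had
// already been signalled.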
bool Event::signal(onnxStatus status) {
  {
    std::lock_guard<std::mutex> guard(mutex_);
    if (fired_) {
      return false;
    }
    status_ = status;
    fired_ = true;
  }
  cond_.notify_all();
  return true;
}

onnxStatus Event::wait() {
  std::unique_lock<std::mutex> guard(mutex_);
  cond_.wait(guard, [this] { return fired_ == true; });
  return status_;
}

std::pair<bool, onnxStatus> Event::waitFor(size_t timeoutMs) {
  DCHECK_GT(timeoutMs, 0)
      << "0 timeoutMs should instead use Event::wait to wait indefinitely";

  auto endTime =
      std::chrono::steady_clock::now() + std::chrono::milliseconds(timeoutMs);

  std::unique_lock<std::mutex> guard(mutex_);
  while (!fired_) {
    if (std::cv_status::timeout == cond_.wait_until(guard, endTime)) {
      return {/*signalled*/ false, status_};
    }
  }

  return {/*signalled*/ true, status_};
}

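// Preallocate a zero-filled Int64 tensor that can back any zero-length
// sequence input whose Placeholder holds at most maxSeqLength elements.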
void Graph::setZeroLengthSequence(dim_t maxSeqLength) {
  Type ty(ElemKind::Int64ITy, {maxSeqLength});
  zeroLengthSequence_.reset(ty);
  zeroLengthSequence_.zero();
}

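// Cache the loader's input/output name-to-Placeholder mappings. When the
// loader supplies a complete set of positional names, also build positional
// Placeholder vectors so tensors can later be bound by index instead of by
// name lookup; otherwise the positional vectors are left empty.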
bool Graph::bindPlaceholders(const ONNXIFIModelLoader &loader,
                             LoadedPlaceholderNameMap *loadedPHNames) {
  onnxInputToPlaceholder_ = loader.getInputVarsMapping();
  onnxOutputToPlaceholder_ = loader.getOutputVarsMapping();
  onnxInputNames_ = loader.getPositionalInputNames();
  onnxInputPlaceholders_.reserve(onnxInputNames_.size());
  for (const auto &i : onnxInputNames_) {
    const auto it = onnxInputToPlaceholder_.find(i);
    if (it == onnxInputToPlaceholder_.end()) {
      break;
    }
    onnxInputPlaceholders_.push_back(it->second);
  }
  if (onnxInputPlaceholders_.size() != onnxInputToPlaceholder_.size()) {
    onnxInputPlaceholders_.clear();
  }
  onnxOutputNames_ = loader.getPositionalOutputNames();
  onnxOutputPlaceholders_.reserve(onnxOutputNames_.size());
  for (const auto &i : onnxOutputNames_) {
    const auto it = onnxOutputToPlaceholder_.find(i);
    if (it == onnxOutputToPlaceholder_.end()) {
      break;
    }
    onnxOutputPlaceholders_.push_back(it->second);
  }
  if (onnxOutputPlaceholders_.size() != onnxOutputToPlaceholder_.size()) {
    onnxOutputPlaceholders_.clear();
  }

  // If requested, load all of the input/output PHs into loadedPHNames, which
  // is essentially onnxInputToPlaceholder_/onnxOutputToPlaceholder_ with
  // keys/values swapped and combined into a single map.
  if (loadedPHNames) {
#define REVERSE_MAPPING(ORIG_VEC_, ORIG_MAP_)                                  \
  if (ORIG_VEC_.size() > 0) {                                                  \
    for (size_t i = 0, e = ORIG_VEC_.size(); i < e; i++) {                     \
      auto &name = ORIG_VEC_[i];                                               \
      auto it = ORIG_MAP_.find(name);                                          \
      if (it == ORIG_MAP_.end()) {                                             \
        LOG(ERROR) << "Issue finding matching positional PH for " << name;     \
        return false;                                                          \
      }                                                                        \
      if (!loadedPHNames->emplace(it->second, std::make_pair(name, i))         \
               .second) {                                                      \
        LOG(ERROR)                                                             \
            << "Loading model error due to input or output name reuse: "       \
            << name;                                                           \
        return false;                                                          \
      }                                                                        \
    }                                                                          \
  }
    REVERSE_MAPPING(onnxInputNames_, onnxInputToPlaceholder_);
    REVERSE_MAPPING(onnxOutputNames_, onnxOutputToPlaceholder_);
#undef REVERSE_MAPPING
  }

  return true;
}

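// Bind each input descriptor to its Placeholder. Where possible the client's
// buffer is used directly (including partial and zero-length inputs);
// otherwise the data is copied or converted into a tensor drawn from the
// tensor pool.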
onnxStatus Graph::adjustInputs(uint32_t inputsCount,
                               const onnxTensorDescriptorV1 *inputDescriptors,
                               ExecutionContext *ctx) {
  // Create tensors for input placeholders
  auto &externalIOBindings = ctx->getExternalIOBindings();
  for (unsigned i = 0; i < inputsCount; ++i) {
    const auto &inOnnxTensor = inputDescriptors[i];
    auto *inOnnxBuffer = reinterpret_cast<void *>(inOnnxTensor.buffer);
    Placeholder *inPhPtr;

    if (onnxInputNames_.size() == inputsCount) {
      inPhPtr = onnxInputPlaceholders_[i];
    } else {
      auto inPhIt = onnxInputToPlaceholder_.find(inOnnxTensor.name);
      if (inPhIt == onnxInputToPlaceholder_.end()) {
        LOG(ERROR) << "Input Name Unknown: " << inOnnxTensor.name;
        return ONNXIFI_STATUS_UNIDENTIFIED_NAME;
      }
      inPhPtr = inPhIt->getValue();
    }

    const bool quantizedInput = inPhPtr->getType()->isQuantizedType();
    std::vector<dim_t> inOnnxTensorDims(inOnnxTensor.dimensions);
    size_t inOnnxTensorSize = 1;
    for (unsigned j = 0; j < inOnnxTensor.dimensions; ++j) {
      inOnnxTensorDims[j] = inOnnxTensor.shape[j];
      inOnnxTensorSize *= inOnnxTensorDims[j];
    }

    if (inOnnxTensorSize > inPhPtr->getType()->size()) {
      std::stringstream ss;
      for (const auto j : inOnnxTensorDims) {
        ss << j << ", ";
      }
      ss << " vs ";
      auto sizes = inPhPtr->getType()->dims();
      for (const auto j : sizes) {
        ss << j << ", ";
      }
      LOG(ERROR) << "Input tensor is too large: " << inOnnxTensorSize << " vs "
                 << inPhPtr->getType()->size() << ": " << inOnnxTensor.name
                 << ", shape: " << ss.str();
      return ONNXIFI_STATUS_INVALID_SHAPE;
    }

    // Only allocate a tensor if insufficient backing storage is provided.
    const unsigned elementSize =
        getOnnxTensorDescriptorElementSize(inOnnxTensor.dataType);
    const unsigned glowElementSize = inPhPtr->getType()->getElementSize();
    bool needsUpcast = false;
    if (elementSize != glowElementSize) {
      // If an input tensor is of int32 type and the placeholder expects int64,
      // we can allow upcasting the same way as Caffe2 allows.
      if (inOnnxTensor.dataType == ONNXIFI_DATATYPE_INT32 &&
          inPhPtr->getType()->getElementType() == ElemKind::Int64ITy) {
        needsUpcast = true;
      } else {
        LOG(ERROR) << "Input data width (" << elementSize
                   << ") is different from glow placeholder data width ("
                   << glowElementSize << "), tensor: " << inOnnxTensor.name
                   << ", onnxifi data type: " << inOnnxTensor.dataType
                   << ", glow data type: "
                   << inPhPtr->getType()->getElementName().data();
        return ONNXIFI_STATUS_INVALID_DATATYPE;
      }
    }
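    // Fast path: when no quantization or upcast is needed, try to bind the
    // client buffer (full or partial) or the shared zero-length sequence
    // directly and avoid a copy.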
    bool processed = true;
    size_t onnxBytes = inOnnxTensorSize * elementSize;
    if (!quantizedInput && !needsUpcast) {
      if (inPhPtr->dims().equals(inOnnxTensorDims)) {
        externalIOBindings.emplace_back(
            std::piecewise_construct, std::forward_as_tuple(inPhPtr),
            std::forward_as_tuple(inOnnxBuffer, inPhPtr->getType()));
      } else if (glow::flags::EnablePartialTensors &&
                 backendPtr_->getBackend().supportsPartialTensors()) {
        // We have a partial input buffer. Create a padded unowned tensor that
        // remembers the actual size of the input.
        externalIOBindings.emplace_back(
            std::piecewise_construct, std::forward_as_tuple(inPhPtr),
            std::forward_as_tuple(inOnnxBuffer, inPhPtr->getType(), onnxBytes));
      } else if (!inOnnxBuffer && inPhPtr->getType()->size() <=
                                      zeroLengthSequence_.getType().size()) {
        externalIOBindings.emplace_back(
            std::piecewise_construct, std::forward_as_tuple(inPhPtr),
            std::forward_as_tuple((void *)(zeroLengthSequence_.getUnsafePtr()),
                                  inPhPtr->getType()));
      } else {
        processed = false;
      }
    } else {
      processed = false;
    }

    if (processed) {
      continue;
    }

    llvm::Optional<Tensor> inputTensorOpt = tensorPool_.get(inPhPtr->getType());
    if (!inputTensorOpt.hasValue()) {
      DLOG(FATAL) << "Tensorpool tensor not found for input "
                  << inOnnxTensor.name;
      return ONNXIFI_STATUS_INTERNAL_ERROR;
    }
    // We want fresh DeviceResidencyInfo for this fresh Tensor.
    externalIOBindings.emplace_back(inPhPtr,
                                    std::move(inputTensorOpt.getValue()));
    Tensor &inputTensor = externalIOBindings.back().second;
    inputTensor.resetDeviceInfo();

    if (quantizedInput) {
      // Right now we only support quantized inputs with a single set of
      // quantization parameters.
      bool supported = true;
      if (inOnnxTensor.quantizationParams == 1) {
        if (inOnnxTensor.dataType == ONNXIFI_DATATYPE_UINT8) {
          inputTensor.zero();
          if (inOnnxBuffer) {
            auto TH = inputTensor.getHandle<int8_t>();
            uint8_t *data = (uint8_t *)(inOnnxBuffer);
            for (size_t k = 0; k < onnxBytes; ++k) {
              TH.raw(k) = (int8_t)(data[k] - UINT8_TO_INT8_SHIFT);
            }
          }
          continue;
        } else if (inOnnxTensor.dataType != ONNXIFI_DATATYPE_INT8) {
          supported = false;
        }
      } else {
        supported = false;
      }
      if (!supported) {
        return ONNXIFI_STATUS_INVALID_DATATYPE;
      }
    }

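    // Upcast int32 client data element-by-element into the int64-typed pooled
    // tensor.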
    if (needsUpcast) {
      if (!inOnnxBuffer) {
        LOG(ERROR) << "Can't upcast tensor " << inOnnxTensor.name
                   << " because buffer is not present";
        return ONNXIFI_STATUS_INTERNAL_ERROR;
      }
      if (inOnnxTensor.dataType == ONNXIFI_DATATYPE_INT32 &&
          inPhPtr->getType()->getElementType() == ElemKind::Int64ITy) {
        auto TH = inputTensor.getHandle<int64_t>();
        auto data = reinterpret_cast<int32_t *>(inOnnxBuffer);
        for (size_t k = 0; k < inOnnxTensorSize; ++k) {
          TH.raw(k) = (int64_t)data[k];
        }
      } else {
        LOG(ERROR) << "Unsupported upcast for tensor " << inOnnxTensor.name
                   << ", onnxifi data type: " << inOnnxTensor.dataType
                   << ", glow data type: "
                   << inPhPtr->getType()->getElementName().data();
        return ONNXIFI_STATUS_INVALID_DATATYPE;
      }
    }

    // Copy the input from onnxTensorDescriptor unless it has a NULL buffer
    // pointer (which is a valid case if the tensor is empty).
    if (inOnnxBuffer) {
      memcpy(inputTensor.getUnsafePtr(), inOnnxBuffer, onnxBytes);
      // Pad remaining space with zeroes.
      memset(inputTensor.getUnsafePtr() + onnxBytes, 0,
             inputTensor.getSizeInBytes() - onnxBytes);
    } else {
      inputTensor.zero();
    }
  }
  return ONNXIFI_STATUS_SUCCESS;
}

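// Bind inputs and outputs for a single inference request, optionally dumping
// them to ONNX files for debugging, then hand the ExecutionContext off to
// run().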
onnxStatus Graph::setIOAndRun(uint32_t inputsCount,
                              const onnxTensorDescriptorV1 *inputDescriptors,
                              uint32_t outputsCount,
                              const onnxTensorDescriptorV1 *outputDescriptors,
                              EventPtr outputEvent,
                              onnxTraceEventList *traceEvents) {
  auto ctx = glow::make_unique<ExecutionContext>();

  TraceContext *traceContext = nullptr;
  if (traceEvents || glow::flags::DumpDebugTraces ||
      TraceExporterRegistry::getInstance()->shouldTrace()) {
    ctx->setTraceContext(glow::make_unique<TraceContext>(TraceLevel::STANDARD));
    traceContext = ctx->getTraceContext();
    traceContext->setThreadName("Onnxifi");
  }
  TRACE_EVENT_SCOPE(traceContext, TraceLevel::RUNTIME, "Onnxifi::setIOAndRun");
  TRACE_EVENT_SCOPE_NAMED(traceContext, TraceLevel::RUNTIME, "adjustInputs",
                          aiEvent);

  auto r = adjustInputs(inputsCount, inputDescriptors, ctx.get());
  if (r != ONNXIFI_STATUS_SUCCESS) {
    return r;
  }

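  // If SaveIO is enabled, serialize the bound input tensors into an ONNX
  // GraphProto for offline debugging; partial tensors are trimmed to the
  // region that actually holds data.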
  size_t seq = 0;
  if (glow::onnxifi::flags::SaveIO) {
    seq = ioDumpCounter_++;
    std::stringstream ss;
    ss << "input_" << seq << ".onnx";
    std::ofstream of(ss.str(), std::ios::binary);
    if (!of) {
      LOG(ERROR) << "Cannot create input file " << ss.str();
    } else {
      ONNX_NAMESPACE::GraphProto inputG;
      for (const auto &p : ctx->getExternalIOBindings()) {
        auto *t = inputG.add_initializer();
        const auto &inputTensor = p.second;
        size_t unpaddedSize = inputTensor.getUnpaddedSizeInBytes();
        size_t tensorSize = inputTensor.getSizeInBytes();
        if (unpaddedSize == tensorSize) {
          ONNXModelWriter::writeTensor(inputTensor, t,
                                       glow::flags::UseCustomOpsForExport);
        } else {
          // If the input is a partial tensor, then save only the part that
          // has data.
          auto ty = inputTensor.getType();
          auto dims = ty.dims().vec();
          dims[0] = dims[0] * unpaddedSize / tensorSize;
          const auto &resized = inputTensor.getUnowned(dims);
          ONNXModelWriter::writeTensor(resized, t,
                                       glow::flags::UseCustomOpsForExport);
          VLOG(1) << "Writing partial tensor " << p.first->getName().str()
                  << " full size=" << inputTensor.getType().toString()
                  << " partial size=" << inputTensor.getUnpaddedSizeInBytes()
                  << " resized size=" << resized.getType().toString();
        }
        t->set_name(p.first->getName().str());
      }
      std::string buffer;
      inputG.SerializeToString(&buffer);
      of << buffer;
    }
  }

  TRACE_EVENT_SCOPE_END_NAMED(aiEvent);
  TRACE_EVENT_SCOPE_NAMED(traceContext, TraceLevel::RUNTIME,
                          "setOnnxifiOutputs", soEvent);

  // Create tensors for output placeholders
  auto &externalIOBindings = ctx->getExternalIOBindings();
  for (unsigned i = 0; i < outputsCount; ++i) {
    auto &outOnnxTensor =
        const_cast<onnxTensorDescriptorV1 &>(outputDescriptors[i]);
    auto *outOnnxBuffer = reinterpret_cast<void *>(outOnnxTensor.buffer);
    Placeholder *outPhPtr;

    if (outputsCount == onnxOutputNames_.size()) {
      outPhPtr = onnxOutputPlaceholders_[i];
    } else {
      auto outPhIt = onnxOutputToPlaceholder_.find(outOnnxTensor.name);
      if (outPhIt == onnxOutputToPlaceholder_.end()) {
        LOG(ERROR) << "Output name unknown: " << outOnnxTensor.name;
        return ONNXIFI_STATUS_UNIDENTIFIED_NAME;
      }
      outPhPtr = outPhIt->getValue();
    }
    // Compute the total size of the onnxifi tensor.
    std::vector<dim_t> outOnnxTensorDims(outOnnxTensor.dimensions);
    dim_t outOnnxTensorSize = 1;
    for (unsigned j = 0; j < outOnnxTensor.dimensions; ++j) {
      outOnnxTensorDims[j] = outOnnxTensor.shape[j];
      outOnnxTensorSize *= outOnnxTensorDims[j];
    }

    // Check that the tensor provided by onnxifi has the correct shape.
    if (!outPhPtr->dims().equals(outOnnxTensorDims)) {
      LOG(ERROR) << "Output tensor is the wrong shape: " << outOnnxTensorSize
                 << " total dims vs " << outPhPtr->getType()->size() << ": "
                 << outOnnxTensor.name;
      return ONNXIFI_STATUS_INVALID_SHAPE;
    }

    // Set the quantized output scale/offset. Channelwise quantized outputs
    // with multiple sets of quantization parameters are not supported for now.
    auto type = outPhPtr->getType();
    if (outOnnxTensor.quantizationParams == 1 && type->isQuantizedType()) {
      const_cast<float *>(outOnnxTensor.scales)[0] = type->getScale();
      const_cast<int32_t *>(outOnnxTensor.biases)[0] = type->getOffset();
    }

    // Create a Glow tensor backed by the memory from the provided onnxifi
    // tensor and bind it to the appropriate placeholder for the graph output.
    Tensor outputTensor(outOnnxBuffer, outPhPtr->getType());
    externalIOBindings.emplace_back(outPhPtr, std::move(outputTensor));
  }
  TRACE_EVENT_SCOPE_END_NAMED(soEvent);

  if (ctx->getTraceContext()) {
    ctx->getTraceContext()->setThreadName("Request Thread");
  }

  // End trace scope before calling into run. run() can trigger the completion
  // callback which deallocates ctx and traceContext. So it will no longer be
  // safe to access the trace context after calling into run().
  TRACE_EVENT_SCOPE_END();
  auto ret = run(std::move(ctx), outputEvent, traceEvents);
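  // Mirror the input dump: when SaveIO is enabled, wait for execution to
  // finish and then serialize the output tensors into an ONNX GraphProto.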
  if (glow::onnxifi::flags::SaveIO) {
    // We need to wait for the execution to finish in order to extract output
    // values.
    outputEvent->wait();
    std::stringstream ss;
    ss << "output_" << seq << ".onnx";
    std::ofstream of(ss.str(), std::ios::binary);
    if (!of) {
      LOG(ERROR) << "Cannot create output file " << ss.str();
    } else {
      ONNX_NAMESPACE::GraphProto inputG;
      for (unsigned i = 0; i < outputsCount; ++i) {
        const auto &outOnnxTensor = outputDescriptors[i];
        auto *outOnnxBuffer = reinterpret_cast<void *>(outOnnxTensor.buffer);
        Placeholder *outPhPtr;
        if (outputsCount == onnxOutputNames_.size()) {
          outPhPtr = onnxOutputPlaceholders_[i];
        } else {
          auto outPhIt = onnxOutputToPlaceholder_.find(outOnnxTensor.name);
          CHECK(outPhIt != onnxOutputToPlaceholder_.end());
          outPhPtr = outPhIt->getValue();
        }
        Tensor outputTensor(outOnnxBuffer, outPhPtr->getType());
        auto *t = inputG.add_initializer();
        ONNXModelWriter::writeTensor(outputTensor, t,
                                     glow::flags::UseCustomOpsForExport);
        t->set_name(outPhPtr->getName().str());
      }
      std::string buffer;
      inputG.SerializeToString(&buffer);
      of << buffer;
    }
  }

  return ret;
}

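// Convert the Glow trace events into caller-owned onnxTraceEvent structs
// stored in traceEvents; releaseTraceEvents() below frees them.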
void Graph::setTraceEvents(onnxTraceEventList *traceEvents,
                           TraceContext *traceContext) {
  /// Export trace events to any registered glow trace exporters.
  if (traceContext) {
    TraceExporterRegistry::getInstance()->exportTrace(traceContext);
  }

  if (!traceEvents || !traceContext) {
    return;
  }
  /// Internally we use steady_clock, but our interface is system_clock
  /// timestamps. Do a simple conversion.
  auto steadyTS = TraceEvent::now();
  auto systemTS = std::chrono::duration_cast<std::chrono::microseconds>(
                      std::chrono::system_clock::now().time_since_epoch())
                      .count();

  // Timestamps are uint64_t, so branch rather than use abs(): always subtract
  // the smaller value from the larger one to avoid underflowing the uint64_t,
  // then negate the result if the timestamp should be moved backwards.
  int64_t offset = long(steadyTS) > systemTS ? -(steadyTS - systemTS)
                                             : (systemTS - steadyTS);
  TRACE_EVENT_SCOPE(traceContext, TraceLevel::RUNTIME,
                    "Onnxifi::setTraceEvents");

  std::vector<onnxTraceEvent *> traceEventsVec;
  for (const auto &glowTraceEvent : traceContext->getTraceEvents()) {
    auto *traceEvent = new onnxTraceEvent();
    traceEvent->eventType = glowTraceEvent.type;
    traceEvent->timestamp = glowTraceEvent.timestamp + offset;
    traceEvent->tid = glowTraceEvent.tid;
    traceEvent->duration = glowTraceEvent.duration;
    size_t nameSize = std::min(glowTraceEvent.name.size(),
                               (size_t)ONNXIFI_TRACE_EVENT_NAME_SIZE);
    strncpy(traceEvent->eventName, glowTraceEvent.name.c_str(), nameSize);
    traceEvent->eventName[nameSize] = '\0';
    traceEventsVec.push_back(traceEvent);
  }

  traceEvents->numEvents = traceEventsVec.size();
  traceEvents->traceEvents = new onnxTraceEvent *[traceEventsVec.size()];
  DCHECK(traceEvents->traceEvents);
  std::copy(traceEventsVec.begin(), traceEventsVec.end(),
            traceEvents->traceEvents);
}

void Graph::releaseTraceEvents(onnxTraceEventList *traceEvents) {
  DCHECK(traceEvents);
  for (uint64_t i = 0; i < traceEvents->numEvents; ++i) {
    onnxTraceEvent *traceEvent = traceEvents->traceEvents[i];
    delete traceEvent;
  }

  delete[] traceEvents->traceEvents;
}

Graph::Graph(BackendPtr backendPtr) : backendPtr_(backendPtr) {}

} // namespace onnxifi
} // namespace glow