1 | /* |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include "Base.h" |
17 | |
18 | #include "glow/Exporter/ONNXModelWriter.h" |
19 | #include "glow/Flags/Flags.h" |
20 | #include "glow/Importer/ONNXIFIModelLoader.h" |
21 | #include "glow/Optimizer/GraphOptimizer/FunctionPasses.h" |
22 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
23 | #include "glow/Runtime/TraceExporter.h" |
24 | |
25 | #include "llvm/Support/Format.h" |
26 | #include <glog/logging.h> |
27 | |
28 | namespace glow { |
29 | namespace onnxifi { |
30 | |
31 | extern bool GlowDumpDebugTraces; |
32 | |
namespace {
/// Name given to the temporary Function created while checking backend
/// compatibility of a model in Backend::checkGraphCompatibility().
const char *compatibilityFunctionName = "check";

/// Get the width in bytes of the \p dtype. If dtype is not recognized or
/// undefined, we return 0 width.
unsigned getOnnxTensorDescriptorElementSize(unsigned dtype) {
  // Lookup table indexed by the ONNXIFI_DATATYPE_* value itself; gaps in the
  // enum (8, 9) are 0. constexpr: built at compile time, no init guard.
  static constexpr std::array<unsigned, 17> mapping{
      0u /* ONNXIFI_DATATYPE_UNDEFINED */,
      4u /* ONNXIFI_DATATYPE_FLOAT32 */,
      1u /* ONNXIFI_DATATYPE_UINT8 */,
      1u /* ONNXIFI_DATATYPE_INT8 */,
      2u /* ONNXIFI_DATATYPE_UINT16 */,
      2u /* ONNXIFI_DATATYPE_INT16 */,
      4u /* ONNXIFI_DATATYPE_INT32 */,
      8u /* ONNXIFI_DATATYPE_INT64 */,
      0u /* undefined */,
      0u /* undefined */,
      2u /* ONNXIFI_DATATYPE_FLOAT16 */,
      8u /* ONNXIFI_DATATYPE_FLOAT64 */,
      4u /* ONNXIFI_DATATYPE_UINT32 */,
      8u /* ONNXIFI_DATATYPE_UINT64 */,
      16u /* ONNXIFI_DATATYPE_COMPLEX64 */,
      32u /* ONNXIFI_DATATYPE_COMPLEX128 */,
      2u /* ONNXIFI_DATATYPE_BFLOAT16 */};
  return (dtype < mapping.size()) ? mapping[dtype] : 0;
}

} // namespace
62 | |
63 | void saveOnnxifiModel(Function *F) { |
64 | std::string fname = F->getName().str() + ".zip" ; |
65 | LOG(INFO) << "Saving model to " << fname; |
66 | Error err = Error::empty(); |
67 | constexpr size_t kIrVer = 7, kOpsetVer = 9; |
68 | { |
69 | ONNXModelWriter onnxWR(fname, *F, kIrVer, kOpsetVer, &err, false, true, |
70 | glow::flags::UseCustomOpsForExport); |
71 | } |
72 | if (ERR_TO_BOOL(std::move(err))) { |
73 | LOG(ERROR) << "ONNXModelWriter failed to write model: " << fname; |
74 | } |
75 | } |
76 | |
/// Check whether the serialized ONNX model at \p onnxModel (of
/// \p onnxModelSize bytes) can be loaded, optimized, and fully executed by
/// this backend. Returns ONNXIFI_STATUS_SUCCESS when compatible, otherwise an
/// ONNXIFI error code describing the failure class.
onnxStatus Backend::checkGraphCompatibility(const void *onnxModel,
                                            size_t onnxModelSize) {
  Module module;

  std::unique_ptr<ONNXIFIModelLoader> loader;
  // Note: Because we are not loading inputs as Placeholders, we need to
  // explicitly not do constant folding in the loader. This is because the
  // inputs will be loaded as uninitialized Constants. We do this for now
  // because backends may have limitations on some ops to have inputs as
  // Constants, such as a Convolution's weights. In the future we should clean
  // this up so that we load Constants and Placeholders based on the actual
  // eventual input graph.
  CompilationContext cctx;
  auto loaderOrErr = ONNXIFIModelLoader::parse(
      onnxModel, onnxModelSize, 0 /*weightCount*/,
      nullptr /*weightDescriptors*/, module, compatibilityFunctionName, cctx,
      /* staticPlaceholderTypes */ nullptr,
      /* loadInputsAsPlaceholdersForOnnx */ false, getUseOnnx(),
      /* constFoldInLoader */ false);
  if (loaderOrErr) {
    loader = std::move(*loaderOrErr);
  } else {
    // TODO: Use a more specific ONNXIFI error code here to denote what about
    // this operator is not supported (shape, type, etc).
    LOG(INFO)
        << "ONNXIFI checkGraphCompatibility incompatibility found when loading "
           "protobuf: "
        << ERR_TO_STRING(loaderOrErr.takeError(), /*warning*/ true);
    return ONNXIFI_STATUS_UNSUPPORTED_OPERATOR;
  }

  if (!glowBackend_) {
    return ONNXIFI_STATUS_INTERNAL_ERROR;
  }

  // The loader should have created exactly one Function (named
  // compatibilityFunctionName) in the module; anything else is unexpected.
  if (module.getFunctions().size() != 1) {
    LOG(ERROR) << "Should have exactly one Function in compatibiliity mode.";
    return ONNXIFI_STATUS_INTERNAL_ERROR;
  }
  Function *function = *module.getFunctions().begin();

  // Check if the function is verified as valid for Glow/the backend -- if not
  // then conservatively early return on unsupported operator.
  if (!function->verify(glowBackend_.get())) {
    LOG(INFO)
        << "ONNXIFI checkGraphCompatibility incompatibility: Glow function "
           "verification failed.";
    return ONNXIFI_STATUS_UNSUPPORTED_OPERATOR;
  }

  // Perform the normal optimization pipeline, returning an internal error if we
  // encounter an issue during optimization. Skip backend support checking
  // because we check it next below via acceptForExecution().
  cctx.optimizationOpts.skipBackendSupportCheck = true;
  auto optErr = glow::optimizeFunction(function, *glowBackend_, cctx);
  if (optErr) {
    LOG(ERROR) << "Error during glow::optimizeFunction():\n" +
                      ERR_TO_STRING(std::move(optErr));
    return ONNXIFI_STATUS_INTERNAL_ERROR;
  }

  // Ask the backend about every node that survived optimization; a single
  // rejected op makes the whole graph incompatible.
  const auto &nodes = function->getNodes();
  for (const auto &node : nodes) {
    if (!glowBackend_->acceptForExecution(node)) {
      LOG(INFO) << "ONNXIFI checkGraphCompatibility incompatibility, op "
                   "rejected by backend: "
                << node.getDebugDesc();
      // TODO: Use a more specific ONNXIFI error code here to denote what
      // about this operator is not supported (shape, type, etc).
      return ONNXIFI_STATUS_UNSUPPORTED_OPERATOR;
    }
  }
  return ONNXIFI_STATUS_SUCCESS;
}
151 | |
152 | bool Event::signal(onnxStatus status) { |
153 | { |
154 | std::lock_guard<std::mutex> guard(mutex_); |
155 | if (fired_) { |
156 | return false; |
157 | } |
158 | status_ = status; |
159 | fired_ = true; |
160 | } |
161 | cond_.notify_all(); |
162 | return true; |
163 | } |
164 | |
165 | onnxStatus Event::wait() { |
166 | std::unique_lock<std::mutex> guard(mutex_); |
167 | cond_.wait(guard, [this] { return fired_ == true; }); |
168 | return status_; |
169 | } |
170 | |
171 | std::pair<bool, onnxStatus> Event::waitFor(size_t timeoutMs) { |
172 | DCHECK_GT(timeoutMs, 0) |
173 | << "0 timeoutMs should instead use Event::wait to wait indefinitely" ; |
174 | |
175 | auto endTime = |
176 | std::chrono::steady_clock::now() + std::chrono::milliseconds(timeoutMs); |
177 | |
178 | std::unique_lock<std::mutex> guard(mutex_); |
179 | while (!fired_) { |
180 | if (std::cv_status::timeout == cond_.wait_until(guard, endTime)) { |
181 | return {/*signalled*/ false, status_}; |
182 | } |
183 | } |
184 | |
185 | return {/*signalled*/ true, status_}; |
186 | } |
187 | |
188 | void Graph::setZeroLengthSequence(dim_t maxSeqLength) { |
189 | Type ty(ElemKind::Int64ITy, {maxSeqLength}); |
190 | zeroLengthSequence_.reset(ty); |
191 | zeroLengthSequence_.zero(); |
192 | } |
193 | |
/// Cache the loader's input/output name &lt;-&gt; Placeholder mappings on this
/// Graph. The positional vectors onnxInputPlaceholders_ /
/// onnxOutputPlaceholders_ are kept only when every positional name resolves
/// to a placeholder; otherwise they are cleared so later lookups fall back to
/// by-name resolution. If \p loadedPHNames is non-null, it is filled with the
/// reverse mapping Placeholder -> (name, positional index). Returns false on
/// failure (unresolvable or duplicated name).
bool Graph::bindPlaceholders(const ONNXIFIModelLoader &loader,
                             LoadedPlaceholderNameMap *loadedPHNames) {
  onnxInputToPlaceholder_ = loader.getInputVarsMapping();
  onnxOutputToPlaceholder_ = loader.getOutputVarsMapping();
  onnxInputNames_ = loader.getPositionalInputNames();
  onnxInputPlaceholders_.reserve(onnxInputNames_.size());
  for (const auto &i : onnxInputNames_) {
    const auto it = onnxInputToPlaceholder_.find(i);
    if (it == onnxInputToPlaceholder_.end()) {
      // A positional name with no placeholder makes positional lookup
      // unusable; stop early (the size check below will trigger the clear).
      break;
    }
    onnxInputPlaceholders_.push_back(it->second);
  }
  if (onnxInputPlaceholders_.size() != onnxInputToPlaceholder_.size()) {
    onnxInputPlaceholders_.clear();
  }
  onnxOutputNames_ = loader.getPositionalOutputNames();
  onnxOutputPlaceholders_.reserve(onnxOutputNames_.size());
  for (const auto &i : onnxOutputNames_) {
    const auto it = onnxOutputToPlaceholder_.find(i);
    if (it == onnxOutputToPlaceholder_.end()) {
      // Same early-out as for inputs.
      break;
    }
    onnxOutputPlaceholders_.push_back(it->second);
  }
  if (onnxOutputPlaceholders_.size() != onnxOutputToPlaceholder_.size()) {
    onnxOutputPlaceholders_.clear();
  }

  // If requested, load all of the input/output PHs into loadedPHNames, which is
  // essentially the onnxInputToPlaceholder_/onnxOutputToPlaceholder_ with
  // keys/values swapped and combined in a single map.
  if (loadedPHNames) {
    // For each positional name, find its placeholder and record
    // placeholder -> (name, position). Fails on an unknown positional name or
    // when two names map to the same placeholder (emplace returns false).
#define REVERSE_MAPPING(ORIG_VEC_, ORIG_MAP_)                                  \
  if (ORIG_VEC_.size() > 0) {                                                  \
    for (size_t i = 0, e = ORIG_VEC_.size(); i < e; i++) {                     \
      auto &name = ORIG_VEC_[i];                                               \
      auto it = ORIG_MAP_.find(name);                                          \
      if (it == ORIG_MAP_.end()) {                                             \
        LOG(ERROR) << "Issue finding matching positional PH for " << name;     \
        return false;                                                          \
      }                                                                        \
      if (!loadedPHNames->emplace(it->second, std::make_pair(name, i))         \
               .second) {                                                      \
        LOG(ERROR)                                                             \
            << "Loading model error due to input or output name reuse: "       \
            << name;                                                           \
        return false;                                                          \
      }                                                                        \
    }                                                                          \
  }
    REVERSE_MAPPING(onnxInputNames_, onnxInputToPlaceholder_);
    REVERSE_MAPPING(onnxOutputNames_, onnxOutputToPlaceholder_);
#undef REVERSE_MAPPING
  }

  return true;
}
252 | |
253 | onnxStatus Graph::adjustInputs(uint32_t inputsCount, |
254 | const onnxTensorDescriptorV1 *inputDescriptors, |
255 | ExecutionContext *ctx) { |
256 | // Create tensors for input placeholders |
257 | auto &externalIOBindings = ctx->getExternalIOBindings(); |
258 | for (unsigned i = 0; i < inputsCount; ++i) { |
259 | const auto &inOnnxTensor = inputDescriptors[i]; |
260 | auto *inOnnxBuffer = reinterpret_cast<void *>(inOnnxTensor.buffer); |
261 | Placeholder *inPhPtr; |
262 | |
263 | if (onnxInputNames_.size() == inputsCount) { |
264 | inPhPtr = onnxInputPlaceholders_[i]; |
265 | } else { |
266 | auto inPhIt = onnxInputToPlaceholder_.find(inOnnxTensor.name); |
267 | if (inPhIt == onnxInputToPlaceholder_.end()) { |
268 | LOG(ERROR) << "Input Name Unknown: " << inOnnxTensor.name; |
269 | return ONNXIFI_STATUS_UNIDENTIFIED_NAME; |
270 | } |
271 | inPhPtr = inPhIt->getValue(); |
272 | } |
273 | |
274 | const bool quantizedInput = inPhPtr->getType()->isQuantizedType(); |
275 | std::vector<dim_t> inOnnxTensorDims(inOnnxTensor.dimensions); |
276 | size_t inOnnxTensorSize = 1; |
277 | for (unsigned j = 0; j < inOnnxTensor.dimensions; ++j) { |
278 | inOnnxTensorDims[j] = inOnnxTensor.shape[j]; |
279 | inOnnxTensorSize *= inOnnxTensorDims[j]; |
280 | } |
281 | |
282 | if (inOnnxTensorSize > inPhPtr->getType()->size()) { |
283 | std::stringstream ss; |
284 | for (const auto j : inOnnxTensorDims) { |
285 | ss << j << ", " ; |
286 | } |
287 | ss << " vs " ; |
288 | auto sizes = inPhPtr->getType()->dims(); |
289 | for (const auto j : sizes) { |
290 | ss << j << ", " ; |
291 | } |
292 | LOG(ERROR) << "Input tensor is too large: " << inOnnxTensorSize << " vs " |
293 | << inPhPtr->getType()->size() << ": " << inOnnxTensor.name |
294 | << ", shape: " << ss.str(); |
295 | return ONNXIFI_STATUS_INVALID_SHAPE; |
296 | } |
297 | |
298 | // Only allocate a tensor if insufficient backing storage is provided. |
299 | const unsigned elementSize = |
300 | getOnnxTensorDescriptorElementSize(inOnnxTensor.dataType); |
301 | const unsigned glowElementSize = inPhPtr->getType()->getElementSize(); |
302 | bool needsUpcast = false; |
303 | if (elementSize != glowElementSize) { |
304 | // If an input tensor is of int32 type and the placeholder expects int64, |
305 | // we can allow upcasting the same way as Caffe2 allows. |
306 | if (inOnnxTensor.dataType == ONNXIFI_DATATYPE_INT32 && |
307 | inPhPtr->getType()->getElementType() == ElemKind::Int64ITy) { |
308 | needsUpcast = true; |
309 | } else { |
310 | LOG(ERROR) << "Input data width (" << elementSize |
311 | << ") is different from glow placeholder data width (" |
312 | << glowElementSize << "), tensor: " << inOnnxTensor.name |
313 | << ", onnxifi data type: " << inOnnxTensor.dataType |
314 | << ", glow data type: " |
315 | << inPhPtr->getType()->getElementName().data(); |
316 | return ONNXIFI_STATUS_INVALID_DATATYPE; |
317 | } |
318 | } |
319 | bool processed = true; |
320 | size_t onnxBytes = inOnnxTensorSize * elementSize; |
321 | if (!quantizedInput && !needsUpcast) { |
322 | if (inPhPtr->dims().equals(inOnnxTensorDims)) { |
323 | externalIOBindings.emplace_back( |
324 | std::piecewise_construct, std::forward_as_tuple(inPhPtr), |
325 | std::forward_as_tuple(inOnnxBuffer, inPhPtr->getType())); |
326 | } else if (glow::flags::EnablePartialTensors && |
327 | backendPtr_->getBackend().supportsPartialTensors()) { |
328 | // We have a partial input buffer. Create a padded unowned tensor that |
329 | // remembers the actual size of the input. |
330 | externalIOBindings.emplace_back( |
331 | std::piecewise_construct, std::forward_as_tuple(inPhPtr), |
332 | std::forward_as_tuple(inOnnxBuffer, inPhPtr->getType(), onnxBytes)); |
333 | } else if (!inOnnxBuffer && inPhPtr->getType()->size() <= |
334 | zeroLengthSequence_.getType().size()) { |
335 | externalIOBindings.emplace_back( |
336 | std::piecewise_construct, std::forward_as_tuple(inPhPtr), |
337 | std::forward_as_tuple((void *)(zeroLengthSequence_.getUnsafePtr()), |
338 | inPhPtr->getType())); |
339 | } else { |
340 | processed = false; |
341 | } |
342 | } else { |
343 | processed = false; |
344 | } |
345 | |
346 | if (processed) { |
347 | continue; |
348 | } |
349 | |
350 | llvm::Optional<Tensor> inputTensorOpt = tensorPool_.get(inPhPtr->getType()); |
351 | if (!inputTensorOpt.hasValue()) { |
352 | DLOG(FATAL) << "Tensorpool tensor not found for input " |
353 | << inOnnxTensor.name; |
354 | return ONNXIFI_STATUS_INTERNAL_ERROR; |
355 | } |
356 | // We want fresh DeviceResidencyInfo for this fresh Tensor. |
357 | externalIOBindings.emplace_back(inPhPtr, |
358 | std::move(inputTensorOpt.getValue())); |
359 | Tensor &inputTensor = externalIOBindings.back().second; |
360 | inputTensor.resetDeviceInfo(); |
361 | |
362 | if (quantizedInput) { |
363 | // Right now we only support quantized input with one set of |
364 | // quantization parameters |
365 | bool supported = true; |
366 | if (inOnnxTensor.quantizationParams == 1) { |
367 | if (inOnnxTensor.dataType == ONNXIFI_DATATYPE_UINT8) { |
368 | inputTensor.zero(); |
369 | if (inOnnxBuffer) { |
370 | auto TH = inputTensor.getHandle<int8_t>(); |
371 | uint8_t *data = (uint8_t *)(inOnnxBuffer); |
372 | for (size_t k = 0; k < onnxBytes; ++k) { |
373 | TH.raw(k) = (int8_t)(data[k] - UINT8_TO_INT8_SHIFT); |
374 | } |
375 | } |
376 | continue; |
377 | } else if (inOnnxTensor.dataType != ONNXIFI_DATATYPE_INT8) { |
378 | supported = false; |
379 | } |
380 | } else { |
381 | supported = false; |
382 | } |
383 | if (!supported) { |
384 | return ONNXIFI_STATUS_INVALID_DATATYPE; |
385 | } |
386 | } |
387 | |
388 | if (needsUpcast) { |
389 | if (!inOnnxBuffer) { |
390 | LOG(ERROR) << "Can't upcast tensor " << inOnnxTensor.name |
391 | << " because buffer is not present" ; |
392 | return ONNXIFI_STATUS_INTERNAL_ERROR; |
393 | } |
394 | if (inOnnxTensor.dataType == ONNXIFI_DATATYPE_INT32 && |
395 | inPhPtr->getType()->getElementType() == ElemKind::Int64ITy) { |
396 | auto TH = inputTensor.getHandle<int64_t>(); |
397 | auto data = reinterpret_cast<int32_t *>(inOnnxBuffer); |
398 | for (size_t k = 0; k < inOnnxTensorSize; ++k) { |
399 | TH.raw(k) = (int64_t)data[k]; |
400 | } |
401 | } else { |
402 | LOG(ERROR) << "Unsupported upcast for tensor " << inOnnxTensor.name |
403 | << ", onnxifi data type: " << inOnnxTensor.dataType |
404 | << ", glow data type: " |
405 | << inPhPtr->getType()->getElementName().data(); |
406 | return ONNXIFI_STATUS_INVALID_DATATYPE; |
407 | } |
408 | } |
409 | |
410 | // Copy the input from onnxTensorDescriptor unless it has a NULL buffer |
411 | // pointer (which is a valid case if the tensor is empty). |
412 | if (inOnnxBuffer) { |
413 | memcpy(inputTensor.getUnsafePtr(), inOnnxBuffer, onnxBytes); |
414 | // Pad remaining space with zeroes. |
415 | memset(inputTensor.getUnsafePtr() + onnxBytes, 0, |
416 | inputTensor.getSizeInBytes() - onnxBytes); |
417 | } else { |
418 | inputTensor.zero(); |
419 | } |
420 | } |
421 | return ONNXIFI_STATUS_SUCCESS; |
422 | } |
423 | |
/// Bind the \p inputsCount input and \p outputsCount output tensor
/// descriptors to Glow placeholders, dispatch execution, and signal
/// \p outputEvent on completion. If tracing is active, trace data is returned
/// via \p traceEvents. When the SaveIO flag is set, inputs and outputs are
/// additionally serialized to numbered "input_N.onnx"/"output_N.onnx" files.
onnxStatus Graph::setIOAndRun(uint32_t inputsCount,
                              const onnxTensorDescriptorV1 *inputDescriptors,
                              uint32_t outputsCount,
                              const onnxTensorDescriptorV1 *outputDescriptors,
                              EventPtr outputEvent,
                              onnxTraceEventList *traceEvents) {
  auto ctx = glow::make_unique<ExecutionContext>();

  // Only attach a trace context when someone will consume it: the caller
  // (traceEvents), debug-trace dumping, or a registered trace exporter.
  TraceContext *traceContext = nullptr;
  if (traceEvents || glow::flags::DumpDebugTraces ||
      TraceExporterRegistry::getInstance()->shouldTrace()) {
    ctx->setTraceContext(glow::make_unique<TraceContext>(TraceLevel::STANDARD));
    traceContext = ctx->getTraceContext();
    traceContext->setThreadName("Onnxifi");
  }
  TRACE_EVENT_SCOPE(traceContext, TraceLevel::RUNTIME, "Onnxifi::setIOAndRun");
  TRACE_EVENT_SCOPE_NAMED(traceContext, TraceLevel::RUNTIME, "adjustInputs",
                          aiEvent);

  auto r = adjustInputs(inputsCount, inputDescriptors, ctx.get());
  if (r != ONNXIFI_STATUS_SUCCESS) {
    return r;
  }

  // Optionally dump all bound inputs to "input_<seq>.onnx" for offline
  // debugging; `seq` is reused for the matching output dump below.
  size_t seq = 0;
  if (glow::onnxifi::flags::SaveIO) {
    seq = ioDumpCounter_++;
    std::stringstream ss;
    ss << "input_" << seq << ".onnx";
    std::ofstream of(ss.str(), std::ios::binary);
    if (!of) {
      LOG(ERROR) << "Cannot create input file " << ss.str();
    } else {
      ONNX_NAMESPACE::GraphProto inputG;
      for (const auto &p : ctx->getExternalIOBindings()) {
        auto *t = inputG.add_initializer();
        const auto &inputTensor = p.second;
        size_t unpaddedSize = inputTensor.getUnpaddedSizeInBytes();
        size_t tensorSize = inputTensor.getSizeInBytes();
        if (unpaddedSize == tensorSize) {
          ONNXModelWriter::writeTensor(inputTensor, t,
                                       glow::flags::UseCustomOpsForExport);
        } else {
          // If the input is a partial tensor, then save only the part that has
          // data.
          auto ty = inputTensor.getType();
          auto dims = ty.dims().vec();
          // Shrink the outermost dimension proportionally to the filled bytes.
          dims[0] = dims[0] * unpaddedSize / tensorSize;
          const auto &resized = inputTensor.getUnowned(dims);
          ONNXModelWriter::writeTensor(resized, t,
                                       glow::flags::UseCustomOpsForExport);
          VLOG(1) << "Writing partial tensor " << p.first->getName().str()
                  << " full size=" << inputTensor.getType().toString()
                  << " partial size=" << inputTensor.getUnpaddedSizeInBytes()
                  << " resized size=" << resized.getType().toString();
        }
        t->set_name(p.first->getName().str());
      }
      std::string buffer;
      inputG.SerializeToString(&buffer);
      of << buffer;
    }
  }

  TRACE_EVENT_SCOPE_END_NAMED(aiEvent);
  TRACE_EVENT_SCOPE_NAMED(traceContext, TraceLevel::RUNTIME,
                          "setOnnxifiOutputs", soEvent);

  // Create tensors for output placeholders
  auto &externalIOBindings = ctx->getExternalIOBindings();
  for (unsigned i = 0; i < outputsCount; ++i) {
    auto &outOnnxTensor =
        const_cast<onnxTensorDescriptorV1 &>(outputDescriptors[i]);
    auto *outOnnxBuffer = reinterpret_cast<void *>(outOnnxTensor.buffer);
    Placeholder *outPhPtr;

    // Fast path: positional lookup when the counts line up; otherwise resolve
    // the placeholder by name.
    if (outputsCount == onnxOutputNames_.size()) {
      outPhPtr = onnxOutputPlaceholders_[i];
    } else {
      auto outPhIt = onnxOutputToPlaceholder_.find(outOnnxTensor.name);
      if (outPhIt == onnxOutputToPlaceholder_.end()) {
        LOG(ERROR) << "Output name unknown: " << outOnnxTensor.name;
        return ONNXIFI_STATUS_UNIDENTIFIED_NAME;
      }
      outPhPtr = outPhIt->getValue();
    }
    // Compute the total size of the onnxifi tensor.
    std::vector<dim_t> outOnnxTensorDims(outOnnxTensor.dimensions);
    dim_t outOnnxTensorSize = 1;
    for (unsigned j = 0; j < outOnnxTensor.dimensions; ++j) {
      outOnnxTensorDims[j] = outOnnxTensor.shape[j];
      outOnnxTensorSize *= outOnnxTensorDims[j];
    }

    // Check that tensor provided by onnxifi is the correct size.
    if (!outPhPtr->dims().equals(outOnnxTensorDims)) {
      LOG(ERROR) << "Output tensor is the wrong shape: " << outOnnxTensorSize
                 << " total dims vs " << outPhPtr->getType()->size() << ": "
                 << outOnnxTensor.name;
      return ONNXIFI_STATUS_INVALID_SHAPE;
    }

    // Set quantized output scale/output. Do not support channelwise quantized
    // output with multiple quantization parameters for now.
    auto type = outPhPtr->getType();
    if (outOnnxTensor.quantizationParams == 1 && type->isQuantizedType()) {
      // const_cast: the descriptor fields are declared const but the ONNXIFI
      // contract here is to report the graph's quantization params back.
      const_cast<float *>(outOnnxTensor.scales)[0] = type->getScale();
      const_cast<int32_t *>(outOnnxTensor.biases)[0] = type->getOffset();
    }

    // Create a Glow tensor backed by the memory from the provided onnxifi
    // tensor and bind it to the appropriate placeholder for the graph output.
    Tensor outputTensor(outOnnxBuffer, outPhPtr->getType());
    externalIOBindings.emplace_back(outPhPtr, std::move(outputTensor));
  }
  TRACE_EVENT_SCOPE_END_NAMED(soEvent);

  if (ctx->getTraceContext()) {
    ctx->getTraceContext()->setThreadName("Request Thread");
  }

  // End trace scope before calling into run. run() can trigger the completion
  // callback which deallocates ctx and traceContext. So it will no longer be
  // safe to access the trace context after calling into run().
  TRACE_EVENT_SCOPE_END();
  auto ret = run(std::move(ctx), outputEvent, traceEvents);
  // Optionally dump all outputs to "output_<seq>.onnx", pairing with the
  // input dump written above.
  if (glow::onnxifi::flags::SaveIO) {
    // We need to wait for the execution to finish in order to extract output
    // values.
    outputEvent->wait();
    std::stringstream ss;
    ss << "output_" << seq << ".onnx";
    std::ofstream of(ss.str(), std::ios::binary);
    if (!of) {
      LOG(ERROR) << "Cannot create output file " << ss.str();
    } else {
      ONNX_NAMESPACE::GraphProto inputG;
      for (unsigned i = 0; i < outputsCount; ++i) {
        const auto &outOnnxTensor = outputDescriptors[i];
        auto *outOnnxBuffer = reinterpret_cast<void *>(outOnnxTensor.buffer);
        Placeholder *outPhPtr;
        if (outputsCount == onnxOutputNames_.size()) {
          outPhPtr = onnxOutputPlaceholders_[i];
        } else {
          auto outPhIt = onnxOutputToPlaceholder_.find(outOnnxTensor.name);
          CHECK(outPhIt != onnxOutputToPlaceholder_.end());
          outPhPtr = outPhIt->getValue();
        }
        Tensor outputTensor(outOnnxBuffer, outPhPtr->getType());
        auto *t = inputG.add_initializer();
        ONNXModelWriter::writeTensor(outputTensor, t,
                                     glow::flags::UseCustomOpsForExport);
        t->set_name(outPhPtr->getName().str());
      }
      std::string buffer;
      inputG.SerializeToString(&buffer);
      of << buffer;
    }
  }

  return ret;
}
586 | |
587 | void Graph::setTraceEvents(onnxTraceEventList *traceEvents, |
588 | TraceContext *traceContext) { |
589 | /// Export trace events to any registered glow trace exporters |
590 | if (traceContext) { |
591 | TraceExporterRegistry::getInstance()->exportTrace(traceContext); |
592 | } |
593 | |
594 | if (!traceEvents || !traceContext) { |
595 | return; |
596 | } |
597 | /// Internally we use steady_clock, but our interface is system_clock |
598 | /// timestamps. Do a simple conversion. |
599 | auto steadyTS = TraceEvent::now(); |
600 | auto systemTS = std::chrono::duration_cast<std::chrono::microseconds>( |
601 | std::chrono::system_clock::now().time_since_epoch()) |
602 | .count(); |
603 | |
604 | // Timestamps are uint64_t so branch rather than use abs(), we want to make |
605 | // sure we always subtract the smaller from the larger value to avoid |
606 | // underflowing the uint64_t. Then if the timestamp should be moved backwards |
607 | // negate the result. |
608 | int64_t offset = long(steadyTS) > systemTS ? -(steadyTS - systemTS) |
609 | : (systemTS - steadyTS); |
610 | TRACE_EVENT_SCOPE(traceContext, TraceLevel::RUNTIME, |
611 | "Onnxifi::setTraceEvents" ); |
612 | |
613 | std::vector<onnxTraceEvent *> traceEventsVec; |
614 | for (const auto &glowTraceEvent : traceContext->getTraceEvents()) { |
615 | auto *traceEvent = new onnxTraceEvent(); |
616 | traceEvent->eventType = glowTraceEvent.type; |
617 | traceEvent->timestamp = glowTraceEvent.timestamp + offset; |
618 | traceEvent->tid = glowTraceEvent.tid; |
619 | traceEvent->duration = glowTraceEvent.duration; |
620 | size_t nameSize = std::min(glowTraceEvent.name.size(), |
621 | (size_t)ONNXIFI_TRACE_EVENT_NAME_SIZE); |
622 | strncpy(traceEvent->eventName, glowTraceEvent.name.c_str(), nameSize); |
623 | traceEvent->eventName[nameSize] = '\0'; |
624 | traceEventsVec.push_back(traceEvent); |
625 | } |
626 | |
627 | traceEvents->numEvents = traceEventsVec.size(); |
628 | traceEvents->traceEvents = new onnxTraceEvent *[traceEventsVec.size()]; |
629 | DCHECK(traceEvents->traceEvents); |
630 | std::copy(traceEventsVec.begin(), traceEventsVec.end(), |
631 | traceEvents->traceEvents); |
632 | } |
633 | |
634 | void Graph::releaseTraceEvents(onnxTraceEventList *traceEvents) { |
635 | DCHECK(traceEvents); |
636 | for (uint64_t i = 0; i < traceEvents->numEvents; ++i) { |
637 | onnxTraceEvent *traceEvent = traceEvents->traceEvents[i]; |
638 | delete traceEvent; |
639 | } |
640 | |
641 | delete[] traceEvents->traceEvents; |
642 | } |
643 | |
/// Construct a Graph bound to \p backendPtr, the backend it will be compiled
/// for and executed on.
Graph::Graph(BackendPtr backendPtr) : backendPtr_(backendPtr) {}
645 | |
646 | } // namespace onnxifi |
647 | } // namespace glow |
648 | |