// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#ifndef __MACH__
#define _POSIX_C_SOURCE 199309L
#endif

#include <assert.h>
#include <inttypes.h>  // For PRIu32.
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>  // For snprintf.
#include <stdlib.h>
#include <string.h>  // For memcpy, memset, and strlen.

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/cache.h>
#include <xnnpack/common.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/memory-planner.h>
#include <xnnpack/node-type.h>
#include <xnnpack/operator.h>
#include <xnnpack/params.h>
#include <xnnpack/subgraph.h>

#if defined(__EMSCRIPTEN__)
#include <emscripten/emscripten.h>
#elif XNN_PLATFORM_WINDOWS
#include <windows.h>
#else
#include <errno.h>
#include <time.h>
#endif

#ifndef XNN_ENABLE_JIT
  #error "XNN_ENABLE_JIT is not defined"
#endif

enum xnn_status xnn_create_workspace(xnn_workspace_t* workspace_out)
{
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create workspace: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  struct xnn_workspace* workspace = NULL;
  workspace = xnn_allocate_zero_memory(sizeof(struct xnn_workspace));
  if (workspace == NULL) {
    xnn_log_error("failed to allocate %zu bytes for workspace descriptor", sizeof(struct xnn_workspace));
    return xnn_status_out_of_memory;
  }
  workspace->ref_count = 1;
  *workspace_out = workspace;
  return xnn_status_success;
}
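
// Illustrative usage sketch (not part of this file): one workspace can back several
// runtimes so that their internal tensors share a single allocation. The subgraph
// and threadpool objects below are placeholders and error handling is omitted.
//
//   xnn_workspace_t workspace = NULL;
//   xnn_create_workspace(&workspace);
//   xnn_create_runtime_v4(subgraph_a, NULL, workspace, threadpool, 0, &runtime_a);
//   xnn_create_runtime_v4(subgraph_b, NULL, workspace, threadpool, 0, &runtime_b);
//   // The workspace is reference-counted: each runtime retains it during creation,
//   // so the caller's reference can be released right away.
//   xnn_release_workspace(workspace);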

static inline void xnn_retain_workspace(xnn_workspace_t workspace)
{
  workspace->ref_count++;
}

enum xnn_status xnn_release_workspace(xnn_workspace_t workspace)
{
  assert(workspace->ref_count != 0);
  if (--workspace->ref_count == 0) {
    xnn_release_simd_memory(workspace->data);
    xnn_release_memory(workspace);
  }
  return xnn_status_success;
}

enum xnn_status xnn_create_weights_cache_with_size(size_t size, xnn_weights_cache_t* weights_cache_out)
{
  struct xnn_weights_cache* weights_cache = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create weights cache: XNNPACK is not initialized");
    goto error;
  }

  weights_cache = xnn_allocate_zero_memory(sizeof(struct xnn_weights_cache));
  if (weights_cache == NULL) {
    xnn_log_error("failed to allocate %zu bytes for weights cache descriptor", sizeof(struct xnn_weights_cache));
    status = xnn_status_out_of_memory;
    goto error;
  }

  status = xnn_init_weights_cache_with_size(weights_cache, size);
  if (status != xnn_status_success) {
    goto error;
  }
  *weights_cache_out = weights_cache;
  return xnn_status_success;

error:
  xnn_release_weights_cache(weights_cache);
  return status;
}

enum xnn_status xnn_create_weights_cache(xnn_weights_cache_t* weights_cache_out)
{
  return xnn_create_weights_cache_with_size(XNN_DEFAULT_WEIGHTS_BUFFER_SIZE, weights_cache_out);
}

enum xnn_status xnn_delete_weights_cache(xnn_weights_cache_t weights_cache)
{
  enum xnn_status status = xnn_release_weights_cache(weights_cache);
  if (status != xnn_status_success) {
    return status;
  }
  xnn_release_memory(weights_cache);
  return xnn_status_success;
}
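
// Illustrative lifecycle sketch (not part of this file). It assumes the public
// finalization API declared in xnnpack.h (xnn_finalize_weights_cache and the
// xnn_weights_cache_finalization_kind enum); subgraph and threadpool are
// placeholders and error handling is omitted.
//
//   xnn_weights_cache_t weights_cache = NULL;
//   xnn_create_weights_cache(&weights_cache);
//   xnn_create_runtime_v3(subgraph, weights_cache, threadpool, /*flags=*/0, &runtime);
//   // The cache must be finalized before setup/inference; xnn_setup_runtime below
//   // rejects runtimes whose weights cache is not finalized.
//   xnn_finalize_weights_cache(weights_cache, xnn_weights_cache_finalization_kind_hard);
//   ...
//   xnn_delete_runtime(runtime);
//   xnn_delete_weights_cache(weights_cache);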

enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out)
{
  return xnn_create_runtime_v2(subgraph, NULL /* threadpool */, 0 /* flags */, runtime_out);
}

enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  return xnn_create_runtime_v3(subgraph, /* weights_cache */ NULL, threadpool, flags, runtime_out);
}

enum xnn_status xnn_create_runtime_v3(
  xnn_subgraph_t subgraph,
  xnn_weights_cache_t weights_cache,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  xnn_workspace_t workspace;
  enum xnn_status status = xnn_create_workspace(&workspace);
  if (status != xnn_status_success) {
    return status;
  }
  status = xnn_create_runtime_v4(subgraph, weights_cache, workspace, threadpool, flags, runtime_out);
  // Release the workspace regardless of the status of runtime creation.
  xnn_release_workspace(workspace);
  return status;
}

static enum xnn_status initialize_workspace_blobs(
  xnn_subgraph_t subgraph,
  xnn_runtime_t runtime,
  struct xnn_value_allocation_tracker* mem_alloc_tracker)
{
  assert(runtime->workspace != NULL);
  const size_t persistent_size = runtime->workspace->persistent_size;
  size_t mem_arena_size = mem_alloc_tracker->mem_arena_size + persistent_size;
  if (mem_arena_size == 0) {
    return xnn_status_success;
  }
  // Sparse microkernels can read up to 2 * XNN_EXTRA_BYTES beyond array bounds.
  mem_arena_size += 2 * XNN_EXTRA_BYTES;

  // Records how far the workspace data pointer has moved due to reallocating a larger workspace.
  ptrdiff_t workspace_data_delta = 0;
  // Allocate a larger workspace here if needed.
  if (runtime->workspace->size < mem_arena_size) {
    void* old_workspace_data = runtime->workspace->data;
    if (runtime->workspace->size != 0) {
      // Free the workspace's current data first, then allocate, to keep peak memory usage low.
      xnn_release_simd_memory(runtime->workspace->data);
    }
    void* new_workspace_data = xnn_allocate_simd_memory(mem_arena_size);
    if (new_workspace_data == NULL) {
      xnn_log_error("failed to allocate %zu bytes for runtime workspace", mem_arena_size);
      return xnn_status_out_of_memory;
    }
    runtime->workspace->data = new_workspace_data;
    runtime->workspace->size = mem_arena_size;
    xnn_log_debug("created workspace of size %zu", mem_arena_size);
    // Keep track of how far the workspace data moved.
    if (old_workspace_data != NULL) {
      workspace_data_delta = (uintptr_t) new_workspace_data - (uintptr_t) old_workspace_data;
    }
  }

  assert(runtime->workspace->size >= mem_arena_size);

  // Initialize the current runtime's blob pointers.
  size_t persistent_offset = 0;
  for (size_t i = 0; i < subgraph->num_values; i++) {
    const struct xnn_value* value = &subgraph->values[i];
    struct xnn_blob* blob = &runtime->blobs[i];
    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
      if (blob->allocation_type == xnn_allocation_type_workspace) {
        // Value is purely internal to the runtime, allocate it in the workspace.
        blob->data = (void*) ((uintptr_t) runtime->workspace->data + persistent_size + mem_alloc_tracker->usage[i].alloc_offset);
      } else if (blob->allocation_type == xnn_allocation_type_persistent) {
        blob->data = (void*) ((uintptr_t) runtime->workspace->data + persistent_offset);
        persistent_offset += round_up_po2(blob->size, XNN_EXTRA_BYTES);
      }
    }
  }
  assert(persistent_offset == persistent_size);

  // Adjust the blob pointers of all runtimes that share this workspace.
  if (workspace_data_delta != 0) {
    for (struct xnn_runtime* rt = runtime->workspace->first_user; rt != NULL; rt = rt->next_workspace_user) {
      // The current runtime already has the correct offset.
      if (rt == runtime) {
        continue;
      }
      for (size_t i = 0; i < rt->num_blobs; i++) {
        struct xnn_blob* blob = &rt->blobs[i];
        if (blob->allocation_type == xnn_allocation_type_workspace ||
            blob->allocation_type == xnn_allocation_type_persistent) {
          assert(blob->data != NULL);
          blob->data = (void*) ((uintptr_t) blob->data + workspace_data_delta);
        }
      }
    }
  }

  return xnn_status_success;
}
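
// Resulting workspace layout (derived from initialize_workspace_blobs above):
//
//   [ persistent blobs, packed at offsets 0 .. persistent_size )
//   [ planned internal blobs at persistent_size + usage[i].alloc_offset )
//   [ 2 * XNN_EXTRA_BYTES of slack for sparse microkernel over-reads ]
//
// Every runtime sharing the workspace points into the same buffer, which is why all
// of their blob pointers are rebased by workspace_data_delta after a reallocation.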

// External inputs cannot be overwritten.
// Static inputs cannot be overwritten.
// Persistent tensors have their own space allocated at the front of the workspace.
// If the input has more than one consumer, we can't track all the consumers and update first_consumer, so bail out.
static bool input_memory_can_be_reused(const struct xnn_value* input, const struct xnn_value* output)
{
  return !xnn_value_is_external(input) && !xnn_value_is_static(input) && !xnn_value_is_persistent(input)
    && !xnn_value_is_persistent(output) && input->num_consumers <= 1;
}

// An in-place operation reuses the input tensor's memory for its output. Examples are element-wise unary operations
// like activation functions. Usually, an output tensor is allocated space. For an in-place operation, we want the
// output tensor to share the input tensor's memory. We do this by calling xnn_mark_tensor_as_reuse, which:
// - sets the tensor_size of the output tensor's usage record to 0
// - marks this usage record as reusing another tensor's memory
// - remembers the id of the reused tensor, whose alloc_offset will later be copied onto the output tensor
static void optimize_tensor_allocation_for_in_place_operations(
  struct xnn_value_allocation_tracker* tracker,
  xnn_subgraph_t subgraph)
{
  xnn_subgraph_analyze_consumers_and_producers(subgraph);
  for (uint32_t n = 0; n < subgraph->num_nodes; n++) {
    struct xnn_node* node = &subgraph->nodes[n];
    switch (node->type) {
      case xnn_node_type_abs:
      case xnn_node_type_bankers_rounding:
      case xnn_node_type_ceiling:
      case xnn_node_type_clamp:
      case xnn_node_type_copy:
      case xnn_node_type_elu:
      case xnn_node_type_floor:
      case xnn_node_type_hardswish:
      case xnn_node_type_leaky_relu:
      case xnn_node_type_negate:
      case xnn_node_type_prelu:
      case xnn_node_type_sigmoid:
      case xnn_node_type_softmax:
      case xnn_node_type_square:
      case xnn_node_type_square_root:
      case xnn_node_type_static_reshape:
        // Valid operation types that can be optimized.
        break;
      default:
        continue;
    }
    struct xnn_value* output = &subgraph->values[node->outputs[0]];
    const uint32_t input_id = node->inputs[0];
    const struct xnn_value* input = &subgraph->values[input_id];
    if (!input_memory_can_be_reused(input, output)) {
      // TODO(zhin): consider aliasing input to output rather than output to input.
      continue;
    }
    if (output->num_consumers == 1) {
      uint32_t reuse_id = input_id;
      // If the tensor we are reusing is itself reused, find the "root tensor" to be reused.
      while (tracker->usage[reuse_id].reuse_value_id != XNN_INVALID_VALUE_ID) {
        reuse_id = tracker->usage[reuse_id].reuse_value_id;
      }
      // We only support the case where the output has a single consumer because we cannot easily find all consumer
      // nodes without traversing the entire graph. This will require tracking output->last_consumer in the future.
      assert(tracker->usage[reuse_id].last_node < output->first_consumer);
      xnn_log_debug("reusing tensor id #%" PRIu32 " memory for tensor id #%" PRIu32 " Node #%" PRIu32 " %s",
                    reuse_id, output->id, node->id, xnn_node_type_to_string(node->type));
      xnn_mark_tensor_as_reuse(tracker, output->id, reuse_id, output->first_consumer);
    }
  }
}
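
// For example (hypothetical value ids): if tensor #3 already reuses tensor #2 and tensor #2 reuses tensor #1,
// then an in-place node producing tensor #4 from tensor #3 records #1 as the root tensor to reuse,
// so all four values end up sharing one allocation offset in the workspace.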

enum xnn_status xnn_create_runtime_v4(
  xnn_subgraph_t subgraph,
  xnn_weights_cache_t weights_cache,
  xnn_workspace_t workspace,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  struct xnn_runtime* runtime = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create runtime: XNNPACK is not initialized");
    goto error;
  }

  if (workspace == NULL) {
    xnn_log_error("failed to create runtime: workspace is NULL");
    status = xnn_status_invalid_parameter;
    goto error;
  }

  const uint32_t optimization_flags = XNN_FLAG_SPARSE_INFERENCE | XNN_FLAG_HINT_FP16_INFERENCE |
    XNN_FLAG_FORCE_FP16_INFERENCE | XNN_FLAG_NO_OPERATOR_FUSION;
  status = xnn_subgraph_optimize(subgraph, flags & optimization_flags);
  if (status != xnn_status_success) {
    xnn_log_error("failed to optimize subgraph");
    goto error;
  }

  status = xnn_status_out_of_memory;

  runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime));
  if (runtime == NULL) {
    xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime));
    goto error;
  }

  runtime->opdata = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes);
  if (runtime->opdata == NULL) {
    xnn_log_error("failed to allocate %zu bytes for opdata descriptors",
      sizeof(struct xnn_operator_data) * (size_t) subgraph->num_nodes);
    goto error;
  }
  runtime->num_ops = subgraph->num_nodes;

  if (flags & XNN_FLAG_YIELD_WORKERS) {
    struct xnn_node* last_valid_node = NULL;
    for (size_t i = 0; i < subgraph->num_nodes; i++) {
      struct xnn_node* node = subgraph->nodes + i;
      if (node->type != xnn_node_type_invalid) {
        last_valid_node = node;
      }
    }
    if (last_valid_node != NULL) {
      last_valid_node->flags |= XNN_FLAG_YIELD_WORKERS;
    }
  }

  struct xnn_code_cache* code_cache = NULL;
#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
  code_cache = &runtime->code_cache;
  status = xnn_init_code_cache(code_cache);
  if (status != xnn_status_success) {
    goto error;
  }
#endif
  const struct xnn_caches caches = {
    .code_cache = code_cache,
    .weights_cache = weights_cache,
  };

  struct xnn_value* values = subgraph->values;
  for (size_t i = 0; i < subgraph->num_nodes; i++) {
    const struct xnn_node* node = subgraph->nodes + i;

    // Ignore fused nodes.
    if (node->type != xnn_node_type_invalid) {
      assert(node->create != NULL);
      status = node->create(node, values, subgraph->num_values, runtime->opdata + i, &caches);
      if (status != xnn_status_success) {
        goto error;
      }
      runtime->opdata[i].setup = node->setup;
    }
  }

#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
  xnn_finalize_code_memory(&code_cache->cache.code);
#endif

  runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values);
  if (runtime->blobs == NULL) {
    xnn_log_error("failed to allocate %zu bytes for blob descriptors",
      sizeof(struct xnn_blob) * (size_t) subgraph->num_values);
    goto error;
  }
  runtime->num_blobs = subgraph->num_values;

  struct xnn_value_allocation_tracker mem_alloc_tracker;
  xnn_init_value_allocation_tracker(&mem_alloc_tracker, subgraph);

  size_t persistent_size = 0;
  for (uint32_t i = 0; i < subgraph->num_values; i++) {
    struct xnn_value* value = &subgraph->values[i];
    struct xnn_blob* blob = &runtime->blobs[i];
    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
      blob->size = xnn_tensor_get_size(subgraph, i);
      blob->data = (void*) (uintptr_t) value->data;
      if (blob->data == NULL) {
        if (xnn_value_is_external(value)) {
          // Value is non-static and external to the runtime: it must be specified via a call to xnn_setup_runtime.
          blob->allocation_type = xnn_allocation_type_external;
        } else if (xnn_value_is_persistent(value)) {
          // Persistent values are allocated at the front of the workspace without overlaps.
          blob->allocation_type = xnn_allocation_type_persistent;
          persistent_size += round_up_po2(blob->size, XNN_EXTRA_BYTES);
        } else {
          // Value is purely internal to the runtime, and must be allocated in its workspace.
          xnn_add_value_allocation_tracker(&mem_alloc_tracker, i, round_up_po2(blob->size, XNN_EXTRA_BYTES));
          blob->allocation_type = xnn_allocation_type_workspace;
        }
      } else {
        blob->allocation_type = xnn_allocation_type_static;
      }
    }
  }
  optimize_tensor_allocation_for_in_place_operations(&mem_alloc_tracker, subgraph);
  xnn_plan_value_allocation_tracker(&mem_alloc_tracker);

  xnn_retain_workspace(workspace);
  runtime->workspace = workspace;
  runtime->next_workspace_user = runtime->workspace->first_user;
  runtime->workspace->first_user = runtime;
  runtime->workspace->persistent_size = persistent_size;

  status = initialize_workspace_blobs(subgraph, runtime, &mem_alloc_tracker);
  if (status != xnn_status_success) {
    xnn_release_value_allocation_tracker(&mem_alloc_tracker);
    goto error;
  }

  if (flags & XNN_FLAG_BASIC_PROFILING) {
    runtime->profiling = true;
  }

  xnn_release_value_allocation_tracker(&mem_alloc_tracker);

  runtime->threadpool = threadpool;

  *runtime_out = runtime;
  return xnn_status_success;

error:
  xnn_delete_runtime(runtime);
  return status;
}
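
// Illustrative creation sketch (not part of this file); the subgraph, weights cache,
// workspace and threadpool are assumed to exist already, and error handling is omitted.
// Passing XNN_FLAG_BASIC_PROFILING enables the per-operator timings queried through
// xnn_get_runtime_profiling_info below.
//
//   xnn_runtime_t runtime = NULL;
//   xnn_create_runtime_v4(subgraph, weights_cache, workspace, threadpool,
//                         XNN_FLAG_BASIC_PROFILING, &runtime);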

enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values)
{
  // Validate inputs without changing internal state.
  // This ensures that the runtime stays in a consistent state if validation fails midway.
  for (size_t i = 0; i < num_external_values; i++) {
    const struct xnn_external_value* external_value = &external_values[i];
    const uint32_t value_id = external_value->id;
    if (value_id >= runtime->num_blobs) {
      xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu",
        value_id, i);
      return xnn_status_invalid_parameter;
    }

    const struct xnn_blob* blob = &runtime->blobs[value_id];
    if (blob->allocation_type != xnn_allocation_type_external) {
      xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", value_id);
      return xnn_status_invalid_parameter;
    }
  }

  // Apply runtime state changes.
  for (size_t i = 0; i < num_external_values; i++) {
    const struct xnn_external_value* external_value = &external_values[i];
    const uint32_t value_id = external_value->id;
    struct xnn_blob* blob = &runtime->blobs[value_id];
    blob->data = external_value->data;
  }

  for (size_t i = 0; i < runtime->num_ops; i++) {
    const struct xnn_operator_data* opdata = &runtime->opdata[i];
    if (opdata->operator_objects[0] == NULL) {
      // Operator was removed during optimization.
      continue;
    }

    // Ensure that the weights cache is finalized.
    struct xnn_weights_cache* weights_cache = opdata->operator_objects[0]->weights_cache;
    if (weights_cache != NULL && !xnn_weights_cache_is_finalized(weights_cache)) {
      xnn_log_error("weights cache needs to be finalized before setup/infer");
      return xnn_status_invalid_state;
    }

    assert(opdata->setup != NULL);
    const enum xnn_status status = opdata->setup(opdata, runtime->blobs, runtime->num_blobs, runtime->threadpool);
    if (status != xnn_status_success) {
      xnn_log_error("failed to setup runtime: error in operator #%zu", i);
      return status;
    }
  }

  return xnn_status_success;
}
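
// Illustrative setup/invoke sketch (not part of this file), using hypothetical external
// value ids 0 (input) and 1 (output) and caller-owned buffers; error handling is omitted:
//
//   const struct xnn_external_value externals[] = {
//     { .id = 0, .data = input_buffer },
//     { .id = 1, .data = output_buffer },
//   };
//   xnn_setup_runtime(runtime, 2, externals);
//   xnn_invoke_runtime(runtime);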

static xnn_timestamp xnn_read_timer() {
  xnn_timestamp timestamp;
#ifdef __MACH__
  timestamp = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
  if (timestamp == 0) {
    xnn_log_warning("clock_gettime failed: error code %d", errno);
  }
#elif __EMSCRIPTEN__
  timestamp = emscripten_get_now();
#elif XNN_PLATFORM_WINDOWS
  BOOL res = QueryPerformanceCounter(&timestamp);
  if (!res) {
    xnn_log_error("QueryPerformanceCounter failed: error code %u", GetLastError());
    memset(&timestamp, 0, sizeof(timestamp));
  }
#else
  int res = clock_gettime(CLOCK_MONOTONIC, &timestamp);
  if (res != 0) {
    xnn_log_error("clock_gettime failed: error code %d", errno);
    memset(&timestamp, 0, sizeof(timestamp));
  }
#endif
  return timestamp;
}

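// Returns the elapsed time between start and end in microseconds on all platforms above.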
static inline uint64_t xnn_get_elapsed_time(const xnn_timestamp* start, const xnn_timestamp* end) {
#ifdef __MACH__
  const uint64_t kNanosInMicro = 1000;
  return (*end - *start) / kNanosInMicro;
#elif __EMSCRIPTEN__
  const double kMicrosInMilli = 1.0e3;
  return (uint64_t) ((*end - *start) * kMicrosInMilli);
#elif XNN_PLATFORM_WINDOWS
  const uint64_t kMicrosInSec = 1000 * 1000;
  LARGE_INTEGER frequency;
  BOOL res = QueryPerformanceFrequency(&frequency);
  if (!res) {
    xnn_log_error("QueryPerformanceFrequency failed: error code %u", GetLastError());
    return 0;
  }
  return ((end->QuadPart - start->QuadPart) * kMicrosInSec) / frequency.QuadPart;
#else
  const uint64_t kNanosInMicro = UINT64_C(1000);
  const uint64_t kNanosInSec = UINT64_C(1000000000);
  const uint64_t secs = (end->tv_sec - start->tv_sec) * kNanosInSec;
  const uint64_t ns_secs = (end->tv_nsec - start->tv_nsec);
  return (secs + ns_secs) / kNanosInMicro;
#endif
}

enum xnn_status xnn_get_runtime_profiling_info(xnn_runtime_t runtime,
                                               enum xnn_profile_info param_name,
                                               size_t param_value_size,
                                               void* param_value,
                                               size_t* param_value_size_ret)
{
  if (!runtime->profiling) {
    return xnn_status_invalid_state;
  }
  enum xnn_status status = xnn_status_success;
  size_t required_size = 0;
  const struct xnn_operator_data* opdata = runtime->opdata;
  switch (param_name) {
    case xnn_profile_info_num_operators:
      required_size = sizeof(size_t);
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        size_t num_valid_ops = 0;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            num_valid_ops += 1;
          }
        }
        memcpy(param_value, &num_valid_ops, required_size);
      }
      break;
    case xnn_profile_info_operator_name:
      for (size_t i = 0; i < runtime->num_ops; ++i) {
        if (opdata[i].operator_objects[0] != NULL) {
          const char* op_name = xnn_operator_type_to_string(opdata[i].operator_objects[0]->type);
          size_t op_name_len = strlen(op_name) + 1;
          if (opdata[i].operator_objects[0]->ukernel.type != xnn_microkernel_type_default) {
            op_name_len += strlen(xnn_microkernel_type_to_string(opdata[i].operator_objects[0]->ukernel.type)) + 1;
          }
          required_size += op_name_len;
        }
      }
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        char* name_out = (char*) param_value;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            const char* op_name = xnn_operator_type_to_string(opdata[i].operator_objects[0]->type);
            size_t op_name_len = strlen(op_name) + 1;
            if (opdata[i].operator_objects[0]->ukernel.type != xnn_microkernel_type_default) {
              const char* ukernel_type = xnn_microkernel_type_to_string(opdata[i].operator_objects[0]->ukernel.type);
              op_name_len += strlen(ukernel_type) + 1;
              snprintf(name_out, op_name_len, "%s %s", op_name, ukernel_type);
            } else {
              snprintf(name_out, op_name_len, "%s", op_name);
            }
            name_out += op_name_len;
          }
        }
      }
      break;
    case xnn_profile_info_operator_timing:
    {
      size_t num_valid_ops = 0;
      for (size_t i = 0; i < runtime->num_ops; ++i) {
        if (opdata[i].operator_objects[0] != NULL) {
          num_valid_ops += 1;
        }
      }
      required_size = num_valid_ops * sizeof(uint64_t);
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        xnn_timestamp previous_ts = runtime->start_ts;
        uint64_t* data = (uint64_t*) param_value;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            uint64_t op_time = 0;
            for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
              if (opdata[i].operator_objects[j] != NULL) {
                op_time += xnn_get_elapsed_time(&previous_ts, &opdata[i].end_ts[j]);
                previous_ts = opdata[i].end_ts[j];
              }
            }
            *data++ = op_time;
          }
        }
      }
      break;
    }
    default:
      status = xnn_status_invalid_parameter;
  }
  return status;
}
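
// Illustrative query sketch (not part of this file), assuming the runtime was created
// with XNN_FLAG_BASIC_PROFILING and has been invoked at least once. Profiling data is
// retrieved with the usual two-call pattern: the first call reports the required buffer
// size via param_value_size_ret, the second fills the buffer. Allocation checks and
// error handling are omitted.
//
//   size_t size = 0;
//   xnn_get_runtime_profiling_info(runtime, xnn_profile_info_operator_timing,
//                                  0, NULL, &size);
//   uint64_t* timings_us = malloc(size);
//   xnn_get_runtime_profiling_info(runtime, xnn_profile_info_operator_timing,
//                                  size, timings_us, &size);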

enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime)
{
  if (runtime->profiling) {
    runtime->start_ts = xnn_read_timer();
  }
  for (size_t i = 0; i < runtime->num_ops; i++) {
    for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
      if (runtime->opdata[i].operator_objects[j] == NULL) {
        // Operator was removed after fusion.
        continue;
      }

      const enum xnn_status status = xnn_run_operator_with_index(runtime->opdata[i].operator_objects[j], i, j, runtime->threadpool);
      if (status != xnn_status_success) {
        return status;
      }
      if (runtime->profiling) {
        runtime->opdata[i].end_ts[j] = xnn_read_timer();
      }
    }
  }
  return xnn_status_success;
}

enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime)
{
  if (runtime != NULL) {
    if (runtime->opdata != NULL) {
      for (size_t i = 0; i < runtime->num_ops; i++) {
        for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
          xnn_delete_operator(runtime->opdata[i].operator_objects[j]);
        }
      }
      xnn_release_memory(runtime->opdata);

      xnn_release_memory(runtime->blobs);
      if (runtime->workspace != NULL) {
        // Remove this runtime from the list of users of the workspace.
        assert(runtime->workspace->first_user != NULL);
        if (runtime->workspace->first_user == runtime) {
          runtime->workspace->first_user = runtime->next_workspace_user;
        } else {
          xnn_runtime_t prev = runtime->workspace->first_user;
          xnn_runtime_t curr = prev->next_workspace_user;
          while (curr != runtime) {
            prev = curr;
            curr = curr->next_workspace_user;
          }
          assert(curr == runtime);
          prev->next_workspace_user = curr->next_workspace_user;
        }
        xnn_release_workspace(runtime->workspace);
      }
    }
#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
    xnn_release_code_cache(&runtime->code_cache);
#endif
    xnn_release_memory(runtime);
  }
  return xnn_status_success;
}