// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#ifndef __MACH__
#define _POSIX_C_SOURCE 199309L
#endif

#include <assert.h>
#include <inttypes.h>  // For PRIu32.
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>  // For snprintf.
#include <stdlib.h>
#include <string.h>  // For memcpy, memset, strlen.

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/cache.h>
#include <xnnpack/common.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/memory-planner.h>
#include <xnnpack/node-type.h>
#include <xnnpack/operator.h>
#include <xnnpack/params.h>
#include <xnnpack/subgraph.h>

#if defined(__EMSCRIPTEN__)
#include <emscripten/emscripten.h>
#elif XNN_PLATFORM_WINDOWS
#include <windows.h>
#else
#include <errno.h>
#include <time.h>
#endif

#ifndef XNN_ENABLE_JIT
#error "XNN_ENABLE_JIT is not defined"
#endif

enum xnn_status xnn_create_workspace(xnn_workspace_t* workspace_out)
{
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create workspace: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  struct xnn_workspace* workspace = xnn_allocate_zero_memory(sizeof(struct xnn_workspace));
  if (workspace == NULL) {
    xnn_log_error("failed to allocate %zu bytes for workspace descriptor", sizeof(struct xnn_workspace));
    return xnn_status_out_of_memory;
  }
  workspace->ref_count = 1;
  *workspace_out = workspace;
  return xnn_status_success;
}

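// Retains a reference to a workspace. Workspaces are reference-counted: the creator holds the initial
// reference, each runtime sharing the workspace holds one more, and the underlying buffer is freed
// when the last reference is dropped via xnn_release_workspace.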
static inline void xnn_retain_workspace(xnn_workspace_t workspace)
{
  workspace->ref_count++;
}

enum xnn_status xnn_release_workspace(xnn_workspace_t workspace)
{
  assert(workspace->ref_count != 0);
  if (--workspace->ref_count == 0) {
    xnn_release_simd_memory(workspace->data);
    xnn_release_memory(workspace);
  }
  return xnn_status_success;
}

enum xnn_status xnn_create_weights_cache_with_size(size_t size, xnn_weights_cache_t* weights_cache_out)
{
  struct xnn_weights_cache* weights_cache = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create weights cache: XNNPACK is not initialized");
    goto error;
  }

  weights_cache = xnn_allocate_zero_memory(sizeof(struct xnn_weights_cache));
  if (weights_cache == NULL) {
    xnn_log_error("failed to allocate %zu bytes for weights cache descriptor", sizeof(struct xnn_weights_cache));
    status = xnn_status_out_of_memory;
    goto error;
  }

  status = xnn_init_weights_cache_with_size(weights_cache, size);
  if (status != xnn_status_success) {
    goto error;
  }
  *weights_cache_out = weights_cache;
  return xnn_status_success;

error:
  xnn_release_weights_cache(weights_cache);
  return status;
}

enum xnn_status xnn_create_weights_cache(xnn_weights_cache_t* weights_cache_out)
{
  return xnn_create_weights_cache_with_size(XNN_DEFAULT_WEIGHTS_BUFFER_SIZE, weights_cache_out);
}

enum xnn_status xnn_delete_weights_cache(xnn_weights_cache_t weights_cache)
{
  enum xnn_status status = xnn_release_weights_cache(weights_cache);
  if (status != xnn_status_success) {
    return status;
  }
  xnn_release_memory(weights_cache);
  return xnn_status_success;
}

enum xnn_status xnn_create_runtime(
  xnn_subgraph_t subgraph,
  xnn_runtime_t* runtime_out)
{
  return xnn_create_runtime_v2(subgraph, NULL /* threadpool */, 0 /* flags */, runtime_out);
}
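
// A minimal usage sketch of the runtime API (illustrative only, not part of the library; error
// handling elided). It assumes XNNPACK was initialized with xnn_initialize and `subgraph` was already
// populated via xnn_define_* calls, with external values `input_id`/`output_id` and suitably sized
// buffers `input_data`/`output_data`:
//
//   xnn_runtime_t runtime = NULL;
//   xnn_create_runtime(subgraph, &runtime);
//   const struct xnn_external_value io[] = {
//     {.id = input_id, .data = input_data},
//     {.id = output_id, .data = output_data},
//   };
//   xnn_setup_runtime(runtime, 2, io);
//   xnn_invoke_runtime(runtime);
//   xnn_delete_runtime(runtime);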

enum xnn_status xnn_create_runtime_v2(
  xnn_subgraph_t subgraph,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  return xnn_create_runtime_v3(subgraph, /* weights_cache */ NULL, threadpool, flags, runtime_out);
}

enum xnn_status xnn_create_runtime_v3(
  xnn_subgraph_t subgraph,
  xnn_weights_cache_t weights_cache,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  xnn_workspace_t workspace;
  enum xnn_status status = xnn_create_workspace(&workspace);
  if (status != xnn_status_success) {
    return status;
  }
  status = xnn_create_runtime_v4(subgraph, weights_cache, workspace, threadpool, flags, runtime_out);
  // Release the local workspace reference regardless of whether runtime creation succeeded:
  // on success, the runtime holds its own reference to the workspace.
  xnn_release_workspace(workspace);
  return status;
}

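// Sizes (or grows) the shared workspace buffer and assigns pointers for this runtime's workspace and
// persistent blobs. If growing the buffer moved it, the blob pointers of every other runtime sharing
// the workspace are rebased by the same delta.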
static enum xnn_status initialize_workspace_blobs(
  xnn_subgraph_t subgraph,
  xnn_runtime_t runtime,
  struct xnn_value_allocation_tracker* mem_alloc_tracker)
{
  assert(runtime->workspace != NULL);
  const size_t persistent_size = runtime->workspace->persistent_size;
  size_t mem_arena_size = mem_alloc_tracker->mem_arena_size + persistent_size;
  if (mem_arena_size == 0) {
    return xnn_status_success;
  }
  // Sparse microkernels can read up to 2 * XNN_EXTRA_BYTES beyond array bounds.
  mem_arena_size += 2 * XNN_EXTRA_BYTES;

  // Records how far the workspace data pointer moves when a larger workspace is allocated.
  ptrdiff_t workspace_data_delta = 0;
  // Allocate a larger workspace if the existing one is too small.
  if (runtime->workspace->size < mem_arena_size) {
    void* old_workspace_data = runtime->workspace->data;
    if (runtime->workspace->size != 0) {
      // Free the workspace's current data. Free first, then allocate, to keep peak memory usage low.
      xnn_release_simd_memory(runtime->workspace->data);
    }
    void* new_workspace_data = xnn_allocate_simd_memory(mem_arena_size);
    if (new_workspace_data == NULL) {
      xnn_log_error("failed to allocate %zu bytes for runtime workspace", mem_arena_size);
      return xnn_status_out_of_memory;
    }
    runtime->workspace->data = new_workspace_data;
    runtime->workspace->size = mem_arena_size;
    xnn_log_debug("created workspace of size %zu", mem_arena_size);
    // Keep track of how far the workspace data moved.
    if (old_workspace_data != NULL) {
      workspace_data_delta = (uintptr_t) new_workspace_data - (uintptr_t) old_workspace_data;
    }
  }

  assert(runtime->workspace->size >= mem_arena_size);

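  // Workspace layout: [persistent tensors | temporary tensors planned by the tracker | slack bytes].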
  // Initialize current runtime's blob pointers.
  size_t persistent_offset = 0;
  for (size_t i = 0; i < subgraph->num_values; i++) {
    const struct xnn_value* value = &subgraph->values[i];
    struct xnn_blob* blob = &runtime->blobs[i];
    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
      if (blob->allocation_type == xnn_allocation_type_workspace) {
        // Value is purely internal to the runtime, allocate it in the workspace.
        blob->data = (void*) ((uintptr_t) runtime->workspace->data + persistent_size + mem_alloc_tracker->usage[i].alloc_offset);
      } else if (blob->allocation_type == xnn_allocation_type_persistent) {
        blob->data = (void*) ((uintptr_t) runtime->workspace->data + persistent_offset);
        persistent_offset += round_up_po2(blob->size, XNN_EXTRA_BYTES);
      }
    }
  }
  assert(persistent_offset == persistent_size);

  // Adjust the blob pointers of all runtimes that share this workspace.
  if (workspace_data_delta != 0) {
    for (struct xnn_runtime* rt = runtime->workspace->first_user; rt != NULL; rt = rt->next_workspace_user) {
      // The current runtime already has the correct offset.
      if (rt == runtime) {
        continue;
      }
      for (size_t i = 0; i < rt->num_blobs; i++) {
        struct xnn_blob* blob = &rt->blobs[i];
        if (blob->allocation_type == xnn_allocation_type_workspace ||
            blob->allocation_type == xnn_allocation_type_persistent) {
          assert(blob->data != NULL);
          blob->data = (void*) ((uintptr_t) blob->data + workspace_data_delta);
        }
      }
    }
  }

  return xnn_status_success;
}

// An input's memory can be reused for an operator's output only if:
// - the input is not external (external inputs cannot be overwritten),
// - the input is not static (static inputs cannot be overwritten),
// - neither input nor output is persistent (persistent tensors have their own space allocated at the
//   front of the workspace),
// - the input has at most one consumer (with more than one consumer we cannot track all the consumers
//   to update first_consumer, so we bail out).
static bool input_memory_can_be_reused(const struct xnn_value* input, const struct xnn_value* output)
{
  return !xnn_value_is_external(input) && !xnn_value_is_static(input) && !xnn_value_is_persistent(input)
    && !xnn_value_is_persistent(output) && input->num_consumers <= 1;
}

// An in-place operation reuses the input tensor's memory for its output. Examples are element-wise
// unary operations like activation functions. Normally, an output tensor is allocated its own space;
// for an in-place operation, we want the output tensor to share the input tensor's memory instead.
// We do this by calling xnn_mark_tensor_as_reuse, which:
// - sets the tensor_size of the output tensor's usage record to 0,
// - marks this usage record as reusing another tensor's memory,
// - records the id of the reused tensor, whose alloc_offset will later be propagated to the output
//   tensor.
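// For example, in a chain like convolution -> sigmoid, the sigmoid node's output tensor can alias the
// convolution output's memory, so the planner allocates space for only one of the two tensors.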
static void optimize_tensor_allocation_for_in_place_operations(
  struct xnn_value_allocation_tracker* tracker,
  xnn_subgraph_t subgraph)
{
  xnn_subgraph_analyze_consumers_and_producers(subgraph);
  for (uint32_t n = 0; n < subgraph->num_nodes; n++) {
    struct xnn_node* node = &subgraph->nodes[n];
    switch (node->type) {
      case xnn_node_type_abs:
      case xnn_node_type_bankers_rounding:
      case xnn_node_type_ceiling:
      case xnn_node_type_clamp:
      case xnn_node_type_copy:
      case xnn_node_type_elu:
      case xnn_node_type_floor:
      case xnn_node_type_hardswish:
      case xnn_node_type_leaky_relu:
      case xnn_node_type_negate:
      case xnn_node_type_prelu:
      case xnn_node_type_sigmoid:
      case xnn_node_type_softmax:
      case xnn_node_type_square:
      case xnn_node_type_square_root:
      case xnn_node_type_static_reshape:
        // Valid operation types that can be optimized.
        break;
      default:
        continue;
    }
    struct xnn_value* output = &subgraph->values[node->outputs[0]];
    const uint32_t input_id = node->inputs[0];
    const struct xnn_value* input = &subgraph->values[input_id];
    if (!input_memory_can_be_reused(input, output)) {
      // TODO(zhin): consider aliasing input to output rather than output to input.
      continue;
    }
    if (output->num_consumers == 1) {
      uint32_t reuse_id = input_id;
      // If the tensor we are reusing is itself reused, find the "root tensor" to be reused.
      while (tracker->usage[reuse_id].reuse_value_id != XNN_INVALID_VALUE_ID) {
        reuse_id = tracker->usage[reuse_id].reuse_value_id;
      }
      // We only support the case where the output has a single consumer, because we cannot easily find
      // all consumer nodes without traversing the entire graph. Relaxing this will require tracking
      // output->last_consumer in the future.
      assert(tracker->usage[reuse_id].last_node < output->first_consumer);
      xnn_log_debug("reusing tensor id #%" PRIu32 " memory for tensor id #%" PRIu32 " Node #%" PRIu32 " %s",
                    reuse_id, output->id, node->id, xnn_node_type_to_string(node->type));
      xnn_mark_tensor_as_reuse(tracker, output->id, reuse_id, output->first_consumer);
    }
  }
}

enum xnn_status xnn_create_runtime_v4(
  xnn_subgraph_t subgraph,
  xnn_weights_cache_t weights_cache,
  xnn_workspace_t workspace,
  pthreadpool_t threadpool,
  uint32_t flags,
  xnn_runtime_t* runtime_out)
{
  struct xnn_runtime* runtime = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create runtime: XNNPACK is not initialized");
    goto error;
  }

  if (workspace == NULL) {
    xnn_log_error("failed to create runtime: workspace is NULL");
    status = xnn_status_invalid_parameter;
    goto error;
  }

  const uint32_t optimization_flags = XNN_FLAG_SPARSE_INFERENCE | XNN_FLAG_HINT_FP16_INFERENCE |
      XNN_FLAG_FORCE_FP16_INFERENCE | XNN_FLAG_NO_OPERATOR_FUSION;
  status = xnn_subgraph_optimize(subgraph, flags & optimization_flags);
  if (status != xnn_status_success) {
    xnn_log_error("failed to optimize subgraph");
    goto error;
  }

  status = xnn_status_out_of_memory;

  runtime = xnn_allocate_zero_memory(sizeof(struct xnn_runtime));
  if (runtime == NULL) {
    xnn_log_error("failed to allocate %zu bytes for runtime descriptor", sizeof(struct xnn_runtime));
    goto error;
  }

  runtime->opdata = xnn_allocate_zero_memory(sizeof(struct xnn_operator_data) * subgraph->num_nodes);
  if (runtime->opdata == NULL) {
    xnn_log_error("failed to allocate %zu bytes for opdata descriptors",
                  sizeof(struct xnn_operator_data) * (size_t) subgraph->num_nodes);
    goto error;
  }
  runtime->num_ops = subgraph->num_nodes;

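  // With XNN_FLAG_YIELD_WORKERS, tag the last valid (non-fused) node so that threadpool worker
  // threads are yielded once the final operator in the graph has executed.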
  if (flags & XNN_FLAG_YIELD_WORKERS) {
    struct xnn_node* last_valid_node = NULL;
    for (size_t i = 0; i < subgraph->num_nodes; i++) {
      struct xnn_node* node = subgraph->nodes + i;
      if (node->type != xnn_node_type_invalid) {
        last_valid_node = node;
      }
    }
    if (last_valid_node != NULL) {
      last_valid_node->flags |= XNN_FLAG_YIELD_WORKERS;
    }
  }

  struct xnn_code_cache* code_cache = NULL;
#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
  code_cache = &runtime->code_cache;
  status = xnn_init_code_cache(code_cache);
  if (status != xnn_status_success) {
    goto error;
  }
#endif
  const struct xnn_caches caches = {
    .code_cache = code_cache,
    .weights_cache = weights_cache,
  };

  struct xnn_value* values = subgraph->values;
  for (size_t i = 0; i < subgraph->num_nodes; i++) {
    const struct xnn_node* node = subgraph->nodes + i;

    // Ignore fused nodes.
    if (node->type != xnn_node_type_invalid) {
      assert(node->create != NULL);
      status = node->create(node, values, subgraph->num_values, runtime->opdata + i, &caches);
      if (status != xnn_status_success) {
        goto error;
      }
      runtime->opdata[i].setup = node->setup;
    }
  }

#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
  xnn_finalize_code_memory(&code_cache->cache.code);
#endif

  runtime->blobs = xnn_allocate_zero_memory(sizeof(struct xnn_blob) * subgraph->num_values);
  if (runtime->blobs == NULL) {
    xnn_log_error("failed to allocate %zu bytes for blob descriptors",
                  sizeof(struct xnn_blob) * (size_t) subgraph->num_values);
    goto error;
  }
  runtime->num_blobs = subgraph->num_values;

  struct xnn_value_allocation_tracker mem_alloc_tracker;
  xnn_init_value_allocation_tracker(&mem_alloc_tracker, subgraph);

  size_t persistent_size = 0;
  for (uint32_t i = 0; i < subgraph->num_values; i++) {
    struct xnn_value* value = &subgraph->values[i];
    struct xnn_blob* blob = &runtime->blobs[i];
    if (value->datatype != xnn_datatype_invalid && value->type == xnn_value_type_dense_tensor) {
      blob->size = xnn_tensor_get_size(subgraph, i);
      blob->data = (void*) (uintptr_t) value->data;
      if (blob->data == NULL) {
        if (xnn_value_is_external(value)) {
          // Value is non-static and external to the runtime: must be specified via a call to xnn_setup_runtime.
          blob->allocation_type = xnn_allocation_type_external;
        } else if (xnn_value_is_persistent(value)) {
          // Persistent values are allocated at the front of the workspace without overlaps.
          blob->allocation_type = xnn_allocation_type_persistent;
          persistent_size += round_up_po2(blob->size, XNN_EXTRA_BYTES);
        } else {
          // Value is purely internal to the runtime, and must be allocated in its workspace.
          xnn_add_value_allocation_tracker(&mem_alloc_tracker, i, round_up_po2(blob->size, XNN_EXTRA_BYTES));
          blob->allocation_type = xnn_allocation_type_workspace;
        }
      } else {
        blob->allocation_type = xnn_allocation_type_static;
      }
    }
  }
  optimize_tensor_allocation_for_in_place_operations(&mem_alloc_tracker, subgraph);
  xnn_plan_value_allocation_tracker(&mem_alloc_tracker);

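  // Register this runtime as a user of the (possibly shared) workspace by pushing it onto the
  // workspace's singly-linked list of users; xnn_delete_runtime unlinks it again.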
  xnn_retain_workspace(workspace);
  runtime->workspace = workspace;
  runtime->next_workspace_user = runtime->workspace->first_user;
  runtime->workspace->first_user = runtime;
  runtime->workspace->persistent_size = persistent_size;

  status = initialize_workspace_blobs(subgraph, runtime, &mem_alloc_tracker);
  if (status != xnn_status_success) {
    xnn_release_value_allocation_tracker(&mem_alloc_tracker);
    goto error;
  }

  if (flags & XNN_FLAG_BASIC_PROFILING) {
    runtime->profiling = true;
  }

  xnn_release_value_allocation_tracker(&mem_alloc_tracker);

  runtime->threadpool = threadpool;

  *runtime_out = runtime;
  return xnn_status_success;

error:
  xnn_delete_runtime(runtime);
  return status;
}

enum xnn_status xnn_setup_runtime(
  xnn_runtime_t runtime,
  size_t num_external_values,
  const struct xnn_external_value* external_values)
{
  // Validate all inputs without changing the internal state, so that the runtime stays in a
  // consistent state if validation fails midway.
  for (size_t i = 0; i < num_external_values; i++) {
    const struct xnn_external_value* external_value = &external_values[i];
    const uint32_t value_id = external_value->id;
    if (value_id >= runtime->num_blobs) {
      xnn_log_error("failed to setup runtime: out-of-bounds ID %" PRIu32 " in external value #%zu",
                    value_id, i);
      return xnn_status_invalid_parameter;
    }

    const struct xnn_blob* blob = &runtime->blobs[value_id];
    if (blob->allocation_type != xnn_allocation_type_external) {
      xnn_log_error("failed to setup runtime: Value %" PRIu32 " is not external", value_id);
      return xnn_status_invalid_parameter;
    }
  }

  // Apply runtime state changes.
  for (size_t i = 0; i < num_external_values; i++) {
    const struct xnn_external_value* external_value = &external_values[i];
    const uint32_t value_id = external_value->id;
    struct xnn_blob* blob = &runtime->blobs[value_id];
    blob->data = external_value->data;
  }

  for (size_t i = 0; i < runtime->num_ops; i++) {
    const struct xnn_operator_data* opdata = &runtime->opdata[i];
    if (opdata->operator_objects[0] == NULL) {
      // Operator was removed during optimization.
      continue;
    }

    // Ensure that the weights cache is finalized.
    struct xnn_weights_cache* weights_cache = opdata->operator_objects[0]->weights_cache;
    if (weights_cache != NULL && !xnn_weights_cache_is_finalized(weights_cache)) {
      xnn_log_error("weights cache needs to be finalized before setup/infer");
      return xnn_status_invalid_state;
    }

    assert(opdata->setup != NULL);
    const enum xnn_status status = opdata->setup(opdata, runtime->blobs, runtime->num_blobs, runtime->threadpool);
    if (status != xnn_status_success) {
      xnn_log_error("failed to setup runtime: error in operator #%zu", i);
      return status;
    }
  }

  return xnn_status_success;
}

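// Reads a timestamp from the best monotonic clock available on the platform.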
static xnn_timestamp xnn_read_timer() {
  xnn_timestamp timestamp;
#ifdef __MACH__
  timestamp = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
  if (timestamp == 0) {
    xnn_log_warning("clock_gettime failed: error code %d", errno);
  }
#elif __EMSCRIPTEN__
  timestamp = emscripten_get_now();
#elif XNN_PLATFORM_WINDOWS
  BOOL res = QueryPerformanceCounter(&timestamp);
  if (!res) {
    xnn_log_error("QueryPerformanceCounter failed: error code %u", GetLastError());
    memset(&timestamp, 0, sizeof(timestamp));
  }
#else
  int res = clock_gettime(CLOCK_MONOTONIC, &timestamp);
  if (res != 0) {
    xnn_log_error("clock_gettime failed: error code %d", errno);
    memset(&timestamp, 0, sizeof(timestamp));
  }
#endif
  return timestamp;
}

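// Returns the elapsed time between two timestamps, in microseconds.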
static inline uint64_t xnn_get_elapsed_time(const xnn_timestamp* start, const xnn_timestamp* end) {
#ifdef __MACH__
  const uint64_t kMicrosInNanos = 1000;
  return (*end - *start) / kMicrosInNanos;
#elif __EMSCRIPTEN__
  const double kMillisInMicros = 1.0e3;
  return (uint64_t) ((*end - *start) * kMillisInMicros);
#elif XNN_PLATFORM_WINDOWS
  const uint64_t kMicrosInSec = 1000 * 1000;
  LARGE_INTEGER frequency;
  BOOL res = QueryPerformanceFrequency(&frequency);
  if (!res) {
    xnn_log_error("QueryPerformanceFrequency failed: error code %u", GetLastError());
    return 0;
  }
  return ((end->QuadPart - start->QuadPart) * kMicrosInSec) / frequency.QuadPart;
#else
  const uint64_t kNanosInMicro = UINT64_C(1000);
  const uint64_t kNanosInSec = UINT64_C(1000000000);
  const uint64_t secs = (end->tv_sec - start->tv_sec) * kNanosInSec;
  const uint64_t ns_secs = (end->tv_nsec - start->tv_nsec);
  return (secs + ns_secs) / kNanosInMicro;
#endif
}

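// Queries profiling data recorded during xnn_invoke_runtime. Callers typically invoke this twice per
// parameter: once with a zero-sized buffer to learn the required size (the call returns
// xnn_status_out_of_memory and sets *param_value_size_ret), then again with a large-enough buffer.
// An illustrative sketch (not part of the library), assuming a runtime created with
// XNN_FLAG_BASIC_PROFILING that has already been invoked:
//
//   size_t size = 0;
//   xnn_get_runtime_profiling_info(runtime, xnn_profile_info_operator_timing, 0, NULL, &size);
//   uint64_t* timings = malloc(size);
//   xnn_get_runtime_profiling_info(runtime, xnn_profile_info_operator_timing, size, timings, &size);
//   free(timings);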
enum xnn_status xnn_get_runtime_profiling_info(xnn_runtime_t runtime,
                                               enum xnn_profile_info param_name,
                                               size_t param_value_size,
                                               void* param_value,
                                               size_t* param_value_size_ret)
{
  if (!runtime->profiling) {
    return xnn_status_invalid_state;
  }
  enum xnn_status status = xnn_status_success;
  size_t required_size = 0;
  const struct xnn_operator_data* opdata = runtime->opdata;
  switch (param_name) {
    case xnn_profile_info_num_operators:
      required_size = sizeof(size_t);
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        size_t num_valid_ops = 0;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            num_valid_ops += 1;
          }
        }
        memcpy(param_value, &num_valid_ops, required_size);
      }
      break;
    case xnn_profile_info_operator_name:
      for (size_t i = 0; i < runtime->num_ops; ++i) {
        if (opdata[i].operator_objects[0] != NULL) {
          const char* op_name = xnn_operator_type_to_string(opdata[i].operator_objects[0]->type);
          size_t op_name_len = strlen(op_name) + 1;
          if (opdata[i].operator_objects[0]->ukernel.type != xnn_microkernel_type_default) {
            op_name_len += strlen(xnn_microkernel_type_to_string(opdata[i].operator_objects[0]->ukernel.type)) + 1;
          }
          required_size += op_name_len;
        }
      }
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        char* name_out = (char*) param_value;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            const char* op_name = xnn_operator_type_to_string(opdata[i].operator_objects[0]->type);
            size_t op_name_len = strlen(op_name) + 1;
            if (opdata[i].operator_objects[0]->ukernel.type != xnn_microkernel_type_default) {
              const char* ukernel_type = xnn_microkernel_type_to_string(opdata[i].operator_objects[0]->ukernel.type);
              op_name_len += strlen(ukernel_type) + 1;
              snprintf(name_out, op_name_len, "%s %s", op_name, ukernel_type);
            } else {
              snprintf(name_out, op_name_len, "%s", op_name);
            }
            name_out += op_name_len;
          }
        }
      }
      break;
    case xnn_profile_info_operator_timing:
    {
      size_t num_valid_ops = 0;
      for (size_t i = 0; i < runtime->num_ops; ++i) {
        if (opdata[i].operator_objects[0] != NULL) {
          num_valid_ops += 1;
        }
      }
      required_size = num_valid_ops * sizeof(uint64_t);
      if (param_value_size < required_size) {
        *param_value_size_ret = required_size;
        status = xnn_status_out_of_memory;
      } else {
        xnn_timestamp previous_ts = runtime->start_ts;
        uint64_t* data = (uint64_t*) param_value;
        for (size_t i = 0; i < runtime->num_ops; ++i) {
          if (opdata[i].operator_objects[0] != NULL) {
            uint64_t op_time = 0;
            for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
              if (opdata[i].operator_objects[j] != NULL) {
                op_time += xnn_get_elapsed_time(&previous_ts, &opdata[i].end_ts[j]);
                previous_ts = opdata[i].end_ts[j];
              }
            }
            *data++ = op_time;
          }
        }
      }
      break;
    }
    default:
      status = xnn_status_invalid_parameter;
  }
  return status;
}

enum xnn_status xnn_invoke_runtime(
  xnn_runtime_t runtime)
{
  if (runtime->profiling) {
    runtime->start_ts = xnn_read_timer();
  }
  for (size_t i = 0; i < runtime->num_ops; i++) {
    for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
      if (runtime->opdata[i].operator_objects[j] == NULL) {
        // Operator was removed after fusion.
        continue;
      }

      const enum xnn_status status = xnn_run_operator_with_index(runtime->opdata[i].operator_objects[j], i, j, runtime->threadpool);
      if (status != xnn_status_success) {
        return status;
      }
      if (runtime->profiling) {
        runtime->opdata[i].end_ts[j] = xnn_read_timer();
      }
    }
  }
  return xnn_status_success;
}

enum xnn_status xnn_delete_runtime(
  xnn_runtime_t runtime)
{
  if (runtime != NULL) {
    if (runtime->opdata != NULL) {
      for (size_t i = 0; i < runtime->num_ops; i++) {
        for (size_t j = 0; j < XNN_MAX_OPERATOR_OBJECTS; j++) {
          xnn_delete_operator(runtime->opdata[i].operator_objects[j]);
        }
      }
      xnn_release_memory(runtime->opdata);

      xnn_release_memory(runtime->blobs);
      if (runtime->workspace != NULL) {
        // Remove this runtime from the list of users of the workspace.
        assert(runtime->workspace->first_user != NULL);
        if (runtime->workspace->first_user == runtime) {
          runtime->workspace->first_user = runtime->next_workspace_user;
        } else {
          xnn_runtime_t prev = runtime->workspace->first_user;
          xnn_runtime_t curr = prev->next_workspace_user;
          while (curr != runtime) {
            prev = curr;
            curr = curr->next_workspace_user;
          }
          assert(curr == runtime);
          prev->next_workspace_user = curr->next_workspace_user;
        }
        xnn_release_workspace(runtime->workspace);
      }
    }
#if XNN_PLATFORM_JIT && XNN_ENABLE_JIT
    xnn_release_code_cache(&runtime->code_cache);
#endif
    xnn_release_memory(runtime);
  }
  return xnn_status_success;
}