microkernel-utils.c source code [pytorch/third_party/XNNPACK/src/microkernel-utils.c]

1	// Copyright 2022 Google LLC
2	//
3	// This source code is licensed under the BSD-style license found in the
4	// LICENSE file in the root directory of this source tree.
5
6	#include <assert.h>
7
8	#include <xnnpack/math.h>
9	#include <xnnpack/microkernel-utils.h>
10
// Computes the total tile size (in kernel taps) used by a multipass depthwise
// convolution: the first-pass tile, plus the last-pass tile, plus enough
// middle-pass tiles to cover whatever the first and last pass leave over.
//
// Caller contract (checked only in debug builds): kernel_size must be strictly
// greater than first_pass_tile.
size_t xnn_dwconv_multipass_tile_size(
  size_t kernel_size,
  size_t first_pass_tile,
  size_t middle_pass_tile,
  size_t last_pass_tile)
{
  assert(kernel_size > first_pass_tile);

  // The first and the last pass always run, so both tiles are always counted.
  const size_t fixed_tile = first_pass_tile + last_pass_tile;

  // Taps left for the middle passes. doz() and round_up() come from
  // xnnpack/math.h; presumably doz() is a saturating "difference or zero"
  // subtraction and round_up() rounds up to a multiple of its second argument
  // -- confirm against that header.
  const size_t middle_taps = doz(kernel_size, fixed_tile);

  // As many whole middle passes as needed to cover the leftover taps.
  // E.g. (under the assumptions above) kernel_size = 9, first_pass_tile = 2,
  // middle_pass_tile = 3, last_pass_tile = 3: 4 taps are left over, rounded up
  // to 6 (2 middle passes), so the result is 2 + 3 + 6 = 11.
  const size_t middle_tile = round_up(middle_taps, middle_pass_tile);

  return fixed_tile + middle_tile;
}
24
// Computes the number of weight elements (bias included) for a multipass
// depthwise convolution with the given tile size and channel tiling.
//
// The *_po2 helpers come from xnnpack/math.h; presumably they require their
// second argument (channel_round, channel_tile, channel_subtile) to be a power
// of two -- confirm against that header.
size_t xnn_dwconv_multipass_weights_count(
  size_t tile_size,
  size_t channels,
  size_t channel_tile,
  size_t channel_subtile,
  size_t channel_round)
{
  // First and middle passes run as many channel_tile-sized loops as possible, and can over-read up to channel_round.
  const size_t subtiled_channels = round_up_po2(channels, channel_round);
  // 1 for bias, we always have a first and last pass.
  return (1 + tile_size) *
    // as many channel_tile-sized loops as possible.
    (round_down_po2(subtiled_channels, channel_tile) +
    // handle the remainder in channel_subtile loops.
     round_up_po2(mod_po2(subtiled_channels, channel_tile), channel_subtile));
}
41

Browse the source code of pytorch/third_party/XNNPACK/src/microkernel-utils.c