1// Copyright 2022 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <assert.h>
7
8#include <xnnpack/math.h>
9#include <xnnpack/microkernel-utils.h>
10
// Computes the total tile size used by a multipass depthwise convolution.
//
// A first pass and a last pass are always accounted for. Whatever part of
// kernel_size is not covered by those two passes is rounded up to a whole
// number of middle passes.
//
// Worked example (assuming doz() is a saturating subtraction and round_up()
// rounds its first argument up to a multiple of the second; both come from
// xnnpack/math.h):
//   kernel_size = 9, first_pass_tile = 2, middle_pass_tile = 3, last_pass_tile = 3
//   first + last passes    -> 2 + 3 = 5
//   left for middle passes -> doz(9, 5) = 4, rounded up to 6 (2 middle passes)
//   result                 -> 5 + 6 = 11
//
// kernel_size must be strictly greater than first_pass_tile; this is only
// checked through assert().
size_t xnn_dwconv_multipass_tile_size(
  size_t kernel_size,
  size_t first_pass_tile,
  size_t middle_pass_tile,
  size_t last_pass_tile)
{
  assert(kernel_size > first_pass_tile);

  // Contribution of the two passes that are always present.
  const size_t first_and_last = first_pass_tile + last_pass_tile;
  // Part of the kernel that the first and last passes do not cover.
  const size_t middle_elements = doz(kernel_size, first_and_last);
  // Middle passes come in whole units of middle_pass_tile.
  const size_t middle_total = round_up(middle_elements, middle_pass_tile);

  return first_and_last + middle_total;
}
24
// Computes the weights count for a multipass depthwise convolution.
//
// The result is (1 + tile_size) multiplied by a padded channel count, where
// the extra 1 accounts for the bias. The padded channel count is built as
// follows:
//   1. channels is rounded up to channel_round, since the first and middle
//      passes can over-read up to channel_round;
//   2. as many whole channel_tile-sized groups as possible are taken;
//   3. the leftover channels are rounded up to channel_subtile.
//
// NOTE(review): the *_po2 helpers come from xnnpack/math.h and presumably
// require their second argument to be a power of two - confirm there.
size_t xnn_dwconv_multipass_weights_count(
  size_t tile_size,
  size_t channels,
  size_t channel_tile,
  size_t channel_subtile,
  size_t channel_round)
{
  // Channel count after allowing for over-reads of up to channel_round.
  const size_t padded_channels = round_up_po2(channels, channel_round);
  // Channels handled by full channel_tile-sized loops.
  const size_t full_tile_channels = round_down_po2(padded_channels, channel_tile);
  // Remainder that does not fill a whole channel_tile...
  const size_t leftover_channels = mod_po2(padded_channels, channel_tile);
  // ...is handled in channel_subtile-sized loops.
  const size_t subtile_channels = round_up_po2(leftover_channels, channel_subtile);
  // One bias entry plus tile_size entries for each channel.
  const size_t entries_per_channel = 1 + tile_size;

  return entries_per_channel * (full_tile_channels + subtile_channels);
}
41