1 | // Copyright 2022 Google LLC |
2 | // |
3 | // This source code is licensed under the BSD-style license found in the |
4 | // LICENSE file in the root directory of this source tree. |
5 | |
6 | #include <assert.h> |
7 | |
8 | #include <xnnpack/math.h> |
9 | #include <xnnpack/microkernel-utils.h> |
10 | |
// Computes the total number of kernel taps covered by a multipass depthwise
// convolution: one first pass, one last pass, and however many middle passes
// are needed to cover what the first and last passes do not.
//
// kernel_size      - number of taps in the depthwise kernel; must be strictly
//                    greater than first_pass_tile (checked with assert only,
//                    so the check disappears when NDEBUG is defined).
// first_pass_tile  - taps consumed by the first pass.
// middle_pass_tile - taps consumed by each middle pass; the leftover tap count
//                    is rounded up to a multiple of this value.
// last_pass_tile   - taps consumed by the last pass.
//
// Returns first_pass_tile + last_pass_tile plus the middle-pass tap count. The
// result can exceed kernel_size.
//
// Worked example (assuming doz() is "difference or zero" and round_up() rounds
// up to the next multiple of its second argument):
//   kernel_size = 9, first_pass_tile = 1, middle_pass_tile = 3, last_pass_tile = 3
//   first pass takes 1 tap (8 left), two middle passes take 6 (2 left), and the
//   last pass handles the final 2 with 1 tap of padding => tile size 10.
size_t xnn_dwconv_multipass_tile_size(
  size_t kernel_size,
  size_t first_pass_tile,
  size_t middle_pass_tile,
  size_t last_pass_tile)
{
  assert(kernel_size > first_pass_tile);

  // The first and last passes always run, so their taps are always counted.
  const size_t first_and_last_taps = first_pass_tile + last_pass_tile;

  // Taps not covered by the first/last passes (doz() keeps this from going
  // below zero), expressed in whole middle passes.
  const size_t uncovered_taps = doz(kernel_size, first_and_last_taps);
  const size_t middle_taps = round_up(uncovered_taps, middle_pass_tile);

  return first_and_last_taps + middle_taps;
}
24 | |
// Computes how many weight elements (bias included) a multipass depthwise
// convolution needs for the given tile size and channel tiling.
//
// tile_size       - number of kernel taps stored per channel (e.g. the value
//                   returned by xnn_dwconv_multipass_tile_size).
// channels        - number of channels.
// channel_tile    - channel count handled by each main loop iteration; passed
//                   to the *_po2 helpers, so presumably a power of two.
// channel_subtile - channel count handled by each remainder loop iteration;
//                   likewise passed to a *_po2 helper.
// channel_round   - granularity channels is rounded up to before tiling;
//                   likewise passed to a *_po2 helper.
//
// Returns (1 + tile_size) * packed channel count, where the 1 accounts for the
// bias stored alongside the tile_size weights of every packed channel.
size_t xnn_dwconv_multipass_weights_count(
  size_t tile_size,
  size_t channels,
  size_t channel_tile,
  size_t channel_subtile,
  size_t channel_round)
{
  // The first and middle passes may over-read up to channel_round channels, so
  // start from the channel count rounded up to that granularity.
  const size_t rounded_channels = round_up_po2(channels, channel_round);

  // Portion handled by full channel_tile-sized loops.
  const size_t tiled_channels = round_down_po2(rounded_channels, channel_tile);

  // Whatever is left is handled in channel_subtile-sized loops, so it is
  // rounded up to a multiple of channel_subtile.
  const size_t leftover_channels = mod_po2(rounded_channels, channel_tile);
  const size_t subtiled_leftover = round_up_po2(leftover_channels, channel_subtile);

  const size_t packed_channels = tiled_channels + subtiled_leftover;

  // One bias element plus tile_size weights per packed channel.
  const size_t elements_per_channel = 1 + tile_size;
  return elements_per_channel * packed_channels;
}
41 | |