1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#include <cstdlib>
17#include <random>
18
19#include "Bench.h"
20
21using namespace glow;
22
// The float convolution kernel is implemented in libjit with C linkage; it is
// declared here by hand so the benchmark can invoke it directly.
extern "C" {
void libjit_conv2d_f(float *outW, const float *inW, const float *filterW,
                     const float *biasW, const size_t *outWdims,
                     const size_t *inWdims, const size_t *filterWdims,
                     const size_t *biasWdims, const size_t *kernelSizes,
                     const size_t *strides, const size_t *pads, size_t group,
                     unsigned depthUnroll);
} // extern "C"
32
33/// Benchmark a convolution with specified parameters on square inputs.
34class ConvBench : public Benchmark {
35 /// Matrices
36 std::vector<float> outW;
37 std::vector<float> inW;
38 std::vector<float> filterW;
39 std::vector<float> biasW;
40
41 /// Dimensions
42 // [batch, h, w, channels]
43 size_t outWdims[4];
44 size_t inWdims[4];
45 // [outputChannels, h, w, inputChannels]
46 size_t filterWdims[4];
47
48 /// Parameters
49 size_t kernelSizes[2];
50 size_t strides[2];
51 size_t pads[2];
52 size_t group;
53 unsigned depthUnroll;
54
55public:
56 ConvBench(size_t inputBatch, size_t inputEdgeSize, size_t inputChannels,
57 size_t filterMultiplier, size_t kernelSize, size_t stride,
58 size_t pad, size_t group)
59 : kernelSizes{kernelSize, kernelSize}, strides{stride, stride}, pads{pad,
60 pad},
61 group(group) {
62
63 inWdims[0] = inputBatch;
64 inWdims[1] = inputEdgeSize;
65 inWdims[2] = inputEdgeSize;
66 inWdims[3] = inputChannels;
67
68 filterWdims[0] = filterMultiplier * group;
69 filterWdims[1] = kernelSize;
70 filterWdims[2] = kernelSize;
71 filterWdims[3] = inWdims[3] / group;
72
73 size_t outEdgeSize =
74 ((inputEdgeSize + (2 * pad) - kernelSize) / stride) + 1;
75 outWdims[0] = inWdims[0];
76 outWdims[1] = outEdgeSize;
77 outWdims[2] = outEdgeSize;
78 outWdims[3] = filterWdims[0];
79
80 depthUnroll = (((outWdims[3] / group) % 8) == 0) ? 8 : 1;
81 }
82
83 virtual void setup() override {
84 size_t outSize = mapMult(outWdims, 4);
85 size_t inSize = mapMult(inWdims, 4);
86 size_t filterSize = mapMult(filterWdims, 4);
87 size_t biasSize = filterWdims[0];
88
89 outW.resize(outSize);
90 inW.resize(inSize);
91 filterW.resize(filterSize);
92 biasW.resize(biasSize);
93
94 randomize(inSize, inW.data());
95 randomize(filterSize, filterW.data());
96 randomize(biasSize, biasW.data());
97 }
98
99 virtual void run() override {
100 // biasWDims isn't used in libjit_conv2d_f, so we're passing NULL.
101 libjit_conv2d_f(outW.data(), inW.data(), filterW.data(), biasW.data(),
102 outWdims, inWdims, filterWdims, NULL, kernelSizes, strides,
103 pads, group, depthUnroll);
104 }
105
106 virtual void teardown() override {}
107
108private:
109 size_t mapMult(size_t *vec, int size) {
110 size_t result = 1;
111 for (int i = 0; i < size; i++) {
112 result *= vec[i];
113 }
114 return result;
115 }
116
117 void randomize(size_t size, float *a) {
118 std::mt19937 gen;
119 std::uniform_real_distribution<> dis(-1.0, 1.0);
120 for (size_t i = 0; i < size; i++) {
121 a[i] = dis(gen);
122 }
123 }
124};
125
126int main() {
127 constexpr int reps = 10;
128 printf("inputBatch, inputEdgeSize, inputChannels, filterMultiplier, "
129 "kernelSize, stride, pad, group, bestInSeconds\n");
130
131 for (size_t inputBatch : {1, 3}) {
132 for (size_t inputEdgeSize : {7, 56, 224}) {
133 for (size_t inputChannels : {64, 128, 1024}) {
134 for (size_t filterMultiplier : {1, 8}) {
135 for (size_t kernelSize : {1, 3}) {
136 for (size_t stride : {1, 4}) {
137 size_t pad = kernelSize / 2;
138 if ((inputEdgeSize + (pad * 2)) <= kernelSize)
139 continue;
140 for (size_t group : {1, 112}) {
141 if (inputChannels % group != 0)
142 continue;
143 ConvBench b(inputBatch, inputEdgeSize, inputChannels,
144 filterMultiplier, kernelSize, stride, pad, group);
145 auto times = bench(&b, reps);
146 double time = *(std::min_element(times.begin(), times.end()));
147 printf("%zu, %zu, %zu, %zu, %zu, %zu, %zu, %zu, %f\n",
148 inputBatch, inputEdgeSize, inputChannels,
149 filterMultiplier, kernelSize, stride, pad, group, time);
150 } // group
151 } // stride
152 } // kernelSize
153 } // filterMultiplier
154 } // inputChannels
155 } // inputEdgeSize
156 } // inputBatch
157}
158