1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include <cstdlib> |
17 | #include <random> |
18 | |
19 | #include "Bench.h" |
20 | |
21 | using namespace glow; |
22 | |
extern "C" {
/// Forward declaration of the float convolution kernel from libjit; only the
/// prototype lives here, the definition is expected to come from libjit when
/// the benchmark is linked.
///
/// Each data pointer (outW, inW, filterW, biasW) is paired with a pointer to
/// its dimension array (outWdims, inWdims, filterWdims, biasWdims), followed
/// by the convolution parameters.
// NOTE(review): the benchmark in this file passes 4-element dimension arrays
// and 2-element kernelSizes/strides/pads arrays - confirm against libjit.
void libjit_conv2d_f(float *outW,
                     const float *inW,
                     const float *filterW,
                     const float *biasW,
                     const size_t *outWdims,
                     const size_t *inWdims,
                     const size_t *filterWdims,
                     const size_t *biasWdims,
                     const size_t *kernelSizes,
                     const size_t *strides,
                     const size_t *pads,
                     size_t group,
                     unsigned depthUnroll);
} // extern "C"
32 | |
33 | /// Benchmark a convolution with specified parameters on square inputs. |
34 | class ConvBench : public Benchmark { |
35 | /// Matrices |
36 | std::vector<float> outW; |
37 | std::vector<float> inW; |
38 | std::vector<float> filterW; |
39 | std::vector<float> biasW; |
40 | |
41 | /// Dimensions |
42 | // [batch, h, w, channels] |
43 | size_t outWdims[4]; |
44 | size_t inWdims[4]; |
45 | // [outputChannels, h, w, inputChannels] |
46 | size_t filterWdims[4]; |
47 | |
48 | /// Parameters |
49 | size_t kernelSizes[2]; |
50 | size_t strides[2]; |
51 | size_t pads[2]; |
52 | size_t group; |
53 | unsigned depthUnroll; |
54 | |
55 | public: |
56 | ConvBench(size_t inputBatch, size_t inputEdgeSize, size_t inputChannels, |
57 | size_t filterMultiplier, size_t kernelSize, size_t stride, |
58 | size_t pad, size_t group) |
59 | : kernelSizes{kernelSize, kernelSize}, strides{stride, stride}, pads{pad, |
60 | pad}, |
61 | group(group) { |
62 | |
63 | inWdims[0] = inputBatch; |
64 | inWdims[1] = inputEdgeSize; |
65 | inWdims[2] = inputEdgeSize; |
66 | inWdims[3] = inputChannels; |
67 | |
68 | filterWdims[0] = filterMultiplier * group; |
69 | filterWdims[1] = kernelSize; |
70 | filterWdims[2] = kernelSize; |
71 | filterWdims[3] = inWdims[3] / group; |
72 | |
73 | size_t outEdgeSize = |
74 | ((inputEdgeSize + (2 * pad) - kernelSize) / stride) + 1; |
75 | outWdims[0] = inWdims[0]; |
76 | outWdims[1] = outEdgeSize; |
77 | outWdims[2] = outEdgeSize; |
78 | outWdims[3] = filterWdims[0]; |
79 | |
80 | depthUnroll = (((outWdims[3] / group) % 8) == 0) ? 8 : 1; |
81 | } |
82 | |
83 | virtual void setup() override { |
84 | size_t outSize = mapMult(outWdims, 4); |
85 | size_t inSize = mapMult(inWdims, 4); |
86 | size_t filterSize = mapMult(filterWdims, 4); |
87 | size_t biasSize = filterWdims[0]; |
88 | |
89 | outW.resize(outSize); |
90 | inW.resize(inSize); |
91 | filterW.resize(filterSize); |
92 | biasW.resize(biasSize); |
93 | |
94 | randomize(inSize, inW.data()); |
95 | randomize(filterSize, filterW.data()); |
96 | randomize(biasSize, biasW.data()); |
97 | } |
98 | |
99 | virtual void run() override { |
100 | // biasWDims isn't used in libjit_conv2d_f, so we're passing NULL. |
101 | libjit_conv2d_f(outW.data(), inW.data(), filterW.data(), biasW.data(), |
102 | outWdims, inWdims, filterWdims, NULL, kernelSizes, strides, |
103 | pads, group, depthUnroll); |
104 | } |
105 | |
106 | virtual void teardown() override {} |
107 | |
108 | private: |
109 | size_t mapMult(size_t *vec, int size) { |
110 | size_t result = 1; |
111 | for (int i = 0; i < size; i++) { |
112 | result *= vec[i]; |
113 | } |
114 | return result; |
115 | } |
116 | |
117 | void randomize(size_t size, float *a) { |
118 | std::mt19937 gen; |
119 | std::uniform_real_distribution<> dis(-1.0, 1.0); |
120 | for (size_t i = 0; i < size; i++) { |
121 | a[i] = dis(gen); |
122 | } |
123 | } |
124 | }; |
125 | |
126 | int main() { |
127 | constexpr int reps = 10; |
128 | printf("inputBatch, inputEdgeSize, inputChannels, filterMultiplier, " |
129 | "kernelSize, stride, pad, group, bestInSeconds\n" ); |
130 | |
131 | for (size_t inputBatch : {1, 3}) { |
132 | for (size_t inputEdgeSize : {7, 56, 224}) { |
133 | for (size_t inputChannels : {64, 128, 1024}) { |
134 | for (size_t filterMultiplier : {1, 8}) { |
135 | for (size_t kernelSize : {1, 3}) { |
136 | for (size_t stride : {1, 4}) { |
137 | size_t pad = kernelSize / 2; |
138 | if ((inputEdgeSize + (pad * 2)) <= kernelSize) |
139 | continue; |
140 | for (size_t group : {1, 112}) { |
141 | if (inputChannels % group != 0) |
142 | continue; |
143 | ConvBench b(inputBatch, inputEdgeSize, inputChannels, |
144 | filterMultiplier, kernelSize, stride, pad, group); |
145 | auto times = bench(&b, reps); |
146 | double time = *(std::min_element(times.begin(), times.end())); |
147 | printf("%zu, %zu, %zu, %zu, %zu, %zu, %zu, %zu, %f\n" , |
148 | inputBatch, inputEdgeSize, inputChannels, |
149 | filterMultiplier, kernelSize, stride, pad, group, time); |
150 | } // group |
151 | } // stride |
152 | } // kernelSize |
153 | } // filterMultiplier |
154 | } // inputChannels |
155 | } // inputEdgeSize |
156 | } // inputBatch |
157 | } |
158 | |