/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdlib>
#include <future>
#include <random>

#include "Bench.h"

#include "ConvUtils.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"

using namespace glow;
using namespace std;

vector<avg_pool_param_t<2>> shapes_2d = {
    // Each entry is: MB, IC, {IH, IW}, {KH, KW}, {stride_h, stride_w},
    // {pad_t, pad_l, pad_b, pad_r}. The kernel spans the full input
    // (KH == IH, KW == IW), and each pooled output is broadcast back up so
    // that multi-layer avg pools chain cleanly.
    avg_pool_param_t<>(1, 768, {50, 50}, {50, 50}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 224, {100, 100}, {100, 100}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 192, {100, 100}, {100, 100}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 640, {50, 50}, {50, 50}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 432, {30, 30}, {30, 30}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 128, {60, 60}, {60, 60}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 168, {60, 60}, {60, 60}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 440, {30, 30}, {30, 30}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 7392, {7, 7}, {7, 7}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 528, {56, 56}, {56, 56}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1056, {28, 28}, {28, 28}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 2904, {14, 14}, {14, 14}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 1536, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 3072, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1920, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 2304, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 512, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 1240, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 864, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1488, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 272, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
};

/*
 * Benchmark a number of AvgPool2d operators with representative input shapes.
 * A number of parallel AvgPool2d nodes are created, one per core, and then
 * chained together in multiple layers. To make the output size of one layer
 * match the input size of the next, a broadcast (Tile) op is introduced after
 * each pool. After each layer, the output tensor is passed to the next layer.
 */
class Int8AvgPool2dParallelBench : public Benchmark {
  /// Pool shape and benchmark configuration.
  avg_pool_param_t<2> input_shape_;
  size_t numLayers_;
  PlaceholderBindings bindings_;
  std::unique_ptr<runtime::HostManager> hostManager_;
  size_t asyncLaunchSize_;
  size_t numCores_;
  const char *backendStr_;
  const char *devId_;

public:
  Int8AvgPool2dParallelBench(avg_pool_param_t<2> &input_shape_,
                             size_t numLayers_, size_t asyncLaunchSize_,
                             size_t numCores_, const char *backendStr_,
                             const char *devId_)
      : input_shape_(input_shape_), numLayers_(numLayers_),
        asyncLaunchSize_(asyncLaunchSize_), numCores_(numCores_),
        backendStr_(backendStr_), devId_(devId_) {}

  void setup() override {
    // Set up the host manager with a single device of the requested backend.
    std::vector<std::unique_ptr<runtime::DeviceConfig>> configs;
    auto config = glow::make_unique<runtime::DeviceConfig>(backendStr_);
    if (devId_ != nullptr) {
      config->parameters["DeviceID"] = devId_;
    }
    configs.push_back(std::move(config));
    hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs));

    dim_t N, IC, IH, IW, OC;
    N = input_shape_.MB;
    IC = input_shape_.IC;
    IH = input_shape_.IN_DIM[0];
    IW = input_shape_.IN_DIM[1];
    OC = input_shape_.OC;

    auto mod = glow::make_unique<Module>();
    auto fn = mod->createFunction("singleNode");

    std::vector<Node *> cur(numCores_);

    std::vector<Placeholder *> input(numCores_);
    std::vector<Placeholder *> output(numCores_);
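    // One input/output placeholder pair per core; Int8 quantized NHWC
    // tensors with scale 1.0 and offset 0.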
    for (size_t core = 0; core < numCores_; core++) {
      input[core] =
          mod->createPlaceholder(ElemKind::Int8QTy, {N, IH, IW, IC}, 1.0, 0,
                                 "input_" + std::to_string(core), false);
      output[core] =
          mod->createPlaceholder(ElemKind::Int8QTy, {N, IH, IW, OC}, 1.0, 0,
                                 "output_" + std::to_string(core), false);
      cur[core] = input[core];
    }
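    // Chain numLayers_ pool-plus-tile stages per core. With stride {1, 1},
    // no padding, and a kernel that spans the whole input, each AvgPool
    // reduces the spatial dims to 1x1; the two Tile nodes then broadcast the
    // result back to {IH, IW}, so every layer sees the original input shape.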
    for (size_t layer = 0; layer < numLayers_; layer++) {
      for (size_t core = 0; core < numCores_; core++) {
        auto pool = fn->createAvgPool("pool_" + std::to_string(core) + "_" +
                                          std::to_string(layer),
                                      cur[core],
                                      {(unsigned int)(input_shape_.K[0]),
                                       (unsigned int)(input_shape_.K[1])},
                                      {(unsigned int)(input_shape_.stride[0]),
                                       (unsigned int)(input_shape_.stride[1])},
                                      {(unsigned int)(input_shape_.pad[0]),
                                       (unsigned int)(input_shape_.pad[1]),
                                       (unsigned int)(input_shape_.pad[2]),
                                       (unsigned int)(input_shape_.pad[3])});
        auto tilex = fn->createTile("tile_dim1_" + std::to_string(core) + "_" +
                                        std::to_string(layer),
                                    pool, (unsigned int)(input_shape_.K[0]), 1);
        auto tiley = fn->createTile(
            "tile_dim2_" + std::to_string(core) + "_" + std::to_string(layer),
            tilex, (unsigned int)(input_shape_.K[1]), 2);
        cur[core] = tiley;
      }
    }
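    // Save each per-core chain's final node into that core's output
    // placeholder.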
    for (size_t core = 0; core < numCores_; core++) {
      fn->createSave("save" + std::to_string(core), cur[core], output[core]);
    }
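    // Keep the per-core inputs and outputs as placeholders; any other
    // placeholder backed by a tensor in bindings_ is folded into a constant.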
    for (size_t core = 0; core < numCores_; core++) {
      ::glow::convertPlaceholdersToConstants(fn, bindings_,
                                             {
                                                 input[core],
                                                 output[core],
                                             });
    }
    CompilationContext ctx;
    EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx));
  }
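  // Issue asyncLaunchSize_ concurrent requests, each with its own
  // ExecutionContext; the paired promise is fulfilled in the completion
  // callback, and run() returns once every launch has finished.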
  void run() override {
    std::vector<std::promise<void>> promises(asyncLaunchSize_);
    std::vector<std::future<void>> futures;
    for (auto &runPromise : promises) {
      auto contextPtr = glow::make_unique<ExecutionContext>();
      futures.push_back(runPromise.get_future());
      hostManager_->runNetwork(
          "singleNode", std::move(contextPtr),
          [&runPromise](runtime::RunIdentifierTy, Error err,
                        std::unique_ptr<ExecutionContext> /* contextPtr */) {
            EXIT_ON_ERR(std::move(err));
            runPromise.set_value();
          });
    }
    for (auto &fut : futures) {
      fut.wait();
    }
  }

  void teardown() override {}
};
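// Example invocation (assuming a build with the CPU backend): two layers,
// ten reps, four async launches, one core:
//   Int8AvgPool2dParallelBench 2 10 4 1 CPU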
int main(int argc, char *argv[]) {
  printf("Int8AvgPool2dParallel Microbenchmark\n");
  printf("Usage: Int8AvgPool2dParallelBench numLayers(Int) numReps(Int) "
         "numAsyncLaunches(Int) numCores(Int) backendStr(String) "
         "dev_id(Int)\n");
  printf("Standard Glow command-line options may be passed via the GLOW_OPTS "
         "environment variable\n");
  benchParseGlowOpts(argc, argv);
  // Validate the argument count before touching argv.
  assert(argc == 6 || argc == 7);

  size_t numLayers = atoi(argv[1]);
  size_t reps = atoi(argv[2]);
  size_t asyncLaunches = atoi(argv[3]);
  size_t numCores = atoi(argv[4]);
  const char *backendStr = argv[5];
  const char *dev_id = nullptr;
  if (argc > 6) {
    dev_id = argv[6];
    printf("Setting backend device: \"%s\"\n", dev_id);
  }

  printf("Start Int8AvgPool2dParallelBench\n");
  size_t shape_idx = 0;
  size_t total_input_shapes = shapes_2d.size();
  for (auto shape : shapes_2d) {
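    // Rough per-launch op count: kernel elements per output position, scaled
    // by the channel counts, output area, layer count, and core count,
    // reported in giga-ops.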
    double gflops = 1.0 * (shape.IC) * shape.K[0] * shape.K[1] * (shape.OC) *
                    shape.OUT_DIM[0] * shape.OUT_DIM[1];
    gflops *= numLayers * numCores / 1e9;

    string shape_info = shape.toString();

    printf("\n=====Input shape %zu/%zu: %s\n", shape_idx + 1,
           total_input_shapes, shape_info.c_str());
    Int8AvgPool2dParallelBench b(shape, numLayers, asyncLaunches, numCores,
                                 backendStr, dev_id);
    auto times = bench(&b, reps);
    for (auto t : times) {
      printf("BenchResult,AvgPool2dParallelBench,SW,%4zu,%4zu,%4zu,%4zu,%s,"
             "%2.6lf,%5.2lf\n",
             numLayers, reps, asyncLaunches, numCores, backendStr,
             t / asyncLaunches, gflops * asyncLaunches / t);
    }
    double min = *(std::min_element(times.begin(), times.end()));
    size_t midElt = times.size() / 2;
    std::nth_element(times.begin(), times.begin() + midElt, times.end());
    double median = times[midElt];
    double median_runtime = median / ((double)asyncLaunches);
    double min_runtime = min / ((double)asyncLaunches);
    printf("BenchSummary,AvgPool2dParallelBench,SW,%4zu,%4zu,%4zu,%4zu,%s,"
           "%2.6lf,%2.6lf,%5.2lf,%5.2lf\n",
           numLayers, reps, asyncLaunches, numCores, backendStr, median_runtime,
           min_runtime, gflops / median_runtime, gflops / min_runtime);
    shape_idx++;
  }
}