1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
#include <algorithm>
#include <array>
#include <cstdio>
#include <cstdlib>
#include <future>
#include <random>

#include "Bench.h"

#include "ConvUtils.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
26 | |
27 | using namespace glow; |
28 | using namespace std; |
29 | |
// Representative 2D average-pool shapes to benchmark.
// Argument order (matched against the field reads in setup() below):
//   avg_pool_param_t<>(MB, IC, {IH, IW}, {KH, KW}, {strideH, strideW},
//                      {pad_top, pad_left, pad_bottom, pad_right})
// In every entry the kernel equals the input spatial size, so each pool
// collapses H and W; the benchmark re-broadcasts with Tile nodes so that
// multi-layer chains keep matching shapes.
// NOTE(review): OC is not passed explicitly — presumably avg_pool_param_t
// derives it from IC (main() reads shape.OC for the FLOP count); confirm
// against ConvUtils.h.
vector<avg_pool_param_t<2>> shapes_2d = {
    avg_pool_param_t<>(1, 768, {50, 50}, {50, 50}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 224, {100, 100}, {100, 100}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 192, {100, 100}, {100, 100}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 640, {50, 50}, {50, 50}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 432, {30, 30}, {30, 30}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 128, {60, 60}, {60, 60}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 168, {60, 60}, {60, 60}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 440, {30, 30}, {30, 30}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 7392, {7, 7}, {7, 7}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 528, {56, 56}, {56, 56}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1056, {28, 28}, {28, 28}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 2904, {14, 14}, {14, 14}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 1536, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 3072, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1920, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 2304, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 512, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 1240, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 864, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1488, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 272, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0})

};
61 | |
62 | /* |
63 | * Benchmark a number of AvgPool2d operators with representative input shapes. |
64 | * There are a number of parallel AvgPoool2d nodes which are created, one |
65 | * per core. Then these are chained together in multiple layers. |
66 | * To ensure sizes match up between the output of a layer and |
67 | * input of the next layer, we introduce a broadcast op. |
68 | * After each layer, output tensor is passed to the next layer. |
69 | */ |
70 | class Int8AvgPool2dParallelBench : public Benchmark { |
71 | /// Matrices. |
72 | avg_pool_param_t<2> input_shape_; |
73 | size_t numLayers_; |
74 | PlaceholderBindings bindings_; |
75 | std::unique_ptr<runtime::HostManager> hostManager_; |
76 | size_t asyncLaunchSize_; |
77 | size_t numCores_; |
78 | const char *backendStr_; |
79 | const char *devId_; |
80 | |
81 | public: |
82 | Int8AvgPool2dParallelBench(avg_pool_param_t<2> &input_shape_, |
83 | size_t numLayers_, size_t asyncLaunchSize_, |
84 | size_t numCores_, const char *backendStr_, |
85 | const char *devId_) |
86 | : input_shape_(input_shape_), numLayers_(numLayers_), |
87 | asyncLaunchSize_(asyncLaunchSize_), numCores_(numCores_), |
88 | backendStr_(backendStr_), devId_(devId_) {} |
89 | |
90 | void setup() override { |
91 | |
92 | // Setup host manager |
93 | std::vector<std::unique_ptr<runtime::DeviceConfig>> configs; |
94 | auto config = glow::make_unique<runtime::DeviceConfig>(backendStr_); |
95 | if (devId_ != nullptr) { |
96 | config->parameters["DeviceID" ] = devId_; |
97 | } |
98 | configs.push_back(std::move(config)); |
99 | hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs)); |
100 | |
101 | dim_t N, IC, IH, IW, OC; |
102 | N = input_shape_.MB; |
103 | IC = input_shape_.IC; |
104 | IH = input_shape_.IN_DIM[0]; |
105 | IW = input_shape_.IN_DIM[1]; |
106 | OC = input_shape_.OC; |
107 | |
108 | std::unique_ptr<Module> mod(new Module); |
109 | auto fn = mod->createFunction("singleNode" ); |
110 | |
111 | std::vector<Node *> cur(numCores_); |
112 | |
113 | std::vector<Placeholder *> input(numCores_); |
114 | std::vector<Placeholder *> output(numCores_); |
115 | |
116 | for (size_t core = 0; core < numCores_; core++) { |
117 | input[core] = |
118 | mod->createPlaceholder(ElemKind::Int8QTy, {N, IH, IW, IC}, 1.0, 0, |
119 | "input_" + std::to_string(core), false); |
120 | output[core] = |
121 | mod->createPlaceholder(ElemKind::Int8QTy, {N, IH, IW, OC}, 1.0, 0, |
122 | "output_" + std::to_string(core), false); |
123 | cur[core] = input[core]; |
124 | } |
125 | |
126 | for (size_t layer = 0; layer < numLayers_; layer++) { |
127 | for (size_t core = 0; core < numCores_; core++) { |
128 | auto pool = fn->createAvgPool("pool_" + std::to_string(core) + "_" + |
129 | std::to_string(layer), |
130 | cur[core], |
131 | {(unsigned int)(input_shape_.K[0]), |
132 | (unsigned int)(input_shape_.K[1])}, |
133 | {(unsigned int)(input_shape_.stride[0]), |
134 | (unsigned int)(input_shape_.stride[1])}, |
135 | {(unsigned int)(input_shape_.pad[0]), |
136 | (unsigned int)(input_shape_.pad[1]), |
137 | (unsigned int)(input_shape_.pad[2]), |
138 | (unsigned int)(input_shape_.pad[3])}); |
139 | auto tilex = fn->createTile("tile_dim1_" + std::to_string(core) + "_" + |
140 | std::to_string(layer), |
141 | pool, (unsigned int)(input_shape_.K[0]), 1); |
142 | auto tiley = fn->createTile( |
143 | "tile_dim2_" + std::to_string(core) + "_" + std::to_string(layer), |
144 | tilex, (unsigned int)(input_shape_.K[1]), 2); |
145 | cur[core] = tiley; |
146 | } |
147 | } |
148 | for (size_t core = 0; core < numCores_; core++) { |
149 | fn->createSave("save" + std::to_string(core), cur[core], output[core]); |
150 | } |
151 | |
152 | for (size_t core = 0; core < numCores_; core++) { |
153 | ::glow::convertPlaceholdersToConstants(fn, bindings_, |
154 | { |
155 | input[core], |
156 | output[core], |
157 | }); |
158 | } |
159 | CompilationContext ctx; |
160 | EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx)); |
161 | } |
162 | |
163 | void run() override { |
164 | std::vector<std::promise<void>> promises(asyncLaunchSize_); |
165 | std::vector<std::future<void>> futures; |
166 | for (auto &runPromise : promises) { |
167 | std::unique_ptr<ExecutionContext> contextPtr(new ExecutionContext); |
168 | futures.push_back(runPromise.get_future()); |
169 | hostManager_->runNetwork( |
170 | "singleNode" , std::move(contextPtr), |
171 | [&runPromise](runtime::RunIdentifierTy, Error err, |
172 | std::unique_ptr<ExecutionContext> /* contextPtr */) { |
173 | EXIT_ON_ERR(std::move(err)); |
174 | runPromise.set_value(); |
175 | }); |
176 | } |
177 | for (auto &fut : futures) { |
178 | fut.wait(); |
179 | } |
180 | } |
181 | |
182 | void teardown() override {} |
183 | }; |
184 | |
185 | int main(int argc, char *argv[]) { |
186 | size_t numLayers = atoi(argv[1]); |
187 | size_t reps = atoi(argv[2]); |
188 | size_t asyncLaunches = atoi(argv[3]); |
189 | size_t numCores = atoi(argv[4]); |
190 | const char *backendStr = argv[5]; |
191 | char *dev_id = nullptr; |
192 | |
193 | printf("Int8AvgPool2dParallel Microbenchmark\n" ); |
194 | printf( |
195 | "Usage: Int8AvgPool2dParallelBench numLayers(Int) " |
196 | "numReps(Int) " |
197 | "numAsyncLaunches(Int) numCores(Int) backendStr(String) dev_id(Int)\n" ); |
198 | printf("Standard Glow command-line options may be passed via the GLOW_OPTS " |
199 | "environment variable\n" ); |
200 | benchParseGlowOpts(argc, argv); |
201 | assert(argc == 6 || argc == 7); |
202 | if (argc > 6) { |
203 | dev_id = argv[6]; |
204 | printf("Setting backend device: \"%s\"\n" , dev_id); |
205 | } |
206 | printf("Start Int8AvgPool2dParallelBench\n" ); |
207 | size_t shape_idx = 0; |
208 | size_t total_input_shapes = shapes_2d.size(); |
209 | for (auto shape : shapes_2d) { |
210 | double gflops = 1.0 * (shape.IC) * shape.K[0] * shape.K[1] * (shape.OC) * |
211 | shape.OUT_DIM[0] * shape.OUT_DIM[1]; |
212 | gflops *= numLayers * numCores / 1e9; |
213 | |
214 | string shape_info = shape.toString(); |
215 | |
216 | printf("\n=====Input shape %zu/%zu: %s\n" , shape_idx, total_input_shapes, |
217 | shape_info.c_str()); |
218 | Int8AvgPool2dParallelBench b(shape, numLayers, asyncLaunches, numCores, |
219 | backendStr, dev_id); |
220 | auto times = bench(&b, reps); |
221 | for (auto t : times) { |
222 | printf("BenchResult,AvgPool2dParallelBench,SW,%4zu,%4zu,%4zu,%4zu," |
223 | "%s,%" |
224 | "2.6lf,%5.2lf\n" , |
225 | numLayers, reps, asyncLaunches, numCores, backendStr, |
226 | t / asyncLaunches, gflops * asyncLaunches / t); |
227 | } |
228 | double min = *(std::min_element(times.begin(), times.end())); |
229 | size_t midElt = times.size() / 2; |
230 | std::nth_element(times.begin(), times.begin() + midElt, times.end()); |
231 | double median = times[midElt]; |
232 | double median_runtime = median / ((double)asyncLaunches); |
233 | double min_runtime = min / ((double)asyncLaunches); |
234 | printf("BenchSummary,AvgPool2dParallelBench,SW,%4zu,%4zu,%4zu,%4zu,%s,%" |
235 | "2.6lf,%2.6lf,%5.2lf,%5.2lf\n" , |
236 | numLayers, reps, asyncLaunches, numCores, backendStr, median_runtime, |
237 | min_runtime, gflops / median_runtime, gflops / min_runtime); |
238 | shape_idx++; |
239 | } |
240 | } |
241 | |