1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
#include <algorithm>
#include <array>
#include <cstdio>
#include <cstdlib>
#include <future>
#include <random>

#include "Bench.h"

#include "ConvUtils.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
26 | |
27 | using namespace glow; |
28 | using namespace std; |
29 | |
// Representative 2D average-pool shapes to benchmark.
// Argument order (matched against the field reads in setup() below):
//   avg_pool_param_t<>(MB, IC, {IH, IW}, {KH, KW}, {strideH, strideW},
//                      {pad_top, pad_left, pad_bottom, pad_right})
// In every entry the kernel equals the input spatial size, so each pool
// collapses H and W; the benchmark re-broadcasts with Tile nodes so that
// multi-layer chains keep matching shapes.
// NOTE(review): OC is not passed explicitly — presumably avg_pool_param_t
// derives it from IC (main() reads shape.OC for the FLOP count); confirm
// against ConvUtils.h.
vector<avg_pool_param_t<2>> shapes_2d = {
    avg_pool_param_t<>(1, 768, {50, 50}, {50, 50}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 224, {100, 100}, {100, 100}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 192, {100, 100}, {100, 100}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 640, {50, 50}, {50, 50}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 432, {30, 30}, {30, 30}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 128, {60, 60}, {60, 60}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 168, {60, 60}, {60, 60}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 440, {30, 30}, {30, 30}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 7392, {7, 7}, {7, 7}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 528, {56, 56}, {56, 56}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1056, {28, 28}, {28, 28}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 2904, {14, 14}, {14, 14}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 1536, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 3072, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1920, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 2304, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 512, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),

    avg_pool_param_t<>(1, 1240, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 864, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 1488, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0}),
    avg_pool_param_t<>(1, 272, {3, 3}, {3, 3}, {1, 1}, {0, 0, 0, 0})

};
61 | |
62 | /* |
63 | * Benchmark a number of AvgPool2d operators with representative input shapes. |
64 | * There are a number of parallel AvgPoool2d nodes which are created, one |
65 | * per core. Then these are chained together in multiple layers. |
66 | * To ensure sizes match up between the output of a layer and |
67 | * input of the next layer, we introduce a broadcast op. |
68 | * After each layer, output tensor is passed to the next layer. |
69 | */ |
70 | class Int8AvgPool2dParallelBench : public Benchmark { |
71 | /// Matrices. |
72 | avg_pool_param_t<2> input_shape_; |
73 | size_t numLayers_; |
74 | PlaceholderBindings bindings_; |
75 | std::unique_ptr<runtime::HostManager> hostManager_; |
76 | size_t asyncLaunchSize_; |
77 | size_t numCores_; |
78 | const char *backendStr_; |
79 | const char *devId_; |
80 | |
81 | public: |
82 | Int8AvgPool2dParallelBench(avg_pool_param_t<2> &input_shape_, |
83 | size_t numLayers_, size_t asyncLaunchSize_, |
84 | size_t numCores_, const char *backendStr_, |
85 | const char *devId_) |
86 | : input_shape_(input_shape_), numLayers_(numLayers_), |
87 | asyncLaunchSize_(asyncLaunchSize_), numCores_(numCores_), |
88 | backendStr_(backendStr_), devId_(devId_) {} |
89 | |
90 | void setup() override { |
91 | |
92 | // Setup host manager |
93 | std::vector<std::unique_ptr<runtime::DeviceConfig>> configs; |
94 | auto config = glow::make_unique<runtime::DeviceConfig>(backendStr_); |
95 | if (devId_ != nullptr) { |
96 | config->parameters["DeviceID" ] = devId_; |
97 | } |
98 | configs.push_back(std::move(config)); |
99 | hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs)); |
100 | |
101 | dim_t N, IC, IH, IW, OC; |
102 | N = input_shape_.MB; |
103 | IC = input_shape_.IC; |
104 | IH = input_shape_.IN_DIM[0]; |
105 | IW = input_shape_.IN_DIM[1]; |
106 | OC = input_shape_.OC; |
107 | |
108 | std::unique_ptr<Module> mod(new Module); |
109 | auto fn = mod->createFunction("singleNode" ); |
110 | |
111 | std::vector<Node *> cur(numCores_); |
112 | |
113 | std::vector<Placeholder *> input(numCores_); |
114 | std::vector<Placeholder *> output(numCores_); |
115 | |
116 | for (size_t core = 0; core < numCores_; core++) { |
117 | input[core] = |
118 | mod->createPlaceholder(ElemKind::Int8QTy, {N, IH, IW, IC}, 1.0, 0, |
119 | "input_" + std::to_string(core), false); |
120 | output[core] = |
121 | mod->createPlaceholder(ElemKind::Int8QTy, {N, IH, IW, OC}, 1.0, 0, |
122 | "output_" + std::to_string(core), false); |
123 | cur[core] = input[core]; |
124 | } |
125 | |
126 | for (size_t layer = 0; layer < numLayers_; layer++) { |
127 | for (size_t core = 0; core < numCores_; core++) { |
128 | auto pool = fn->createAvgPool("pool_" + std::to_string(core) + "_" + |
129 | std::to_string(layer), |
130 | cur[core], |
131 | {(unsigned int)(input_shape_.K[0]), |
132 | (unsigned int)(input_shape_.K[1])}, |
133 | {(unsigned int)(input_shape_.stride[0]), |
134 | (unsigned int)(input_shape_.stride[1])}, |
135 | {(unsigned int)(input_shape_.pad[0]), |
136 | (unsigned int)(input_shape_.pad[1]), |
137 | (unsigned int)(input_shape_.pad[2]), |
138 | (unsigned int)(input_shape_.pad[3])}); |
139 | auto tilex = fn->createTile("tile_dim1_" + std::to_string(core) + "_" + |
140 | std::to_string(layer), |
141 | pool, (unsigned int)(input_shape_.K[0]), 1); |
142 | auto tiley = fn->createTile( |
143 | "tile_dim2_" + std::to_string(core) + "_" + std::to_string(layer), |
144 | tilex, (unsigned int)(input_shape_.K[1]), 2); |
145 | cur[core] = tiley; |
146 | } |
147 | } |
148 | for (size_t core = 0; core < numCores_; core++) { |
149 | fn->createSave("save" + std::to_string(core), cur[core], output[core]); |
150 | } |
151 | |
152 | for (size_t core = 0; core < numCores_; core++) { |
153 | ::glow::convertPlaceholdersToConstants(fn, bindings_, |
154 | { |
155 | input[core], |
156 | output[core], |
157 | }); |
158 | } |
159 | CompilationContext ctx; |
160 | EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx)); |
161 | } |
162 | |
163 | void run() override { |
164 | std::vector<std::promise<void>> promises(asyncLaunchSize_); |
165 | std::vector<std::future<void>> futures; |
166 | for (auto &runPromise : promises) { |
167 | std::unique_ptr<ExecutionContext> contextPtr(new ExecutionContext); |
168 | futures.push_back(runPromise.get_future()); |
169 | hostManager_->runNetwork( |
170 | "singleNode" , std::move(contextPtr), |
171 | [&runPromise](runtime::RunIdentifierTy, Error err, |
172 | std::unique_ptr<ExecutionContext> /* contextPtr */) { |
173 | EXIT_ON_ERR(std::move(err)); |
174 | runPromise.set_value(); |
175 | }); |
176 | } |
177 | for (auto &fut : futures) { |
178 | fut.wait(); |
179 | } |
180 | } |
181 | |
182 | void teardown() override {} |
183 | }; |
184 | |
185 | int main(int argc, char *argv[]) { |
186 | size_t numLayers = atoi(argv[1]); |
187 | size_t reps = atoi(argv[2]); |
188 | size_t asyncLaunches = atoi(argv[3]); |
189 | size_t numCores = atoi(argv[4]); |
190 | const char *backendStr = argv[5]; |
191 | char *dev_id = nullptr; |
192 | |
193 | printf("Int8AvgPool2dParallel Microbenchmark\n" ); |
194 | printf( |
195 | "Usage: Int8AvgPool2dParallelBench numLayers(Int) " |
196 | "numReps(Int) " |
197 | "numAsyncLaunches(Int) numCores(Int) backendStr(String) dev_id(Int)\n" ); |
198 | printf("Standard Glow command-line options may be passed via the GLOW_OPTS " |
199 | "environment variable\n" ); |
200 | benchParseGlowOpts(argc, argv); |
201 | assert(argc == 6 || argc == 7); |
202 | if (argc > 6) { |
203 | dev_id = argv[6]; |
204 | printf("Setting backend device: \"%s\"\n" , dev_id); |
205 | } |
206 | printf("Start Int8AvgPool2dParallelBench\n" ); |
207 | size_t shape_idx = 0; |
208 | size_t total_input_shapes = shapes_2d.size(); |
209 | for (auto shape : shapes_2d) { |
210 | double gflops = 1.0 * (shape.IC) * shape.K[0] * shape.K[1] * (shape.OC) * |
211 | shape.OUT_DIM[0] * shape.OUT_DIM[1]; |
212 | gflops *= numLayers * numCores / 1e9; |
213 | |
214 | string shape_info = shape.toString(); |
215 | |
216 | printf("\n=====Input shape %zu/%zu: %s\n" , shape_idx, total_input_shapes, |
217 | shape_info.c_str()); |
218 | Int8AvgPool2dParallelBench b(shape, numLayers, asyncLaunches, numCores, |
219 | backendStr, dev_id); |
220 | auto times = bench(&b, reps); |
221 | for (auto t : times) { |
222 | printf("BenchResult,AvgPool2dParallelBench,SW,%4zu,%4zu,%4zu,%4zu," |
223 | "%s,%" |
224 | "2.6lf,%5.2lf\n" , |
225 | numLayers, reps, asyncLaunches, numCores, backendStr, |
226 | t / asyncLaunches, gflops * asyncLaunches / t); |
227 | } |
228 | double min = *(std::min_element(times.begin(), times.end())); |
229 | size_t midElt = times.size() / 2; |
230 | std::nth_element(times.begin(), times.begin() + midElt, times.end()); |
231 | double median = times[midElt]; |
232 | double median_runtime = median / ((double)asyncLaunches); |
233 | double min_runtime = min / ((double)asyncLaunches); |
234 | printf("BenchSummary,AvgPool2dParallelBench,SW,%4zu,%4zu,%4zu,%4zu,%s,%" |
235 | "2.6lf,%2.6lf,%5.2lf,%5.2lf\n" , |
236 | numLayers, reps, asyncLaunches, numCores, backendStr, median_runtime, |
237 | min_runtime, gflops / median_runtime, gflops / min_runtime); |
238 | shape_idx++; |
239 | } |
240 | } |
241 | |