/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <algorithm>
#include <array>
#include <cstdlib>
#include <future>
#include <random>

#include "Bench.h"

#include "ConvUtils.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"

using namespace glow;
using namespace std;

vector<vector<conv_param_t<2>>> shapes_2d = {
    // MB, IC, OC, IH, IW, G, KH, KW, stride_h, stride_w,
    // pad_h_top, pad_w_left, pad_h_bottom, pad_w_right
    // 2D convolutions
    // regular
    {conv_param_t<>(1, 128, 128, {56, 56}, 1, {3, 3}, {1, 1}, {1, 1, 1, 1})},
    // groupwise
    {conv_param_t<>(1, 128, 128, {56, 56}, 32, {3, 3}, {1, 1}, {1, 1, 1, 1})},
    // depthwise (G == IC == OC)
    {conv_param_t<>(1, 272, 272, {47, 125}, 272, {3, 3}, {1, 1}, {1, 1, 1, 1})},
    // pointwise
    {conv_param_t<>(1, 128, 128, {56, 56}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0})},
    // bottleneck blocks
    {conv_param_t<>(1, 256, 128, {56, 56}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0}),
     conv_param_t<>(1, 128, 128, {56, 56}, 32, {3, 3}, {1, 1}, {1, 1, 1, 1}),
     conv_param_t<>(1, 128, 256, {56, 56}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0})},
    {conv_param_t<>(1, 512, 256, {28, 28}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0}),
     conv_param_t<>(1, 256, 256, {28, 28}, 32, {3, 3}, {1, 1}, {1, 1, 1, 1}),
     conv_param_t<>(1, 256, 512, {28, 28}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0})},
    {conv_param_t<>(1, 1024, 512, {14, 14}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0}),
     conv_param_t<>(1, 512, 512, {14, 14}, 32, {3, 3}, {1, 1}, {1, 1, 1, 1}),
     conv_param_t<>(1, 512, 1024, {14, 14}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0})},
    {conv_param_t<>(1, 2048, 1024, {7, 7}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0}),
     conv_param_t<>(1, 1024, 1024, {7, 7}, 32, {3, 3}, {1, 1}, {1, 1, 1, 1}),
     conv_param_t<>(1, 1024, 2048, {7, 7}, 1, {1, 1}, {1, 1}, {0, 0, 0, 0})}

};
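
// Note on reading the shapes above: conv_param_t<2> derives OUT_DIM in its
// constructor (see ConvUtils.h) using the usual convolution arithmetic,
// roughly OUT_DIM[i] = (IN_DIM[i] + pad_before[i] + pad_after[i] -
// dilation[i] * (K[i] - 1) - 1) / stride[i] + 1. For example, the first entry
// (a 3x3, stride-1, pad-1 conv on a 1x56x56x128 NHWC input) keeps the spatial
// size at 56x56.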

/*
 * Benchmark a number of Conv2d operators with representative input shapes.
 * A parallel chain of Conv2d nodes is created per core, and each core owns
 * its own set of weights. Each chain is repeated for numLayers layers, with
 * the output tensor of one layer feeding the next.
 */
class Int8Conv2dParallelBench : public Benchmark {
  /// Convolution parameters for the chain of ops run on each core.
  std::vector<conv_param_t<2>> input_shapes_;
  size_t numLayers_;
  PlaceholderBindings bindings_;
  std::unique_ptr<runtime::HostManager> hostManager_;
  size_t asyncLaunchSize_;
  size_t numCores_;
  const char *backendStr_;
  const char *devId_;

public:
  Int8Conv2dParallelBench(vector<conv_param_t<2>> &input_shapes_,
                          size_t numLayers_, size_t asyncLaunchSize_,
                          size_t numCores_, const char *backendStr_,
                          const char *devId_)
      : input_shapes_(input_shapes_), numLayers_(numLayers_),
        asyncLaunchSize_(asyncLaunchSize_), numCores_(numCores_),
        backendStr_(backendStr_), devId_(devId_) {}

  void setup() override {

    // Set up the host manager for the requested backend and optional device.
    std::vector<std::unique_ptr<runtime::DeviceConfig>> configs;
    auto config = glow::make_unique<runtime::DeviceConfig>(backendStr_);
    if (devId_ != nullptr) {
      config->parameters["DeviceID"] = devId_;
    }
    configs.push_back(std::move(config));
    hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs));

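    // Derive the chain's overall input/output dimensions: the input shape
    // comes from the first conv in the chain and the output shape from the
    // last one (for a single-conv chain they are the same entry).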
    dim_t N, IC, IH, IW, OC, OH, OW;
    if (input_shapes_.size() == 1) {
      N = input_shapes_[0].MB;
      IC = input_shapes_[0].IC;
      IH = input_shapes_[0].IN_DIM[0];
      IW = input_shapes_[0].IN_DIM[1];
      OC = input_shapes_[0].OC;
      OH = input_shapes_[0].OUT_DIM[0];
      OW = input_shapes_[0].OUT_DIM[1];
    } else {
      N = input_shapes_[0].MB;
      IC = input_shapes_[0].IC;
      IH = input_shapes_[0].IN_DIM[0];
      IW = input_shapes_[0].IN_DIM[1];
      OC = input_shapes_[input_shapes_.size() - 1].OC;
      OH = input_shapes_[input_shapes_.size() - 1].OUT_DIM[0];
      OW = input_shapes_[input_shapes_.size() - 1].OUT_DIM[1];
    }
    std::unique_ptr<Module> mod(new Module);
    auto fn = mod->createFunction("singleNode");

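    // Per-core graph state: `cur` tracks the tail node of each core's chain,
    // while the filters/bias/conv vectors hold the placeholders and nodes
    // created for the current layer.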
    std::vector<Node *> cur(numCores_);
    std::vector<Placeholder *> filters(numCores_ * input_shapes_.size());
    std::vector<Placeholder *> bias(numCores_ * input_shapes_.size());
    std::vector<Node *> conv(numCores_ * input_shapes_.size());
    std::vector<Placeholder *> input(numCores_);
    std::vector<Placeholder *> output(numCores_);

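    // Create an independent quantized input/output placeholder pair for each
    // core; every core runs its own copy of the conv chain.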
    for (size_t core = 0; core < numCores_; core++) {
      input[core] =
          mod->createPlaceholder(ElemKind::Int8QTy, {N, IH, IW, IC}, 1.0, 0,
                                 "input_" + std::to_string(core), false);
      output[core] =
          mod->createPlaceholder(ElemKind::Int8QTy, {N, OH, OW, OC}, 1.0, 0,
                                 "output_" + std::to_string(core), false);
      cur[core] = input[core];
    }

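    // Build numLayers_ chained repetitions of the shape sequence on every
    // core. Each conv gets its own zero-initialized filter and bias
    // placeholders, and `cur` is advanced so the convs form a single chain.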
    for (size_t layer = 0; layer < numLayers_; layer++) {
      for (size_t core = 0; core < numCores_; core++) {
        size_t conv_ops = 0;
        for (auto conv_param : input_shapes_) {
          filters[core * input_shapes_.size() + conv_ops] =
              mod->createPlaceholder(ElemKind::Int8QTy,
                                     {(dim_t)(conv_param.OC),
                                      (dim_t)(conv_param.K[0]),
                                      (dim_t)(conv_param.K[1]),
                                      (dim_t)(conv_param.IC / conv_param.G)},
                                     1.0, 0,
                                     "filters_" + std::to_string(core) + "_" +
                                         std::to_string(conv_ops),
                                     false);
          bias[core * input_shapes_.size() + conv_ops] = mod->createPlaceholder(
              ElemKind::Int32QTy, {(dim_t)(conv_param.OC)}, 1.0, 0,
              "bias_" + std::to_string(core) + "_" + std::to_string(conv_ops),
              false);
          bindings_.allocate(filters[core * input_shapes_.size() + conv_ops])
              ->getHandle<int8_t>()
              .clear(0);
          bindings_.allocate(bias[core * input_shapes_.size() + conv_ops])
              ->getHandle<int32_t>()
              .clear(0);
          auto outTy = mod->uniqueType(
              ElemKind::Int8QTy,
              {(dim_t)(conv_param.MB), (dim_t)(conv_param.OUT_DIM[0]),
               (dim_t)(conv_param.OUT_DIM[1]), (dim_t)(conv_param.OC)},
              1.0, 0);
          conv[core * input_shapes_.size() + conv_ops] = fn->createConv(
              "conv" + std::to_string(core) + "_" + std::to_string(layer) +
                  "_" + std::to_string(conv_ops),
              cur[core], filters[core * input_shapes_.size() + conv_ops],
              bias[core * input_shapes_.size() + conv_ops], outTy,
              {(unsigned int)(conv_param.K[0]),
               (unsigned int)(conv_param.K[1])},
              {(unsigned int)(conv_param.stride[0]),
               (unsigned int)(conv_param.stride[1])},
              {(unsigned int)(conv_param.pad[0]),
               (unsigned int)(conv_param.pad[1]),
               (unsigned int)(conv_param.pad[2]),
               (unsigned int)(conv_param.pad[3])},
              (unsigned int)(conv_param.G),
              {(unsigned int)(conv_param.dilation[0]),
               (unsigned int)(conv_param.dilation[1])});
          cur[core] = conv[core * input_shapes_.size() + conv_ops];
          conv_ops += 1;
        }
      }
    }
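    // Write each core's final result to its output placeholder; the SaveNode
    // keeps the chain live in the graph.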
    for (size_t core = 0; core < numCores_; core++) {
      fn->createSave("save" + std::to_string(core), cur[core], output[core]);
    }

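    // Convert all placeholders except each core's input and output into
    // constants, i.e. treat the filters and biases as fixed weights.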
    for (size_t core = 0; core < numCores_; core++) {
      ::glow::convertPlaceholdersToConstants(fn, bindings_,
                                             {
                                                 input[core],
                                                 output[core],
                                             });
    }

    CompilationContext ctx;
    EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx));
  }

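  /// Issue asyncLaunchSize_ inference requests concurrently against the
  /// compiled network and block until every request has completed.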
  void run() override {
    std::vector<std::promise<void>> promises(asyncLaunchSize_);
    std::vector<std::future<void>> futures;
    for (auto &runPromise : promises) {
      std::unique_ptr<ExecutionContext> contextPtr(new ExecutionContext);
      futures.push_back(runPromise.get_future());
      hostManager_->runNetwork(
          "singleNode", std::move(contextPtr),
          [&runPromise](runtime::RunIdentifierTy, Error err,
                        std::unique_ptr<ExecutionContext> /* contextPtr */) {
            EXIT_ON_ERR(std::move(err));
            runPromise.set_value();
          });
    }
    for (auto &fut : futures) {
      fut.wait();
    }
  }

  void teardown() override {}
};

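// Example invocation (arguments are positional; the backend name must match a
// backend compiled into this build, e.g. "CPU" or "Interpreter", and dev_id is
// optional):
//   Int8Conv2dParallelBench 2 10 1 4 CPU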
int main(int argc, char *argv[]) {
  printf("Int8Conv2dParallel Microbenchmark\n");
  printf(
      "Usage: Int8Conv2dParallelBench numLayers(Int) "
      "numReps(Int) "
      "numAsyncLaunches(Int) numCores(Int) backendStr(String) dev_id(Int)\n");
  printf("Standard Glow command-line options may be passed via the GLOW_OPTS "
         "environment variable\n");
  benchParseGlowOpts(argc, argv);

  // Validate the argument count before touching argv.
  assert(argc == 6 || argc == 7);
  size_t numLayers = atoi(argv[1]);
  size_t reps = atoi(argv[2]);
  size_t asyncLaunches = atoi(argv[3]);
  size_t numCores = atoi(argv[4]);
  const char *backendStr = argv[5];
  char *dev_id = nullptr;
  if (argc > 6) {
    dev_id = argv[6];
    printf("Setting backend device: \"%s\"\n", dev_id);
  }

  printf("Start Int8Conv2dParallelBench\n");
  size_t shape_idx = 0;
  size_t total_input_shapes = shapes_2d.size();
  for (auto shapes : shapes_2d) {
    double gflops = 0;
    string shape_info = "";
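    // FLOPs for one grouped conv, counting each multiply-accumulate as two
    // ops (every shape above uses MB == 1):
    //   2 * OC * OH * OW * (IC / G) * KH * KW,
    // accumulated below in its equivalent grouped form.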
    for (auto shape : shapes) {
      gflops += 2.0 * shape.G * (shape.IC / shape.G) * shape.K[0] * shape.K[1] *
                (shape.OC / shape.G) * shape.OUT_DIM[0] * shape.OUT_DIM[1];
      if (shape_info != "") {
        shape_info += ";";
      }
      shape_info += shape.toString();
    }
    gflops *= numLayers * numCores / 1e9;
    printf("\n=====Input shape %zu/%zu: %s\n", shape_idx, total_input_shapes,
           shape_info.c_str());
    Int8Conv2dParallelBench b(shapes, numLayers, asyncLaunches, numCores,
                              backendStr, dev_id);
    auto times = bench(&b, reps);
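    // One result line per repetition: average runtime per async launch and
    // the corresponding GFLOPS estimate.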
    for (auto t : times) {
      printf("BenchResult,Conv2dParallelBench,SW,%4zu,%4zu,%4zu,%4zu,"
             "%s,%"
             "2.6lf,%5.2lf\n",
             numLayers, reps, asyncLaunches, numCores, backendStr,
             t / asyncLaunches, gflops * asyncLaunches / t);
    }
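    // Summarize across repetitions: median and minimum runtime per launch,
    // plus the corresponding GFLOPS estimates.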
    double min = *(std::min_element(times.begin(), times.end()));
    size_t midElt = times.size() / 2;
    std::nth_element(times.begin(), times.begin() + midElt, times.end());
    double median = times[midElt];
    double median_runtime = median / ((double)asyncLaunches);
    double min_runtime = min / ((double)asyncLaunches);
    printf("BenchSummary,Conv2dParallelBench,SW,%4zu,%4zu,%4zu,%4zu,%s,%"
           "2.6lf,%2.6lf,%5.2lf,%5.2lf\n",
           numLayers, reps, asyncLaunches, numCores, backendStr, median_runtime,
           min_runtime, gflops / median_runtime, gflops / min_runtime);
    shape_idx++;
  }
}