1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
#include <algorithm>
#include <array>
#include <cstdlib>
#include <future>
#include <random>
20 | |
21 | #include "Bench.h" |
22 | |
23 | #include "ConvUtils.h" |
24 | #include "glow/ExecutionEngine/ExecutionEngine.h" |
25 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
26 | |
27 | using namespace glow; |
28 | using namespace std; |
29 | |
30 | vector<vector<conv_param_t<3>>> shapes_3d = { |
31 | // MB, IC, OC, {IT, IH, IW}, G, {KT, KH, KW}, {stride_t, stride_h, |
32 | // stride_w}, |
33 | // {pad_prev, pad_h_top, pad_w_left, pad_next, pad_h_bottom, pad_w_right} |
34 | // Regular |
35 | {conv_param_t<3>(1, 64, 64, {8, 14, 14}, 1, {3, 3, 3}, {1, 1, 1}, |
36 | {1, 1, 1, 1, 1, 1})}, |
37 | |
38 | // Groupwise |
39 | {conv_param_t<3>(32, 192, 192, {2, 28, 28}, 96, {3, 3, 3}, {1, 1, 1}, |
40 | {1, 1, 1, 1, 1, 1})}, |
41 | {conv_param_t<3>(32, 192, 192, {1, 14, 14}, 96, {3, 3, 3}, {1, 1, 1}, |
42 | {1, 1, 1, 1, 1, 1})}, |
43 | {conv_param_t<3>(32, 384, 384, {1, 14, 14}, 192, {3, 3, 3}, {1, 1, 1}, |
44 | {1, 1, 1, 1, 1, 1})}, |
45 | {conv_param_t<3>(32, 384, 384, {1, 7, 7}, 192, {3, 3, 3}, {1, 1, 1}, |
46 | {1, 1, 1, 1, 1, 1})}, |
47 | |
48 | {conv_param_t<3>(32, 16, 16, {4, 56, 56}, 8, {3, 3, 3}, {1, 1, 1}, |
49 | {1, 1, 1, 1, 1, 1})}, |
50 | {conv_param_t<3>(32, 16, 16, {2, 28, 28}, 8, {3, 3, 3}, {1, 1, 1}, |
51 | {1, 1, 1, 1, 1, 1})}, |
52 | {conv_param_t<3>(32, 32, 32, {4, 56, 56}, 16, {3, 3, 3}, {1, 1, 1}, |
53 | {1, 1, 1, 1, 1, 1})}, |
54 | {conv_param_t<3>(32, 32, 32, {2, 28, 28}, 16, {3, 3, 3}, {1, 1, 1}, |
55 | {1, 1, 1, 1, 1, 1})}, |
56 | {conv_param_t<3>(32, 32, 32, {2, 28, 28}, 16, {3, 3, 3}, {1, 1, 1}, |
57 | {1, 1, 1, 1, 1, 1})}, |
58 | {conv_param_t<3>(32, 32, 32, {1, 14, 14}, 16, {3, 3, 3}, {1, 1, 1}, |
59 | {1, 1, 1, 1, 1, 1})}, |
60 | {conv_param_t<3>(32, 128, 128, {2, 28, 28}, 32, {3, 3, 3}, {1, 1, 1}, |
61 | {1, 1, 1, 1, 1, 1})}, |
62 | {conv_param_t<3>(32, 128, 128, {1, 14, 14}, 32, {3, 3, 3}, {1, 1, 1}, |
63 | {1, 1, 1, 1, 1, 1})}, |
64 | {conv_param_t<3>(32, 256, 256, {1, 14, 14}, 64, {3, 3, 3}, {1, 1, 1}, |
65 | {1, 1, 1, 1, 1, 1})}, |
66 | {conv_param_t<3>(32, 256, 256, {1, 7, 7}, 64, {3, 3, 3}, {1, 1, 1}, |
67 | {1, 1, 1, 1, 1, 1})}, |
68 | |
69 | // Depthwise |
70 | {conv_param_t<3>(1, 64, 64, {8, 14, 14}, 64, {3, 3, 3}, {1, 1, 1}, |
71 | {1, 1, 1, 1, 1, 1})}, |
72 | |
73 | // Pointwise |
74 | {conv_param_t<3>(1, 128, 128, {8, 14, 14}, 1, {1, 1, 1}, {1, 1, 1}, |
75 | {0, 0, 0, 0})}, |
76 | // bottleneck blocks |
77 | {conv_param_t<3>(1, 192, 192, {1, 14, 14}, 96, {3, 3, 3}, {1, 1, 1}, |
78 | {1, 1, 1, 1, 1, 1}), |
79 | conv_param_t<3>(1, 192, 1024, {1, 14, 14}, 1, {1, 1, 1}, {1, 1, 1}, |
80 | {0, 0, 0, 0, 0, 0}), |
81 | conv_param_t<3>(1, 1024, 192, {1, 14, 14}, 1, {1, 1, 1}, {1, 1, 1}, |
82 | {0, 0, 0, 0, 0, 0})} |
83 | |
84 | }; |
85 | |
86 | /* |
87 | * Benchmark a number of Conv3d operators with representative input shapes. |
88 | * There are a number of parallel Conv3d nodes which are created, one |
89 | * per core. Each core handles one weight matrix. Then these are chained |
90 | * together in multiple layers. After each layer, output tensor is passed to the |
91 | * next layer. |
92 | */ |
93 | class Int8Conv3dParallelBench : public Benchmark { |
94 | /// Matrices. |
95 | std::vector<conv_param_t<3>> input_shapes_; |
96 | size_t numLayers_; |
97 | PlaceholderBindings bindings_; |
98 | std::unique_ptr<runtime::HostManager> hostManager_; |
99 | size_t asyncLaunchSize_; |
100 | size_t numCores_; |
101 | const char *backendStr_; |
102 | const char *devId_; |
103 | |
104 | public: |
105 | Int8Conv3dParallelBench(vector<conv_param_t<3>> &input_shapes_, |
106 | size_t numLayers_, size_t asyncLaunchSize_, |
107 | size_t numCores_, const char *backendStr_, |
108 | const char *devId_) |
109 | : input_shapes_(input_shapes_), numLayers_(numLayers_), |
110 | asyncLaunchSize_(asyncLaunchSize_), numCores_(numCores_), |
111 | backendStr_(backendStr_), devId_(devId_) {} |
112 | |
113 | void setup() override { |
114 | |
115 | // Setup host manager |
116 | std::vector<std::unique_ptr<runtime::DeviceConfig>> configs; |
117 | auto config = glow::make_unique<runtime::DeviceConfig>(backendStr_); |
118 | if (devId_ != nullptr) { |
119 | config->parameters["DeviceID" ] = devId_; |
120 | } |
121 | configs.push_back(std::move(config)); |
122 | hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs)); |
123 | |
124 | dim_t N, IC, IT, IH, IW, OC, OT, OH, OW; |
125 | if (input_shapes_.size() == 1) { |
126 | N = input_shapes_[0].MB; |
127 | IC = input_shapes_[0].IC; |
128 | IT = input_shapes_[0].IN_DIM[0]; |
129 | IH = input_shapes_[0].IN_DIM[1]; |
130 | IW = input_shapes_[0].IN_DIM[1]; |
131 | OC = input_shapes_[0].OC; |
132 | OT = input_shapes_[0].OUT_DIM[0]; |
133 | OH = input_shapes_[0].OUT_DIM[1]; |
134 | OW = input_shapes_[0].OUT_DIM[2]; |
135 | } else { |
136 | N = input_shapes_[0].MB; |
137 | IC = input_shapes_[0].IC; |
138 | IT = input_shapes_[0].IN_DIM[0]; |
139 | IH = input_shapes_[0].IN_DIM[1]; |
140 | IW = input_shapes_[0].IN_DIM[1]; |
141 | OC = input_shapes_[input_shapes_.size() - 1].OC; |
142 | OT = input_shapes_[input_shapes_.size() - 1].OUT_DIM[0]; |
143 | OH = input_shapes_[input_shapes_.size() - 1].OUT_DIM[1]; |
144 | OW = input_shapes_[input_shapes_.size() - 1].OUT_DIM[1]; |
145 | } |
146 | std::unique_ptr<Module> mod(new Module); |
147 | auto fn = mod->createFunction("singleNode" ); |
148 | |
149 | std::vector<Node *> cur(numCores_); |
150 | std::vector<Placeholder *> filters(numCores_ * input_shapes_.size()); |
151 | std::vector<Placeholder *> bias(numCores_ * input_shapes_.size()); |
152 | std::vector<Node *> conv(numCores_ * input_shapes_.size()); |
153 | std::vector<Placeholder *> input(numCores_); |
154 | std::vector<Placeholder *> output(numCores_); |
155 | |
156 | for (size_t core = 0; core < numCores_; core++) { |
157 | input[core] = |
158 | mod->createPlaceholder(ElemKind::Int8QTy, {N, IT, IH, IW, IC}, 1.0, 0, |
159 | "input_" + std::to_string(core), false); |
160 | output[core] = |
161 | mod->createPlaceholder(ElemKind::Int8QTy, {N, OT, OH, OW, OC}, 1.0, 0, |
162 | "output_" + std::to_string(core), false); |
163 | cur[core] = input[core]; |
164 | } |
165 | |
166 | for (size_t layer = 0; layer < numLayers_; layer++) { |
167 | for (size_t core = 0; core < numCores_; core++) { |
168 | size_t conv_ops = 0; |
169 | for (auto conv_param : input_shapes_) { |
170 | filters[core * input_shapes_.size() + conv_ops] = |
171 | mod->createPlaceholder( |
172 | ElemKind::Int8QTy, |
173 | {(dim_t)(conv_param.OC), (dim_t)(conv_param.K[0]), |
174 | (dim_t)(conv_param.K[1]), (dim_t)(conv_param.K[2]), |
175 | (dim_t)(conv_param.IC / conv_param.G)}, |
176 | 1.0, 0, |
177 | "filters_" + std::to_string(core) + "_" + |
178 | std::to_string(conv_ops), |
179 | false); |
180 | bias[core * input_shapes_.size() + conv_ops] = mod->createPlaceholder( |
181 | ElemKind::Int32QTy, {(dim_t)(conv_param.OC)}, 1.0, 0, |
182 | "bias_" + std::to_string(core) + "_" + std::to_string(conv_ops), |
183 | false); |
184 | bindings_.allocate(filters[core * input_shapes_.size() + conv_ops]) |
185 | ->getHandle<int8_t>() |
186 | .clear(0); |
187 | bindings_.allocate(bias[core * input_shapes_.size() + conv_ops]) |
188 | ->getHandle<int32_t>() |
189 | .clear(0); |
190 | auto outTy = mod->uniqueType( |
191 | ElemKind::Int8QTy, |
192 | {(dim_t)(conv_param.MB), (dim_t)(conv_param.OUT_DIM[0]), |
193 | (dim_t)(conv_param.OUT_DIM[1]), (dim_t)(conv_param.OUT_DIM[2]), |
194 | (dim_t)(conv_param.OC)}, |
195 | 1.0, 0); |
196 | conv[core * input_shapes_.size() + conv_ops] = fn->createConv3D( |
197 | "conv" + std::to_string(core) + "_" + std::to_string(layer) + |
198 | "_" + std::to_string(conv_ops), |
199 | cur[core], filters[core * input_shapes_.size() + conv_ops], |
200 | bias[core * input_shapes_.size() + conv_ops], outTy, |
201 | {(unsigned int)(conv_param.K[0]), (unsigned int)(conv_param.K[1]), |
202 | (unsigned int)(conv_param.K[2])}, |
203 | {(unsigned int)(conv_param.stride[0]), |
204 | (unsigned int)(conv_param.stride[1]), |
205 | (unsigned int)(conv_param.stride[2])}, |
206 | {(unsigned int)(conv_param.pad[0]), |
207 | (unsigned int)(conv_param.pad[1]), |
208 | (unsigned int)(conv_param.pad[2]), |
209 | (unsigned int)(conv_param.pad[3]), |
210 | (unsigned int)(conv_param.pad[4]), |
211 | (unsigned int)(conv_param.pad[5])}, |
212 | (unsigned int)(conv_param.G)); |
213 | |
214 | cur[core] = conv[core * input_shapes_.size() + conv_ops]; |
215 | conv_ops += 1; |
216 | } |
217 | } |
218 | } |
219 | for (size_t core = 0; core < numCores_; core++) { |
220 | fn->createSave("save" + std::to_string(core), cur[core], output[core]); |
221 | } |
222 | |
223 | for (size_t core = 0; core < numCores_; core++) { |
224 | ::glow::convertPlaceholdersToConstants(fn, bindings_, |
225 | { |
226 | input[core], |
227 | output[core], |
228 | }); |
229 | } |
230 | |
231 | CompilationContext ctx; |
232 | EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx)); |
233 | } |
234 | |
235 | void run() override { |
236 | std::vector<std::promise<void>> promises(asyncLaunchSize_); |
237 | std::vector<std::future<void>> futures; |
238 | for (auto &runPromise : promises) { |
239 | std::unique_ptr<ExecutionContext> contextPtr(new ExecutionContext); |
240 | futures.push_back(runPromise.get_future()); |
241 | hostManager_->runNetwork( |
242 | "singleNode" , std::move(contextPtr), |
243 | [&runPromise](runtime::RunIdentifierTy, Error err, |
244 | std::unique_ptr<ExecutionContext> /* contextPtr */) { |
245 | EXIT_ON_ERR(std::move(err)); |
246 | runPromise.set_value(); |
247 | }); |
248 | } |
249 | for (auto &fut : futures) { |
250 | fut.wait(); |
251 | } |
252 | } |
253 | |
254 | void teardown() override {} |
255 | }; |
256 | |
257 | int main(int argc, char *argv[]) { |
258 | size_t numLayers = atoi(argv[1]); |
259 | size_t reps = atoi(argv[2]); |
260 | size_t asyncLaunches = atoi(argv[3]); |
261 | size_t numCores = atoi(argv[4]); |
262 | const char *backendStr = argv[5]; |
263 | char *dev_id = nullptr; |
264 | |
265 | printf("Int8Conv3dParallel Microbenchmark\n" ); |
266 | printf( |
267 | "Usage: Int8Conv3dParallelBench numLayers(Int) " |
268 | "numReps(Int) " |
269 | "numAsyncLaunches(Int) numCores(Int) backendStr(String) dev_id(Int)\n" ); |
270 | printf("Standard Glow command-line options may be passed via the GLOW_OPTS " |
271 | "environment variable\n" ); |
272 | benchParseGlowOpts(argc, argv); |
273 | assert(argc == 6 || argc == 7); |
274 | if (argc > 6) { |
275 | dev_id = argv[6]; |
276 | printf("Setting backend device: \"%s\"\n" , dev_id); |
277 | } |
278 | printf("Start Int8Conv3dParallelBench\n" ); |
279 | size_t shape_idx = 0; |
280 | size_t total_input_shapes = shapes_3d.size(); |
281 | for (auto shapes : shapes_3d) { |
282 | double gflops = 0; |
283 | string shape_info = "" ; |
284 | for (auto shape : shapes) { |
285 | gflops += 2.0 * shape.G * (shape.IC / shape.G) * shape.K[0] * shape.K[1] * |
286 | shape.K[2] * (shape.OC / shape.G) * shape.OUT_DIM[0] * |
287 | shape.OUT_DIM[1] * shape.OUT_DIM[2]; |
288 | if (shape_info != "" ) { |
289 | shape_info += ";" ; |
290 | } |
291 | shape_info += shape.toString(); |
292 | } |
293 | gflops *= numLayers * numCores / 1e9; |
294 | printf("\n=====Input shape %zu/%zu: %s\n" , shape_idx, total_input_shapes, |
295 | shape_info.c_str()); |
296 | Int8Conv3dParallelBench b(shapes, numLayers, asyncLaunches, numCores, |
297 | backendStr, dev_id); |
298 | auto times = bench(&b, reps); |
299 | for (auto t : times) { |
300 | printf("BenchResult,Conv3dParallelBench,SW,%4zu,%4zu,%4zu,%4zu," |
301 | "%s,%" |
302 | "2.6lf,%5.2lf\n" , |
303 | numLayers, reps, asyncLaunches, numCores, backendStr, |
304 | t / asyncLaunches, gflops * asyncLaunches / t); |
305 | } |
306 | double min = *(std::min_element(times.begin(), times.end())); |
307 | size_t midElt = times.size() / 2; |
308 | std::nth_element(times.begin(), times.begin() + midElt, times.end()); |
309 | double median = times[midElt]; |
310 | double median_runtime = median / ((double)asyncLaunches); |
311 | double min_runtime = min / ((double)asyncLaunches); |
312 | printf("BenchSummary,Conv3dParallelBench,SW,%4zu,%4zu,%4zu,%4zu,%s,%" |
313 | "2.6lf,%2.6lf,%5.2lf,%5.2lf\n" , |
314 | numLayers, reps, asyncLaunches, numCores, backendStr, median_runtime, |
315 | min_runtime, gflops / median_runtime, gflops / min_runtime); |
316 | shape_idx++; |
317 | } |
318 | } |
319 | |