/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <algorithm>
#include <array>
#include <cstdlib>
#include <future>
#include <random>

#include "Bench.h"

#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"

using namespace glow;

/*
 * This class implements a Concat microbenchmark. A number of parallel
 * Concat nodes are created, one per core, and these are then chained
 * together in multiple layers.
 *
 * Microbenchmarks are generally useful for understanding performance
 * through targeted experimentation and are not representative of
 * end-to-end workloads.
 */
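// Shape of the generated graph (as built in setup() below): each input
// A_i (1 x n) is broadcast to (m x n); the broadcasts are interleaved
// pairwise and concatenated along dim 1 into an (m x n*numTensors) tensor.
// Each subsequent layer slices that tensor into alternating chunks,
// re-interleaves them, and concatenates again; finally a (1 x n*numTensors)
// slice of row 0 is saved to the output placeholder.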
class ConcatBench : public Benchmark {
  dim_t m_;
  dim_t n_;
  dim_t numTensors_;
  dim_t numLayers_;
  PlaceholderBindings bindings_;
  std::unique_ptr<runtime::HostManager> hostManager_;
  size_t asyncLaunchSize_;
  const char *backendStr_;
  ElemKind dtype_;
  size_t elementSize_;
  const char *devId_;

public:
  ConcatBench(dim_t m_, dim_t n_, dim_t numTensors_, dim_t numLayers_,
              dim_t asyncLaunchSize_, const char *backendStr_,
              const char *dtypeStr_, const char *devId_ = nullptr)
      : m_(m_), n_(n_), numTensors_(numTensors_), numLayers_(numLayers_),
        asyncLaunchSize_(asyncLaunchSize_), backendStr_(backendStr_),
        devId_(devId_) {

    // Default to Float16; an unrecognized dtypeStr_ falls through to this.
    dtype_ = ElemKind::Float16Ty;
    elementSize_ = 2;
    if (std::string(dtypeStr_) == "Float16") {
      dtype_ = ElemKind::Float16Ty;
      elementSize_ = 2;
    } else if (std::string(dtypeStr_) == "Float32") {
      dtype_ = ElemKind::FloatTy;
      elementSize_ = 4;
    }
  }

  void setup() override {

    // Setup host manager
    std::vector<std::unique_ptr<runtime::DeviceConfig>> configs;
    auto config = glow::make_unique<runtime::DeviceConfig>(backendStr_);
    if (devId_ != nullptr) {
      config->parameters["DeviceID"] = devId_;
    }
    configs.push_back(std::move(config));
    hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs));

    std::unique_ptr<Module> mod(new Module);
    auto fn = mod->createFunction("singleNode");
    // Create multiple chains of Concat nodes
    std::vector<Placeholder *> A(numTensors_);
    std::vector<NodeValue> A_broadcast(numTensors_);
    std::vector<NodeValue> A_concat(numTensors_);
    std::vector<NodeValue> slices(numTensors_);

    Placeholder *output;

    for (size_t tensor = 0; tensor < numTensors_; tensor++) {
      A[tensor] = mod->createPlaceholder(dtype_, {1, n_},
                                         "A" + std::to_string(tensor), false);
      A_broadcast[tensor] = fn->createBroadcast(
          "A_bcast" + std::to_string(tensor), A[tensor], {m_, n_}, 0);
    }
    output =
        mod->createPlaceholder(dtype_, {1, n_ * numTensors_}, "output", false);

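    // Feed the broadcasts to the first Concat with adjacent pairs swapped
    // (0<->1, 2<->3, ...); this indexing assumes numTensors_ is even.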
    for (size_t tensor = 0; tensor < numTensors_; tensor++) {
      A_concat[tensor / 2 * 2 + ((tensor % 2) ? 0 : 1)] = A_broadcast[tensor];
    }
    auto *concat = fn->createConcat("concat_0", A_concat, 1);

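    // Each remaining layer re-slices the previous Concat into alternating
    // chunks of 3n/2 and n/2 columns per pair (n_ must be even for the
    // slices to tile exactly), swaps the pairs again, and concatenates the
    // slices to form the next layer's input.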
    for (size_t layer = 1; layer < numLayers_; layer++) {
      for (size_t tensor = 0; tensor < numTensors_; tensor++) {
        dim_t start_n =
            tensor / 2 * 2 * n_ + ((tensor % 2) ? (3 * n_ / 2) : (0));
        dim_t end_n = start_n + ((tensor % 2) ? (n_ / 2) : (3 * n_ / 2));
        slices[tensor] = fn->createSlice("slice_" + std::to_string(tensor),
                                         concat, {0, start_n}, {m_, end_n});
      }
      for (size_t tensor = 0; tensor < numTensors_; tensor++) {
        A_concat[tensor / 2 * 2 + ((tensor % 2) ? 0 : 1)] = slices[tensor];
      }
      concat = fn->createConcat("concat_" + std::to_string(layer), A_concat, 1);
    }
    Node *slice =
        fn->createSlice("slice_final", concat, {0, 0}, {1, n_ * numTensors_});
    fn->createSave("save", slice, output);
    CompilationContext ctx;
    ctx.dumpFinalGraph = true;
    EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx));
  }

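  // Each call to run() enqueues asyncLaunchSize_ concurrent requests and
  // waits for all of them; main() divides the measured time by
  // asyncLaunchSize_ to report per-launch runtime.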
  void run() override {
    std::vector<std::promise<void>> promises(asyncLaunchSize_);
    std::vector<std::future<void>> futures;

    // Launch a number of independent requests
    for (auto &runPromise : promises) {
      std::unique_ptr<ExecutionContext> contextPtr(new ExecutionContext);
      futures.push_back(runPromise.get_future());
      hostManager_->runNetwork(
          "singleNode", std::move(contextPtr),
          [&runPromise](runtime::RunIdentifierTy, Error err,
                        std::unique_ptr<ExecutionContext> /* contextPtr */) {
            EXIT_ON_ERR(std::move(err));
            runPromise.set_value();
          });
    }
    for (auto &fut : futures) {
      fut.wait();
    }
  }

  void teardown() override {}

  // Approximate memory traffic: each of the numLayers_ layers produces one
  // m_ x (n_ * numTensors_) tensor of elementSize_-byte elements.
  double gbytes() const {
    return elementSize_ * m_ * n_ * numTensors_ * numLayers_ / 1e9;
  }
};

int main(int argc, char *argv[]) {
  printf("Concat Microbenchmark\n");
  printf("Usage: ConcatBench m(Int) n(Int) numTensors(Int) "
         "numLayers(Int) numReps(Int) "
         "numAsyncLaunches(Int) backendStr(String) "
         "dtypeStr(\"Float16\"|\"Float32\") dev_id(Int)\n");
  printf("Standard Glow command-line options may be passed via the GLOW_OPTS "
         "environment variable\n");
  benchParseGlowOpts(argc, argv);
  assert(argc == 9 || argc == 10);
  size_t m = atoi(argv[1]);
  size_t n = atoi(argv[2]);
  size_t numTensors = atoi(argv[3]);
  size_t numLayers = atoi(argv[4]);
  size_t reps = atoi(argv[5]);
  size_t asyncLaunches = atoi(argv[6]);
  const char *backendStr = argv[7];
  const char *dtypeStr = argv[8];
  char *dev_id = nullptr;

  if (argc > 9) {
    dev_id = argv[9];
    printf("Setting backend device: \"%s\"\n", dev_id);
  }

  assert(reps > 0);

  ConcatBench b(m, n, numTensors, numLayers, asyncLaunches, backendStr,
                dtypeStr, dev_id);
  auto times = bench(&b, reps);
  printf("_,benchName,_,m,n,numTensors,numLayers,numReps,numAsyncLaunches,"
         "backendStr,dtypeStr,runtime,gbytesPerSecPerChain\n");
  for (auto t : times) {
    printf("BenchResult,ConcatBench,SW,%4zu,%4zu,%4zu,%4zu,%4zu,%4zu,%s,%s,"
           "%2.6lf,%5.2lf\n",
           m, n, numTensors, numLayers, reps, asyncLaunches, backendStr,
           dtypeStr, t / asyncLaunches, b.gbytes() * asyncLaunches / t);
  }
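  // Summarize across reps: minimum via min_element, median via nth_element,
  // both normalized to per-launch runtime.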
  double min = *(std::min_element(times.begin(), times.end()));
  size_t midElt = times.size() / 2;
  std::nth_element(times.begin(), times.begin() + midElt, times.end());
  double median = times[midElt];
  double median_runtime = median / ((double)asyncLaunches);
  double min_runtime = min / ((double)asyncLaunches);
  printf("_,benchName,_,m,n,numTensors,numLayers,numReps,numAsyncLaunches,"
         "backendStr,dtypeStr,medianRuntime,minRuntime,"
         "medianGbytesPerSecPerChain,maxGbytesPerSecPerChain\n");
  printf("BenchSummary,ConcatBench,SW,%4zu,%4zu,%4zu,%4zu,%4zu,%4zu,%s,%s,"
         "%2.6lf,%2.6lf,%5.2lf,%5.2lf\n",
         m, n, numTensors, numLayers, reps, asyncLaunches, backendStr, dtypeStr,
         median_runtime, min_runtime, b.gbytes() / median_runtime,
         b.gbytes() / min_runtime);
}