1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include <array> |
17 | #include <cstdlib> |
18 | #include <future> |
19 | #include <random> |
20 | |
21 | #include "Bench.h" |
22 | |
23 | #include "glow/ExecutionEngine/ExecutionEngine.h" |
24 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
25 | |
26 | using namespace glow; |
27 | |
/*
 * Benchmark a number of (m x n) * (n x n) matrix multiplications.
 * A number of parallel FC nodes are created, one per core, and each core
 * handles one weight matrix. These per-core chains are then stacked into
 * multiple layers: after each layer, the output tensor is passed on as the
 * input of the next layer.
 */
35 | class Int8GemmParallelBench : public Benchmark { |
36 | /// Matrices. |
37 | std::vector<float> a; |
38 | std::vector<float> b; |
39 | std::vector<float> c; |
40 | |
41 | /// Dimensions expressed in libjit's format. |
42 | size_t aDims[2]; |
43 | size_t cDims[2]; |
44 | size_t numLayers_; |
45 | PlaceholderBindings bindings_; |
46 | std::unique_ptr<runtime::HostManager> hostManager_; |
47 | size_t asyncLaunchSize_; |
48 | size_t numCores_; |
49 | const char *backendStr_; |
50 | const char *devId_; |
51 | |
52 | public: |
53 | Int8GemmParallelBench(size_t m, size_t n, size_t numLayers_, |
54 | size_t asyncLaunchSize_, size_t numCores_, |
55 | const char *backendStr_, const char *devId_) |
56 | : aDims{m, n}, cDims{m, n}, numLayers_(numLayers_), |
57 | asyncLaunchSize_(asyncLaunchSize_), numCores_(numCores_), |
58 | backendStr_(backendStr_), devId_(devId_) {} |
59 | |
60 | void setup() override { |
61 | |
62 | // Setup host manager |
63 | std::vector<std::unique_ptr<runtime::DeviceConfig>> configs; |
64 | auto config = glow::make_unique<runtime::DeviceConfig>(backendStr_); |
65 | if (devId_ != nullptr) { |
66 | config->parameters["DeviceID" ] = devId_; |
67 | } |
68 | configs.push_back(std::move(config)); |
69 | hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs)); |
70 | printf("set up host manager\n" ); |
71 | |
72 | dim_t m = cDims[0]; |
73 | dim_t n = cDims[1]; |
74 | dim_t k = aDims[1]; |
75 | a.resize(m * k); |
76 | b.resize(k * n); |
77 | c.resize(m * n); |
78 | |
79 | std::unique_ptr<Module> mod(new Module); |
80 | auto fn = mod->createFunction("singleNode" ); |
81 | printf("set up module \n" ); |
82 | |
83 | std::vector<Node *> cur(numCores_); |
84 | std::vector<Placeholder *> weights(numCores_); |
85 | std::vector<Placeholder *> bias(numCores_); |
86 | std::vector<Node *> fc(numCores_); |
87 | std::vector<Placeholder *> input(numCores_); |
88 | std::vector<Placeholder *> output(numCores_); |
89 | |
90 | printf("set up inputs and outputs" ); |
91 | for (size_t core = 0; core < numCores_; core++) { |
92 | input[core] = |
93 | mod->createPlaceholder(ElemKind::Int8QTy, {m, k}, 1.0, 0, |
94 | "input_" + std::to_string(core), false); |
95 | output[core] = |
96 | mod->createPlaceholder(ElemKind::Int8QTy, {m, n}, 1.0, 0, |
97 | "output_" + std::to_string(core), false); |
98 | cur[core] = input[core]; |
99 | } |
100 | |
101 | printf("set up weights and bias" ); |
102 | for (size_t layer = 0; layer < numLayers_; layer++) { |
103 | for (size_t core = 0; core < numCores_; core++) { |
104 | weights[core] = |
105 | mod->createPlaceholder(ElemKind::Int8QTy, {k, n}, 1.0, 0, |
106 | "weights_" + std::to_string(core), false); |
107 | bias[core] = |
108 | mod->createPlaceholder(ElemKind::Int32QTy, {n}, 1.0, 0, |
109 | "bias_" + std::to_string(core), false); |
110 | bindings_.allocate(weights[core]) |
111 | ->getHandle<int8_t>() |
112 | .randomize(0, 128, mod->getPRNG()); |
113 | bindings_.allocate(bias[core]) |
114 | ->getHandle<int32_t>() |
115 | .randomize(0, 128, mod->getPRNG()); |
116 | fc[core] = fn->createFullyConnected( |
117 | "fc" + std::to_string(core) + "_" + std::to_string(layer), |
118 | cur[core], weights[core], bias[core]); |
119 | cur[core] = fc[core]; |
120 | } |
121 | } |
122 | printf("save output" ); |
123 | for (size_t core = 0; core < numCores_; core++) { |
124 | fn->createSave("save" + std::to_string(core), cur[core], output[core]); |
125 | } |
126 | |
127 | for (size_t core = 0; core < numCores_; core++) { |
128 | ::glow::convertPlaceholdersToConstants(fn, bindings_, |
129 | { |
130 | input[core], |
131 | output[core], |
132 | }); |
133 | } |
134 | |
135 | CompilationContext ctx; |
136 | EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx)); |
137 | } |
138 | |
139 | void run() override { |
140 | printf("Running module" ); |
141 | std::vector<std::promise<void>> promises(asyncLaunchSize_); |
142 | std::vector<std::future<void>> futures; |
143 | for (auto &runPromise : promises) { |
144 | std::unique_ptr<ExecutionContext> contextPtr(new ExecutionContext); |
145 | futures.push_back(runPromise.get_future()); |
146 | hostManager_->runNetwork( |
147 | "singleNode" , std::move(contextPtr), |
148 | [&runPromise](runtime::RunIdentifierTy, Error err, |
149 | std::unique_ptr<ExecutionContext> /* contextPtr */) { |
150 | EXIT_ON_ERR(std::move(err)); |
151 | runPromise.set_value(); |
152 | }); |
153 | } |
154 | for (auto &fut : futures) { |
155 | fut.wait(); |
156 | } |
157 | } |
158 | |
159 | void teardown() override {} |
160 | |
161 | double gflops() const { |
162 | return 2.0 * cDims[0] * cDims[1] * aDims[1] * numLayers_ * numCores_ / 1e9; |
163 | } |
164 | }; |
165 | |
166 | int main(int argc, char *argv[]) { |
167 | size_t m = atoi(argv[1]); |
168 | size_t n = atoi(argv[2]); |
169 | size_t numLayers = atoi(argv[3]); |
170 | size_t reps = atoi(argv[4]); |
171 | size_t asyncLaunches = atoi(argv[5]); |
172 | size_t numCores = atoi(argv[6]); |
173 | const char *backendStr = argv[7]; |
174 | char *dev_id = nullptr; |
175 | |
176 | printf("Int8GEMMParallel Microbenchmark\n" ); |
177 | printf( |
178 | "Usage: Int8GemmParallelBench m(Int) n(Int) numLayers(Int) numReps(Int) " |
179 | "numAsyncLaunches(Int) numCores(Int) backendStr(String) dev_id(Int)\n" ); |
180 | printf("Standard Glow command-line options may be passed via the GLOW_OPTS " |
181 | "environment variable\n" ); |
182 | benchParseGlowOpts(argc, argv); |
183 | assert(argc == 8 || argc == 9); |
184 | if (argc > 8) { |
185 | dev_id = argv[8]; |
186 | printf("Setting backend device: \"%s\"\n" , dev_id); |
187 | } |
188 | printf("Start Int8GemmParallelBench\n" ); |
189 | Int8GemmParallelBench b(m, n, numLayers, asyncLaunches, numCores, backendStr, |
190 | dev_id); |
191 | auto times = bench(&b, reps); |
192 | for (auto t : times) { |
193 | printf("BenchResult,GemmParallelBench,SW,%4zu,%4zu,%4zu,%4zu,%4zu,%4zu,%s,%" |
194 | "2.6lf,%5.2lf\n" , |
195 | m, n, numLayers, reps, asyncLaunches, numCores, backendStr, |
196 | t / asyncLaunches, b.gflops() * asyncLaunches / t); |
197 | } |
198 | double min = *(std::min_element(times.begin(), times.end())); |
199 | size_t midElt = times.size() / 2; |
200 | std::nth_element(times.begin(), times.begin() + midElt, times.end()); |
201 | double median = times[midElt]; |
202 | double median_runtime = median / ((double)asyncLaunches); |
203 | double min_runtime = min / ((double)asyncLaunches); |
204 | printf("BenchSummary,GemmParallelBench,SW,%4zu,%4zu,%4zu,%4zu,%4zu,%4zu,%s,%" |
205 | "2.6lf,%2.6lf,%5.2lf, %5.2lf\n" , |
206 | m, n, numLayers, reps, asyncLaunches, numCores, backendStr, |
207 | median_runtime, min_runtime, b.gflops() / median_runtime, |
208 | b.gflops() / min_runtime); |
209 | } |
210 | |