1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include <array> |
17 | #include <cstdlib> |
18 | #include <fstream> |
19 | #include <future> |
20 | #include <random> |
21 | #include <string> |
22 | |
23 | #include "Bench.h" |
24 | |
25 | #include "glow/ExecutionEngine/ExecutionEngine.h" |
26 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
27 | |
28 | #include "llvm/Support/CommandLine.h" |
29 | #include "llvm/Support/FileSystem.h" |
30 | #include "llvm/Support/Signals.h" |
31 | |
32 | #include "tests/unittests/BackendTestUtils.h" |
33 | |
34 | using namespace glow; |
35 | |
36 | /* |
37 | * This class implements a Int8 Quantized GEMM/FC microbenchmark. There are a |
38 | * set of (m x k) * (k x n) = (m x n) matrix multiplications, chained together |
39 | * in multiple layers. |
40 | * |
41 | * Microbenchmarks are generally useful for understanding performance |
 * through targeted experimentation and are not representative of
43 | * end-to-end workloads. |
44 | */ |
// TODO: Move all the args passed by command line to LLVM options.
// Category grouping this benchmark's LLVM command-line options in -help.
llvm::cl::OptionCategory int8GemmBenchCat("Int8GemmBench Category" );
// When set (-check-results), the benchmark builds a second copy of the
// network on the Interpreter backend and compares its output tensor against
// the device-under-test after every run.
llvm::cl::opt<bool> checkCorrectness(
    "check-results" ,
    llvm::cl::desc("Check the correctness of the results against the reference "
                   "backend (Interpreter)" ),
    llvm::cl::Optional, llvm::cl::init(false), llvm::cl::cat(int8GemmBenchCat));
52 | |
/// Parameters for one run of the chained Int8 GEMM/FC benchmark:
/// (m x k) * (k x n) = (m x n) matmuls stacked numLayers_ deep.
struct Int8GemmParam {
  dim_t m_;                // Rows of the activation matrix.
  dim_t n_;                // Output features of each FC layer.
  dim_t k_;                // Input features of each FC layer.
  dim_t numLayers_;        // Number of chained FC layers.
  dim_t numReps_;          // Number of timed benchmark repetitions.
  dim_t numAsyncLaunches_; // Concurrent inference requests per repetition.
  dim_t numSplits_;        // Parallelization factor for FC/(De)Quantize nodes.
  std::string backendStr_; // Backend name for the device under test.
  std::string devId_;      // Optional device id ("" means unspecified).
};
64 | |
65 | class Int8GemmBench : public Benchmark { |
66 | Int8GemmParam param_; |
67 | ExecutionContext context_; |
68 | PlaceholderBindings &bindings_; |
69 | std::unique_ptr<runtime::HostManager> hostManager_; |
70 | |
71 | // Refernce bindings and network: |
72 | ExecutionContext refContext_; |
73 | PlaceholderBindings &refBindings_; |
74 | std::unique_ptr<runtime::HostManager> refHostManager_; |
75 | |
76 | public: |
77 | explicit Int8GemmBench(Int8GemmParam param_) |
78 | : param_(param_), bindings_(*context_.getPlaceholderBindings()), |
79 | refBindings_(*refContext_.getPlaceholderBindings()) {} |
80 | |
81 | void addInt8GemmNode(std::unique_ptr<Module> &mod, Function *fn, |
82 | Int8GemmParam param, bool isRef) { |
83 | |
84 | PlaceholderBindings &bindings = isRef ? refBindings_ : bindings_; |
85 | auto *input = mod->createPlaceholder(ElemKind::Float16Ty, |
86 | {param.m_, param.k_}, "input" , false); |
87 | bindings.allocate(input)->getHandle<float16>().randomize(-5.f, 5.f, |
88 | mod->getPRNG()); |
89 | auto *output = mod->createPlaceholder( |
90 | ElemKind::Float16Ty, {param.m_, param.n_}, "output" , false); |
91 | auto *q_input = fn->createQuantize( |
92 | "int8_quantize" , input, |
93 | mod->uniqueType(ElemKind::Int8QTy, {param.m_, param.k_}, 1.0, 0)); |
94 | Node *cur = q_input; |
95 | |
96 | Placeholder *ones; |
97 | if (param.k_ > param.n_) { |
98 | ones = mod->createPlaceholder(ElemKind::Int8QTy, |
99 | {param.m_ * (param.k_ - param.n_)}, 1.0, 0, |
100 | "ones" , false); |
101 | bindings.allocate(ones)->getHandle<int8_t>().clear(1); |
102 | } |
103 | |
104 | Placeholder *weights; |
105 | Placeholder *bias; |
106 | |
107 | // Create multiple layers of FC nodes |
108 | for (size_t layer = 0; layer < param.numLayers_; layer++) { |
109 | weights = |
110 | mod->createPlaceholder(ElemKind::Int8QTy, {param.k_, param.n_}, 1.0, |
111 | 0, "weights" + std::to_string(layer), false); |
112 | bias = mod->createPlaceholder(ElemKind::Int32QTy, {param.n_}, 1.0, 0, |
113 | "bias" + std::to_string(layer), false); |
114 | |
115 | bindings.allocate(weights)->getHandle<int8_t>().randomize(-128, 127, |
116 | mod->getPRNG()); |
117 | bindings.allocate(bias)->getHandle<int32_t>().randomize(-128, 127, |
118 | mod->getPRNG()); |
119 | |
120 | Node *fc; |
121 | fc = fn->createFullyConnected("fc_" + std::to_string(layer), cur, weights, |
122 | bias); |
123 | cur = fc; |
124 | |
125 | // Handle non-square cases |
126 | if (param.k_ > param.n_ && layer < (param.numLayers_ - 1)) { |
127 | Node *reshape1 = fn->createReshape("reshape1_" + std::to_string(layer), |
128 | fc, {param.m_ * param.n_}); |
129 | Node *concat = fn->createConcat("concat_" + std::to_string(layer), |
130 | {reshape1, ones}, 0); |
131 | Node *reshape2 = fn->createReshape("reshape2_" + std::to_string(layer), |
132 | concat, {param.m_, param.k_}); |
133 | cur = reshape2; |
134 | } else if (param.k_ < param.n_ && layer < (param.numLayers_ - 1)) { |
135 | Node *slice = fn->createSlice("slice_" + std::to_string(layer), fc, |
136 | {0, 0}, {param.m_, param.k_}); |
137 | cur = slice; |
138 | } |
139 | } |
140 | auto *dequantized_fc = fn->createDequantize( |
141 | "int8_dequantize" , cur, |
142 | mod->uniqueType(ElemKind::Float16Ty, {param.m_, param.n_})); |
143 | cur = dequantized_fc; |
144 | fn->createSave("save1" , cur, output); |
145 | bindings.allocate(output); |
146 | ::glow::convertPlaceholdersToConstants(fn, bindings, {input, output}); |
147 | } |
148 | |
149 | void parallelize(Function *fn) { |
150 | // Model parallelize FCs |
151 | llvm::DenseMap<Node *, size_t> numOfChunks; |
152 | llvm::DenseMap<Node *, ParallelTransformKind> parOpts; |
153 | for (auto &N : fn->getNodes()) { |
154 | if (N.getKind() == Kinded::Kind::FullyConnectedNodeKind) { |
155 | numOfChunks[&N] = param_.numSplits_; |
156 | parOpts[&N] = ParallelTransformKind::Model; |
157 | } |
158 | } |
159 | |
160 | // Parallelize Quantize/Dequantize |
161 | for (auto &N : fn->getNodes()) { |
162 | if (N.getKind() == Kinded::Kind::QuantizeNodeKind || |
163 | N.getKind() == Kinded::Kind::DequantizeNodeKind) { |
164 | numOfChunks[&N] = param_.numSplits_; |
165 | parOpts[&N] = ParallelTransformKind::Data; |
166 | } |
167 | } |
168 | EXIT_ON_ERR(parallelizeOps(fn, numOfChunks, parOpts, 1)); |
169 | } |
170 | |
171 | void setup_internal(bool isRef) { |
172 | // Setup host manager |
173 | std::string backendStr = isRef ? "Interpreter" : param_.backendStr_.c_str(); |
174 | std::vector<std::unique_ptr<runtime::DeviceConfig>> configs; |
175 | auto config = glow::make_unique<runtime::DeviceConfig>(backendStr); |
176 | if (param_.devId_ != "" ) { |
177 | config->parameters["DeviceID" ] = param_.devId_.c_str(); |
178 | } |
179 | configs.push_back(std::move(config)); |
180 | if (isRef) { |
181 | refHostManager_ = |
182 | glow::make_unique<runtime::HostManager>(std::move(configs)); |
183 | } else { |
184 | hostManager_ = |
185 | glow::make_unique<runtime::HostManager>(std::move(configs)); |
186 | } |
187 | |
188 | std::unique_ptr<Module> mod(new Module); |
189 | auto fn = mod->createFunction("singleNode" ); |
190 | |
191 | addInt8GemmNode(mod, fn, param_, isRef); |
192 | parallelize(fn); |
193 | optimize(fn, CompilationMode::Infer); |
194 | |
195 | CompilationContext ctx; |
196 | ctx.dumpFinalGraph = true; |
197 | if (isRef) { |
198 | EXIT_ON_ERR(refHostManager_->addNetwork(std::move(mod), ctx)); |
199 | } else { |
200 | EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx)); |
201 | } |
202 | } |
203 | |
204 | void setup() override { |
205 | if (checkCorrectness) { |
206 | setup_internal(/* isRef */ true); |
207 | } |
208 | setup_internal(/* isRef */ false); |
209 | } |
210 | |
211 | void checkOutput() { |
212 | // First run on the reference backend |
213 | dispatchInference("singleNode" , refHostManager_.get(), refContext_, |
214 | param_.numAsyncLaunches_, |
215 | /*useNewExecutionContext*/ true); |
216 | Tensor *refTensor = |
217 | refBindings_.get(refBindings_.getPlaceholderByNameSlow("output" )); |
218 | CHECK(refTensor) << "Reference Tensor not found" ; |
219 | |
220 | Tensor *noRefTensor = |
221 | bindings_.get(bindings_.getPlaceholderByNameSlow("output" )); |
222 | CHECK(noRefTensor) << "non-reference Tensor not found" ; |
223 | |
224 | // Compare the tensors |
225 | if (!noRefTensor->isEqual(*refTensor)) { |
226 | noRefTensor->dump(); |
227 | refTensor->dump(); |
228 | LOG(FATAL) << "Tensors don't match\n" ; |
229 | } else { |
230 | LOG(INFO) << "Tensors match\n" ; |
231 | } |
232 | } |
233 | |
234 | void run() override { |
235 | dispatchInference("singleNode" , hostManager_.get(), context_, |
236 | param_.numAsyncLaunches_, |
237 | /*useNewExecutionContext*/ true); |
238 | if (checkCorrectness) { |
239 | checkOutput(); |
240 | } |
241 | } |
242 | |
243 | void teardown() override {} |
244 | |
245 | double gops() const { |
246 | return 2.0 * param_.m_ * param_.n_ * param_.k_ * param_.numLayers_ / 1e9; |
247 | } |
248 | }; |
249 | |
250 | #define DEVICE_ID 9 |
251 | |
252 | Int8GemmParam parseArgs(int argc, char *argv[]) { |
253 | Int8GemmParam param; |
254 | |
255 | param.m_ = atoi(argv[1]); |
256 | param.n_ = atoi(argv[2]); |
257 | param.k_ = atoi(argv[3]); |
258 | param.numLayers_ = atoi(argv[4]); |
259 | param.numReps_ = atoi(argv[5]); |
260 | param.numAsyncLaunches_ = atoi(argv[6]); |
261 | param.numSplits_ = atoi(argv[7]); |
262 | param.backendStr_ = std::string(argv[8]); |
263 | |
264 | printf("m %zu\n" , (size_t)param.m_); |
265 | printf("n %zu\n" , (size_t)param.n_); |
266 | printf("k %zu\n" , (size_t)param.k_); |
267 | printf("numLayers %zu\n" , (size_t)param.numLayers_); |
268 | printf("numReps %zu\n" , (size_t)param.numReps_); |
269 | printf("numAsyncLaunches %zu\n" , (size_t)param.numAsyncLaunches_); |
270 | printf("numSplits %zu\n" , (size_t)param.numSplits_); |
271 | printf("backendStr %s\n" , param.backendStr_.c_str()); |
272 | |
273 | if (argc > DEVICE_ID) { |
274 | printf("devId %s\n" , argv[DEVICE_ID]); |
275 | param.devId_ = std::string(argv[DEVICE_ID]); |
276 | } else { |
277 | param.devId_ = std::string("" ); |
278 | } |
279 | printf("\n\n" ); |
280 | return param; |
281 | } |
282 | |
283 | int main(int argc, char *argv[]) { |
284 | printf("GEMM Microbenchmark\n" ); |
285 | printf("Usage: GemmBench m(Int) n(Int) k(Int) numLayers(Int) numReps(Int) " |
286 | "numAsyncLaunches(Int) numSplits(Int) backendStr(String) " |
287 | "dev_id(Int)\n" ); |
288 | printf("Standard Glow command-line options may be passed via the GLOW_OPTS " |
289 | "environment variable\n" ); |
290 | benchParseGlowOpts(argc, argv); |
291 | |
292 | std::vector<Int8GemmParam> params; |
293 | std::string ; |
294 | std::string runPrefix; |
295 | |
296 | // Using a config file |
297 | if (argc == 2) { |
298 | auto fname = std::string(argv[1]); |
299 | std::ifstream fin(fname.c_str()); |
300 | if (!fin) { |
301 | std::cout << "Could not open file: " << fname << std::endl; |
302 | exit(0); |
303 | } |
304 | std::string line; |
305 | while (getline(fin, line)) { |
306 | std::array<char, 1024> buf; |
307 | char *saveptr = nullptr; |
308 | std::vector<char *> argVec; |
309 | strcpy(buf.data(), line.c_str()); |
310 | char *ptr = strtok_r(buf.data(), " " , &saveptr); |
311 | while (ptr != nullptr) { |
312 | argVec.push_back(ptr); |
313 | ptr = strtok_r(nullptr, " " , &saveptr); |
314 | } |
315 | Int8GemmParam param = parseArgs(argVec.size(), argVec.data()); |
316 | params.push_back(param); |
317 | runHeader = std::string("_,benchName,_,filename" ); |
318 | runPrefix = std::string(strFormat("GemmBench,SW,%s" , fname.c_str())); |
319 | } |
320 | } else if (argc == 9 || argc == 10) { |
321 | Int8GemmParam param = parseArgs(argc, argv); |
322 | params.push_back(param); |
323 | runHeader = std::string( |
324 | "_,benchName,_,m,n,k,numLayers,numReps,numAsyncLaunches,numSplits," |
325 | "backendStr\n" ); |
326 | runPrefix = std::string(strFormat( |
327 | "GemmBench,SW,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%s" , (size_t)param.m_, |
328 | (size_t)param.n_, (size_t)param.k_, (size_t)param.numLayers_, |
329 | (size_t)param.numReps_, (size_t)param.numAsyncLaunches_, |
330 | (size_t)param.numSplits_, argv[8])); |
331 | } else { |
332 | llvm_unreachable("Invalid command line" ); |
333 | } |
334 | |
335 | for (auto param : params) { |
336 | Int8GemmBench b(param); |
337 | auto times = bench(&b, param.numReps_); |
338 | |
339 | printf("%s,runtime,gflopPerSec\n" , runHeader.c_str()); |
340 | for (auto t : times) { |
341 | printf("BenchResult,%s,%f,%f\n" , runPrefix.c_str(), |
342 | t / param.numAsyncLaunches_, |
343 | b.gops() * param.numAsyncLaunches_ / t); |
344 | } |
345 | double min = *(std::min_element(times.begin(), times.end())); |
346 | dim_t midElt = times.size() / 2; |
347 | std::nth_element(times.begin(), times.begin() + midElt, times.end()); |
348 | double median = times[midElt]; |
349 | double medianRuntime = median / ((double)param.numAsyncLaunches_); |
350 | double minRuntime = min / ((double)param.numAsyncLaunches_); |
351 | printf("%s,medianRuntime,minRuntime,medianGflopPerSec,maxGflopPerSec\n" , |
352 | runHeader.c_str()); |
353 | printf("BenchSummary,%s,%f,%f,%f,%f\n" , runPrefix.c_str(), medianRuntime, |
354 | minRuntime, b.gops() / medianRuntime, b.gops() / minRuntime); |
355 | } |
356 | } |
357 | |