1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
#include <algorithm>
#include <array>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <future>
#include <iostream>
#include <random>
#include <string>
#include <vector>
22
23#include "Bench.h"
24
25#include "glow/ExecutionEngine/ExecutionEngine.h"
26#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
27
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/FileSystem.h"
30#include "llvm/Support/Signals.h"
31
32#include "tests/unittests/BackendTestUtils.h"
33
34using namespace glow;
35
36/*
37 * This class implements a Int8 Quantized GEMM/FC microbenchmark. There are a
38 * set of (m x k) * (k x n) = (m x n) matrix multiplications, chained together
39 * in multiple layers.
40 *
41 * Microbenchmarks are generally useful for understanding performance
42 * through targeted experiementation and are not representative of
43 * end-to-end workloads.
44 */
45// TODO: Move all the args passed by command line to LLVM options.
46llvm::cl::OptionCategory int8GemmBenchCat("Int8GemmBench Category");
47llvm::cl::opt<bool> checkCorrectness(
48 "check-results",
49 llvm::cl::desc("Check the correctness of the results against the reference "
50 "backend (Interpreter)"),
51 llvm::cl::Optional, llvm::cl::init(false), llvm::cl::cat(int8GemmBenchCat));
52
53struct Int8GemmParam {
54 dim_t m_;
55 dim_t n_;
56 dim_t k_;
57 dim_t numLayers_;
58 dim_t numReps_;
59 dim_t numAsyncLaunches_;
60 dim_t numSplits_;
61 std::string backendStr_;
62 std::string devId_;
63};
64
65class Int8GemmBench : public Benchmark {
66 Int8GemmParam param_;
67 ExecutionContext context_;
68 PlaceholderBindings &bindings_;
69 std::unique_ptr<runtime::HostManager> hostManager_;
70
71 // Refernce bindings and network:
72 ExecutionContext refContext_;
73 PlaceholderBindings &refBindings_;
74 std::unique_ptr<runtime::HostManager> refHostManager_;
75
76public:
77 explicit Int8GemmBench(Int8GemmParam param_)
78 : param_(param_), bindings_(*context_.getPlaceholderBindings()),
79 refBindings_(*refContext_.getPlaceholderBindings()) {}
80
81 void addInt8GemmNode(std::unique_ptr<Module> &mod, Function *fn,
82 Int8GemmParam param, bool isRef) {
83
84 PlaceholderBindings &bindings = isRef ? refBindings_ : bindings_;
85 auto *input = mod->createPlaceholder(ElemKind::Float16Ty,
86 {param.m_, param.k_}, "input", false);
87 bindings.allocate(input)->getHandle<float16>().randomize(-5.f, 5.f,
88 mod->getPRNG());
89 auto *output = mod->createPlaceholder(
90 ElemKind::Float16Ty, {param.m_, param.n_}, "output", false);
91 auto *q_input = fn->createQuantize(
92 "int8_quantize", input,
93 mod->uniqueType(ElemKind::Int8QTy, {param.m_, param.k_}, 1.0, 0));
94 Node *cur = q_input;
95
96 Placeholder *ones;
97 if (param.k_ > param.n_) {
98 ones = mod->createPlaceholder(ElemKind::Int8QTy,
99 {param.m_ * (param.k_ - param.n_)}, 1.0, 0,
100 "ones", false);
101 bindings.allocate(ones)->getHandle<int8_t>().clear(1);
102 }
103
104 Placeholder *weights;
105 Placeholder *bias;
106
107 // Create multiple layers of FC nodes
108 for (size_t layer = 0; layer < param.numLayers_; layer++) {
109 weights =
110 mod->createPlaceholder(ElemKind::Int8QTy, {param.k_, param.n_}, 1.0,
111 0, "weights" + std::to_string(layer), false);
112 bias = mod->createPlaceholder(ElemKind::Int32QTy, {param.n_}, 1.0, 0,
113 "bias" + std::to_string(layer), false);
114
115 bindings.allocate(weights)->getHandle<int8_t>().randomize(-128, 127,
116 mod->getPRNG());
117 bindings.allocate(bias)->getHandle<int32_t>().randomize(-128, 127,
118 mod->getPRNG());
119
120 Node *fc;
121 fc = fn->createFullyConnected("fc_" + std::to_string(layer), cur, weights,
122 bias);
123 cur = fc;
124
125 // Handle non-square cases
126 if (param.k_ > param.n_ && layer < (param.numLayers_ - 1)) {
127 Node *reshape1 = fn->createReshape("reshape1_" + std::to_string(layer),
128 fc, {param.m_ * param.n_});
129 Node *concat = fn->createConcat("concat_" + std::to_string(layer),
130 {reshape1, ones}, 0);
131 Node *reshape2 = fn->createReshape("reshape2_" + std::to_string(layer),
132 concat, {param.m_, param.k_});
133 cur = reshape2;
134 } else if (param.k_ < param.n_ && layer < (param.numLayers_ - 1)) {
135 Node *slice = fn->createSlice("slice_" + std::to_string(layer), fc,
136 {0, 0}, {param.m_, param.k_});
137 cur = slice;
138 }
139 }
140 auto *dequantized_fc = fn->createDequantize(
141 "int8_dequantize", cur,
142 mod->uniqueType(ElemKind::Float16Ty, {param.m_, param.n_}));
143 cur = dequantized_fc;
144 fn->createSave("save1", cur, output);
145 bindings.allocate(output);
146 ::glow::convertPlaceholdersToConstants(fn, bindings, {input, output});
147 }
148
149 void parallelize(Function *fn) {
150 // Model parallelize FCs
151 llvm::DenseMap<Node *, size_t> numOfChunks;
152 llvm::DenseMap<Node *, ParallelTransformKind> parOpts;
153 for (auto &N : fn->getNodes()) {
154 if (N.getKind() == Kinded::Kind::FullyConnectedNodeKind) {
155 numOfChunks[&N] = param_.numSplits_;
156 parOpts[&N] = ParallelTransformKind::Model;
157 }
158 }
159
160 // Parallelize Quantize/Dequantize
161 for (auto &N : fn->getNodes()) {
162 if (N.getKind() == Kinded::Kind::QuantizeNodeKind ||
163 N.getKind() == Kinded::Kind::DequantizeNodeKind) {
164 numOfChunks[&N] = param_.numSplits_;
165 parOpts[&N] = ParallelTransformKind::Data;
166 }
167 }
168 EXIT_ON_ERR(parallelizeOps(fn, numOfChunks, parOpts, 1));
169 }
170
171 void setup_internal(bool isRef) {
172 // Setup host manager
173 std::string backendStr = isRef ? "Interpreter" : param_.backendStr_.c_str();
174 std::vector<std::unique_ptr<runtime::DeviceConfig>> configs;
175 auto config = glow::make_unique<runtime::DeviceConfig>(backendStr);
176 if (param_.devId_ != "") {
177 config->parameters["DeviceID"] = param_.devId_.c_str();
178 }
179 configs.push_back(std::move(config));
180 if (isRef) {
181 refHostManager_ =
182 glow::make_unique<runtime::HostManager>(std::move(configs));
183 } else {
184 hostManager_ =
185 glow::make_unique<runtime::HostManager>(std::move(configs));
186 }
187
188 std::unique_ptr<Module> mod(new Module);
189 auto fn = mod->createFunction("singleNode");
190
191 addInt8GemmNode(mod, fn, param_, isRef);
192 parallelize(fn);
193 optimize(fn, CompilationMode::Infer);
194
195 CompilationContext ctx;
196 ctx.dumpFinalGraph = true;
197 if (isRef) {
198 EXIT_ON_ERR(refHostManager_->addNetwork(std::move(mod), ctx));
199 } else {
200 EXIT_ON_ERR(hostManager_->addNetwork(std::move(mod), ctx));
201 }
202 }
203
204 void setup() override {
205 if (checkCorrectness) {
206 setup_internal(/* isRef */ true);
207 }
208 setup_internal(/* isRef */ false);
209 }
210
211 void checkOutput() {
212 // First run on the reference backend
213 dispatchInference("singleNode", refHostManager_.get(), refContext_,
214 param_.numAsyncLaunches_,
215 /*useNewExecutionContext*/ true);
216 Tensor *refTensor =
217 refBindings_.get(refBindings_.getPlaceholderByNameSlow("output"));
218 CHECK(refTensor) << "Reference Tensor not found";
219
220 Tensor *noRefTensor =
221 bindings_.get(bindings_.getPlaceholderByNameSlow("output"));
222 CHECK(noRefTensor) << "non-reference Tensor not found";
223
224 // Compare the tensors
225 if (!noRefTensor->isEqual(*refTensor)) {
226 noRefTensor->dump();
227 refTensor->dump();
228 LOG(FATAL) << "Tensors don't match\n";
229 } else {
230 LOG(INFO) << "Tensors match\n";
231 }
232 }
233
234 void run() override {
235 dispatchInference("singleNode", hostManager_.get(), context_,
236 param_.numAsyncLaunches_,
237 /*useNewExecutionContext*/ true);
238 if (checkCorrectness) {
239 checkOutput();
240 }
241 }
242
243 void teardown() override {}
244
245 double gops() const {
246 return 2.0 * param_.m_ * param_.n_ * param_.k_ * param_.numLayers_ / 1e9;
247 }
248};
249
250#define DEVICE_ID 9
251
252Int8GemmParam parseArgs(int argc, char *argv[]) {
253 Int8GemmParam param;
254
255 param.m_ = atoi(argv[1]);
256 param.n_ = atoi(argv[2]);
257 param.k_ = atoi(argv[3]);
258 param.numLayers_ = atoi(argv[4]);
259 param.numReps_ = atoi(argv[5]);
260 param.numAsyncLaunches_ = atoi(argv[6]);
261 param.numSplits_ = atoi(argv[7]);
262 param.backendStr_ = std::string(argv[8]);
263
264 printf("m %zu\n", (size_t)param.m_);
265 printf("n %zu\n", (size_t)param.n_);
266 printf("k %zu\n", (size_t)param.k_);
267 printf("numLayers %zu\n", (size_t)param.numLayers_);
268 printf("numReps %zu\n", (size_t)param.numReps_);
269 printf("numAsyncLaunches %zu\n", (size_t)param.numAsyncLaunches_);
270 printf("numSplits %zu\n", (size_t)param.numSplits_);
271 printf("backendStr %s\n", param.backendStr_.c_str());
272
273 if (argc > DEVICE_ID) {
274 printf("devId %s\n", argv[DEVICE_ID]);
275 param.devId_ = std::string(argv[DEVICE_ID]);
276 } else {
277 param.devId_ = std::string("");
278 }
279 printf("\n\n");
280 return param;
281}
282
283int main(int argc, char *argv[]) {
284 printf("GEMM Microbenchmark\n");
285 printf("Usage: GemmBench m(Int) n(Int) k(Int) numLayers(Int) numReps(Int) "
286 "numAsyncLaunches(Int) numSplits(Int) backendStr(String) "
287 "dev_id(Int)\n");
288 printf("Standard Glow command-line options may be passed via the GLOW_OPTS "
289 "environment variable\n");
290 benchParseGlowOpts(argc, argv);
291
292 std::vector<Int8GemmParam> params;
293 std::string runHeader;
294 std::string runPrefix;
295
296 // Using a config file
297 if (argc == 2) {
298 auto fname = std::string(argv[1]);
299 std::ifstream fin(fname.c_str());
300 if (!fin) {
301 std::cout << "Could not open file: " << fname << std::endl;
302 exit(0);
303 }
304 std::string line;
305 while (getline(fin, line)) {
306 std::array<char, 1024> buf;
307 char *saveptr = nullptr;
308 std::vector<char *> argVec;
309 strcpy(buf.data(), line.c_str());
310 char *ptr = strtok_r(buf.data(), " ", &saveptr);
311 while (ptr != nullptr) {
312 argVec.push_back(ptr);
313 ptr = strtok_r(nullptr, " ", &saveptr);
314 }
315 Int8GemmParam param = parseArgs(argVec.size(), argVec.data());
316 params.push_back(param);
317 runHeader = std::string("_,benchName,_,filename");
318 runPrefix = std::string(strFormat("GemmBench,SW,%s", fname.c_str()));
319 }
320 } else if (argc == 9 || argc == 10) {
321 Int8GemmParam param = parseArgs(argc, argv);
322 params.push_back(param);
323 runHeader = std::string(
324 "_,benchName,_,m,n,k,numLayers,numReps,numAsyncLaunches,numSplits,"
325 "backendStr\n");
326 runPrefix = std::string(strFormat(
327 "GemmBench,SW,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%s", (size_t)param.m_,
328 (size_t)param.n_, (size_t)param.k_, (size_t)param.numLayers_,
329 (size_t)param.numReps_, (size_t)param.numAsyncLaunches_,
330 (size_t)param.numSplits_, argv[8]));
331 } else {
332 llvm_unreachable("Invalid command line");
333 }
334
335 for (auto param : params) {
336 Int8GemmBench b(param);
337 auto times = bench(&b, param.numReps_);
338
339 printf("%s,runtime,gflopPerSec\n", runHeader.c_str());
340 for (auto t : times) {
341 printf("BenchResult,%s,%f,%f\n", runPrefix.c_str(),
342 t / param.numAsyncLaunches_,
343 b.gops() * param.numAsyncLaunches_ / t);
344 }
345 double min = *(std::min_element(times.begin(), times.end()));
346 dim_t midElt = times.size() / 2;
347 std::nth_element(times.begin(), times.begin() + midElt, times.end());
348 double median = times[midElt];
349 double medianRuntime = median / ((double)param.numAsyncLaunches_);
350 double minRuntime = min / ((double)param.numAsyncLaunches_);
351 printf("%s,medianRuntime,minRuntime,medianGflopPerSec,maxGflopPerSec\n",
352 runHeader.c_str());
353 printf("BenchSummary,%s,%f,%f,%f,%f\n", runPrefix.c_str(), medianRuntime,
354 minRuntime, b.gops() / medianRuntime, b.gops() / minRuntime);
355 }
356}
357