/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef GLOW_TESTS_BACKENDTESTUTILS_H
#define GLOW_TESTS_BACKENDTESTUTILS_H

#include "glow/Backend/Backend.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/Graph/Node.h"
#include "glow/IR/IR.h"
#include "glow/IR/IRBuilder.h"
#include "glow/IR/IRGen.h"
#include "glow/Quantization/Base/Base.h"

#include "llvm/Support/Casting.h"

#include "gtest/gtest.h"
#include <cmath>
#include <initializer_list>

namespace glow {

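/// Number of times to clone a Function inside itself when running
/// parallelized operator tests (configurable from the command line).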
extern unsigned parCloneCountOpt;

// INSTANTIATE_TEST_CASE_P is deprecated as of gtest v1.10.0, but for now we
// still use it internally.
#if FACEBOOK_INTERNAL
#define GLOW_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
#else
#define GLOW_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
#endif /* FACEBOOK_INTERNAL */

// A test harness to enable a test case for specific backends. A test suite
// should subclass this and instantiate it as follows:
//
// class OperationTest : public BackendTest {
// ...
// };
//
// GLOW_INSTANTIATE_TEST_SUITE_P_FOR_BACKEND_TEST(Prefix, OperationTest);
//
// A test case is defined using TEST_P(), and ENABLED_BACKENDS() can be used
// to whitelist certain backends for the test. The absence of
// ENABLED_BACKENDS() enables the test for all available backends:
//
// TEST_P(OperationTest, replaceNaN) {
//   // Enable this test case only for Interpreter and CPU.
//   ENABLED_BACKENDS("Interpreter", "CPU");
//   // Regular test code.
//   ...
// }
#define DECLARE_STATELESS_BACKEND_TEST(CLASS_NAME, CONFIG_NAME)               \
  class CLASS_NAME : public ::testing::TestWithParam<CONFIG_NAME> {           \
  protected:                                                                   \
    bool isEnabledBackend(const std::set<std::string> &enabledBackends) {     \
      return enabledBackends.find(getBackendName()) != enabledBackends.end(); \
    }                                                                          \
                                                                               \
  public:                                                                      \
    std::string getBackendName() { return std::get<0>(GetParam()); }          \
  }

/// Note that we use std::tuple<std::string> here to match other tests which
/// are parameterized across many other values, e.g. those in
/// ParameterSweepTest.
DECLARE_STATELESS_BACKEND_TEST(BackendStatelessTest, std::tuple<std::string>);

/// Whether to ignore the blacklist and run disabled tests.
extern bool runDisabledTests;

class BackendTest : public BackendStatelessTest {
public:
  BackendTest(uint64_t deviceMemory = 0)
      : EE_(getBackendName(), deviceMemory), mod_(EE_.getModule()) {
    F_ = mod_.createFunction("main");
  }

protected:
  ExecutionEngine EE_{getBackendName()};
  Module &mod_;
  Function *F_;
};
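
// A minimal sketch of a test built on BackendTest (the test name, shapes, and
// values below are only illustrative):
//
// TEST_P(OperationTest, simpleAdd) {
//   PlaceholderBindings bindings;
//   auto *in = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "in", false);
//   bindings.allocate(in)->getHandle().clear(1.0);
//   auto *add = F_->createAdd("add", in, in);
//   auto *save = F_->createSave("save", add);
//   auto *resT = bindings.allocate(save->getPlaceholder());
//   EE_.compile(CompilationMode::Infer);
//   EE_.run(bindings);
//   EXPECT_FLOAT_EQ(resT->getHandle().raw(0), 2.0);
// }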

/// Stringify a macro def.
#define BACKEND_TO_STR(X) #X

#ifdef GLOW_TEST_BACKEND
#define STRINGIZE(X) BACKEND_TO_STR(X)
#define ALL_BACKENDS ::testing::Values(STRINGIZE(GLOW_TEST_BACKEND))
#else
#define ALL_BACKENDS ::testing::ValuesIn(getAvailableBackends())
#endif

// Instantiate parameterized test suite with all available backends.
#define GLOW_INSTANTIATE_TEST_SUITE_P_FOR_BACKEND_TEST(prefix, test_case_name) \
  GLOW_INSTANTIATE_TEST_SUITE_P(prefix, test_case_name, ALL_BACKENDS)

// Instantiate parameterized test suite with all available backends, combined
// with an additional generator of parameter values.
#define GLOW_INSTANTIATE_TEST_SUITE_P_FOR_BACKEND_COMBINED_TEST(              \
    prefix, test_case_name, combine)                                          \
  GLOW_INSTANTIATE_TEST_SUITE_P(prefix, test_case_name,                       \
                                ::testing::Combine(ALL_BACKENDS, combine))

// Skip the current test unless the backend under test is one of the listed
// backends (or running disabled tests was requested).
#define ENABLED_BACKENDS(...)                                                  \
  if (!runDisabledTests && !isEnabledBackend({__VA_ARGS__}))                   \
    GTEST_SKIP();

/// Blacklist of tests for the current backend under test.
extern std::set<std::string> backendTestBlacklist;

/// Bool for whether to use symmetric quantization for rowwise-quantized FCs.
extern bool useSymmetricRowwiseQuantFC;

/// Intermediate layer of macros to make expansion of defs work correctly.
#define INSTANTIATE_TEST_INTERNAL(B, T)                                        \
  GLOW_INSTANTIATE_TEST_SUITE_P(B, T, ::testing::Values(BACKEND_TO_STR(B)));

/// Instantiate a test suite for the backend specified by GLOW_TEST_BACKEND.
/// Usually GLOW_TEST_BACKEND will be defined by the build system, to avoid
/// tightly coupling the existing set of backends to the source.
#define INSTANTIATE_BACKEND_TEST(T)                                            \
  INSTANTIATE_TEST_INTERNAL(GLOW_TEST_BACKEND, T);

/// Helper macro to check the current test against the blacklist.
#define CHECK_IF_ENABLED()                                                     \
  if (!runDisabledTests &&                                                     \
      backendTestBlacklist.count(                                              \
          ::testing::UnitTest::GetInstance()->current_test_info()->name()))   \
    GTEST_SKIP();
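
// Typical usage (sketch): place the check first in the test body so that a
// blacklisted test is skipped before it does any work.
//
// TEST_P(OperationTest, someOp) {
//   CHECK_IF_ENABLED();
//   // Regular test code.
//   ...
// }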

class NumericsTest : public BackendTest {
protected:
  PlaceholderBindings bindings_;
};

class GraphOptz : public ::testing::Test {
public:
  GraphOptz(llvm::StringRef backendName = "Interpreter")
      : EE_(backendName), mod_(EE_.getModule()) {
    F_ = mod_.createFunction("main");
  }

protected:
  virtual void checkNumericalEquivalence(float allowedError = 0.0001) {
    // Check that the Function and its optimized counterpart exist.
    ASSERT_TRUE(F_);
    ASSERT_TRUE(optimizedF_);

    // Check that the bindings are not empty. If they are, the numerical
    // equivalence check can produce a false positive.
    EXPECT_GT(bindings_.getDataSize(), 0);

    // Allocate any leftover PHs; these are usually for SaveNodes.
    bindings_.allocate(mod_.getPlaceholders());

    // Clone bindings to use for original and optimized functions.
    PlaceholderBindings originalBindings = bindings_.clone();
    PlaceholderBindings optimizedBindings = bindings_.clone();

    // Compile and run functions. Only lower Functions; we do not want to
    // optimize the unoptimized Function, and the optimized Function has, well,
    // already been optimized.
    if (!alreadyCompiled_) {
      cctx_.optimizationOpts.onlyLowerFuns.insert(F_);
      cctx_.optimizationOpts.onlyLowerFuns.insert(optimizedF_);
      EE_.compile(cctx_);
    }
    EE_.run(originalBindings, F_->getName());
    EE_.run(optimizedBindings, optimizedF_->getName());

    // Compare outputs.
    EXPECT_TRUE(PlaceholderBindings::compare(&originalBindings,
                                             &optimizedBindings, allowedError));
  }

  /// Verify the module is still valid at the end of the test.
  virtual void TearDown() override { EXPECT_TRUE(mod_.verify()); }

  /// ExecutionEngine instance for running functions to check numerical
  /// equivalence.
  ExecutionEngine EE_;
  /// A reference to the Module inside EE_.
  Module &mod_;
  /// The original Function for the test case.
  Function *F_{nullptr};
  /// The optimized Function for the test case.
  Function *optimizedF_{nullptr};
  /// The bindings used to check numerical equivalence for the test case.
  PlaceholderBindings bindings_;
  /// CompilationContext used for all Functions in \ref mod_.
  CompilationContext cctx_;
  /// Whether \ref mod_ has already been compiled.
  bool alreadyCompiled_{false};
};
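
// A minimal sketch of a GraphOptz-based test (the optimization under test and
// the shapes are only illustrative): build F_, clone it into optimizedF_,
// apply the optimization to the clone, then compare the two numerically.
//
// TEST_F(GraphOptz, optimizeSomething) {
//   auto *in = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "in", false);
//   bindings_.allocate(in)->getHandle().randomize(-1.0, 1.0, mod_.getPRNG());
//   F_->createSave("save", F_->createTanh("tanh", in));
//   optimizedF_ = F_->clone(F_->getName().str() + "_optimized");
//   // ... run the optimization pass under test on optimizedF_ ...
//   checkNumericalEquivalence();
// }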

/// MockBackend used only for unit testing.
class MockBackend : public Backend {
public:
  class MockFunction : public CompiledFunction {
  public:
    MockFunction(runtime::RuntimeBundle &&bundle)
        : CompiledFunction(std::move(bundle)) {}

    Error execute(ExecutionContext *) override { return Error::success(); }

    std::string getCompileBackendName() const override { return "MockBackend"; }
  };

  std::string getBackendName() const override { return "MockBackend"; }

  Expected<std::unique_ptr<CompiledFunction>>
  compile(Function *F, const BackendOptions &) const override {
    return glow::make_unique<MockFunction>(runtime::RuntimeBundle::create(*F));
  }

  bool isOpSupported(const NodeInfo &NI) const override { return false; }

  bool generateInst(Node *N, IRGenVisitor &irgen) const override {
    return false;
  }

  runtime::DeviceManager *
  createDeviceManager(const runtime::DeviceConfig &deviceConfig) override {
    return nullptr;
  }
};

/// MockBackendCustomIRGen used only for unit testing to test custom lowering
/// from Node to Instruction IR.
class MockBackendCustomIRGen : public Backend {
  class MockFunction : public CompiledFunction {
  public:
    MockFunction(runtime::RuntimeBundle &&bundle)
        : CompiledFunction(std::move(bundle)) {}

    Error execute(ExecutionContext *) override { return Error::success(); }

    std::string getCompileBackendName() const override { return "Interpreter"; }
  };

  std::string getBackendName() const override { return "Interpreter"; }

  Expected<std::unique_ptr<CompiledFunction>>
  compile(Function *F, const BackendOptions &) const override {
    return glow::make_unique<MockFunction>(runtime::RuntimeBundle::create(*F));
  }

  bool isOpSupported(const NodeInfo &NI) const override { return false; }

  runtime::DeviceManager *
  createDeviceManager(const runtime::DeviceConfig &deviceConfig) override {
    return nullptr;
  }

  bool generateInst(Node *N, IRGenVisitor &irgen) const override {
    bool hasChanged = false;
    auto builder_ = irgen.getBuilder();
    switch (N->getKind()) {
    case glow::Kinded::Kind::ConvolutionNodeKind: {
      auto *CN__ = llvm::cast<ConvolutionNode>(N);
      auto *Src = irgen.valueForNode(CN__->getInput());
      auto *Filter = irgen.valueForNode(CN__->getFilter());
      auto *Bias = irgen.valueForNode(CN__->getBias());
      std::string allocName = std::string(N->getName()) + ".res";
      auto *Dest__ = builder_->createAllocActivationInst(
          allocName, CN__->getResult().getType());
      auto *V = builder_->createConvolutionInst(
          "CustomConvolutionInstruction", Dest__, Src, Filter, Bias,
          CN__->getKernels(), CN__->getStrides(), CN__->getPads(),
          CN__->getGroup(), CN__->getDilation(), CN__->getLayout(),
          CN__->getFusedActivation(), {});
      if (N->hasPredicate()) {
        V->setPredicate(irgen.valueForNode(N->getPredicate()));
      }
      irgen.registerIR(CN__->getResult(), V->getDest());
      irgen.setNodeToIR(N, V);
      hasChanged = true;
      break;
    }
    default:
      break;
    }
    return hasChanged;
  }
};

/// Pair representing a pointer to a Function with a single output, and the
/// allocated Tensor that backs the Placeholder of the single output.
using FunctionTensorPair = std::pair<Function *, Tensor *>;

/// Signature of functions used to create and initialize a Function. Returns a
/// pair of the created Function and the Tensor that backs the Placeholder of
/// its single output.
using CreateAndInitFunction =
    std::function<FunctionTensorPair(PlaceholderBindings &, ExecutionEngine &)>;
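
// A minimal sketch of a CreateAndInitFunction (names and shapes are only
// illustrative): build a small Function in the ExecutionEngine's Module,
// allocate and initialize its Placeholders in the bindings, and return the
// Function together with the Tensor backing its single output.
//
// FunctionTensorPair createAndInitTinyNet(PlaceholderBindings &bindings,
//                                         ExecutionEngine &EE) {
//   Module &mod = EE.getModule();
//   Function *F = mod.createFunction("main");
//   auto *in = mod.createPlaceholder(ElemKind::FloatTy, {1, 8}, "in", false);
//   bindings.allocate(in)->getHandle().randomize(-1.0, 1.0, mod.getPRNG());
//   auto *save = F->createSave("save", F->createTanh("tanh", in));
//   auto *outT = bindings.allocate(save->getPlaceholder());
//   return std::make_pair(F, outT);
// }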

/// Helper to create a Placeholder in \p mod whose type matches \p tensor,
/// register it in \p bindings backed by \p tensor's data, and give it the
/// provided \p name and \p layout.
Placeholder *createPlaceholder(Module &mod, PlaceholderBindings &bindings,
                               Tensor *tensor, llvm::StringRef name,
                               const std::string &layout = ANY_LAYOUT);

/// Comparison helper for tensors that cannot be expected to be bitwise
/// identical: each element must match within \p absoluteTolerance, and the
/// aggregate error must stay below \p maxRMSE.
template <typename DataType>
void compare(Tensor &expected, Tensor &result, float absoluteTolerance,
             float maxRMSE) {
  float sumOfSquareOfErrors = 0;
  auto expectedHandle = expected.getHandle<DataType>();
  auto resultHandle = result.getHandle<DataType>();

  for (unsigned index = 0; index < expected.size(); index++) {
    auto expectedValue = float(expectedHandle.raw(index));
    auto resultValue = float(resultHandle.raw(index));
    EXPECT_NEAR(resultValue, expectedValue, absoluteTolerance);
    sumOfSquareOfErrors += std::pow(resultValue - expectedValue, 2);
  }
  EXPECT_LE(std::sqrt(sumOfSquareOfErrors) / expected.size(), maxRMSE);
}
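
// For example (the tolerances are only illustrative), after running the same
// model on two backends one might allow small per-element and aggregate
// differences:
//
// compare<float>(interpOut, backendOut, /* absoluteTolerance */ 0.01f,
//                /* maxRMSE */ 0.001f);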

/// Given a method \p createAndInitFunction that creates and initializes a
/// FloatTy Function with a single output Tensor, checks that the result of
/// executing the Function on the Interpreter backend matches the result of
/// executing it on the backend \p backendName, within \p allowedError.
/// \p interpElemKind and \p backendElemKind represent the desired ElemKinds
/// for the respective Functions to use. If either requires quantization then
/// a profile will first be gathered on the Interpreter, and that profile will
/// be used to quantize one or both. Otherwise, if either is Float16Ty then the
/// respective Function will be converted using the Converter. If
/// \p convertToRowwiseQuantization then nodes supporting rowwise quantization
/// will be converted to use it. \p schema represents the quantization schema
/// to use, if applicable. \p parallelCount represents the number of times to
/// clone the Function inside itself, so that testing can be done on
/// architectures that have parallel compute engines. The bias is quantized
/// using the precision \p biasElemKind. \p forceFP16AccumSLS and
/// \p float16Format are propagated into the precision config for compilation.
/// If \p convertToChannelwiseQuantization is enabled then nodes supporting
/// channelwise quantization will be converted. If \p skipQuantizeFCBias then
/// quantization is not applied to FC bias inputs.
void compareAgainstInterpreter(
    llvm::StringRef backendName, CreateAndInitFunction createAndInitFunction,
    ElemKind interpElemKind, ElemKind backendElemKind,
    float allowedError = 0.0001, unsigned parallelCount = 1,
    bool convertToRowwiseQuantization = false,
    quantization::Schema schema = quantization::Schema::Asymmetric,
    ElemKind biasElemKind = ElemKind::Int32QTy, bool forceFP16AccumSLS = false,
    PrecisionConfiguration::Float16Format float16Format =
        PrecisionConfiguration::Float16Format::FP16,
    bool convertToChannelwiseQuantization = false,
    bool skipQuantizeFCBias = false);
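
// A typical call (sketch): compare a float run on the Interpreter against an
// Int8-quantized run on the backend under test, with a relaxed error bound.
//
// compareAgainstInterpreter(getBackendName(), createAndInitBasicFCNet,
//                           ElemKind::FloatTy, ElemKind::Int8QTy, 0.05f);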

/// Given some \p FTP representing a Function with a single SaveNode and its
/// Tensor output, duplicate the Nodes in the Function and their Placeholder
/// inputs given \p bindings \p parallelCount times. \returns a set of Tensor
/// pointers for each output of the cloned Function. If quantization node info
/// is found in \p cctx then all of the node infos will be cloned accordingly,
/// using the names of the newly cloned nodes added to the Function.
std::unordered_set<Tensor *> cloneFunInsideFun(FunctionTensorPair FTP,
                                               PlaceholderBindings *bindings,
                                               CompilationContext &cctx,
                                               unsigned parallelCount);

/// \returns the number of nodes in \p F of kind \p kind.
unsigned countNodeKind(Function *F, Kinded::Kind kind);

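/// The helpers below build small reference networks, run them on the backend
/// named by \p kind, and write the final result into \p out; the train*
/// variants additionally train the network before running inference. They are
/// primarily used to cross-check results between backends.
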
void inferConvNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                  llvm::StringRef kind);

int inferConvReluNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                     unsigned_t kernel, unsigned_t stride, unsigned_t pad,
                     llvm::StringRef kind);

void trainConvNet(Tensor *inputs, Tensor *kernel1, Tensor *bias1,
                  Tensor *kernel2, Tensor *bias2, Tensor *selected,
                  llvm::ArrayRef<dim_t> shape1, llvm::ArrayRef<dim_t> shape2,
                  Tensor *out, llvm::StringRef kind);

void inferLocalResponseNormalizationNet(Tensor *inputs, Tensor *out,
                                        llvm::StringRef kind);

void trainLocalResponseNormalizationNet(Tensor *inputs, Tensor *weights,
                                        Tensor *bias, Tensor *selected,
                                        llvm::ArrayRef<dim_t> shape1,
                                        llvm::ArrayRef<dim_t> shape2,
                                        Tensor *out, llvm::StringRef kind);

void trainAvgPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind);

void trainMaxPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind);

void inferIntLookupTableNetInt8(Tensor *input, Tensor *out,
                                llvm::ArrayRef<int8_t> table,
                                llvm::StringRef kind);

void inferIntLookupTableNetInt16(Tensor *input, Tensor *out,
                                 llvm::ArrayRef<int16_t> table,
                                 llvm::StringRef kind);

void inferGroupConv(Tensor *out, llvm::StringRef kind);

void inferNonSquarePaddingConv(Tensor *out, llvm::StringRef kind);

void inferNonSquareKernelConv(Tensor *out, llvm::StringRef kind);

void inferNonSquareStrideConv(Tensor *out, llvm::StringRef kind);

void inferConvDKKC8(Tensor *out, llvm::StringRef kind);

void inferSmallConv(Tensor *inputs, Tensor *out, llvm::StringRef kind);

void trainSoftMaxNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, Tensor *out, llvm::StringRef kind);

void inferBasicConvNet(Tensor *inputs, Tensor *out, llvm::StringRef kind,
                       size_t convDepth);

void inferTanhConcatNet(Tensor *input1, Tensor *input2, Tensor *input3,
                        Tensor *out, llvm::StringRef kind);

FunctionTensorPair createAndInitBasicFCNet(PlaceholderBindings &bindings,
                                           ExecutionEngine &EE);

void inferMixedNet(Tensor *inputs, Tensor *out, llvm::StringRef kind);

void inferComplexNet1(Tensor *inputs1, Tensor *inputs2, Tensor *inputs3,
                      Tensor *inputs4, Tensor *out, llvm::StringRef kind);

void inferTinyResnet(Tensor *input, Tensor *out, std::vector<Tensor> &weights,
                     llvm::StringRef kind);

void inferExtract3D(Tensor *input, Tensor *out, llvm::StringRef kind);

void inferMaxSplat(Tensor *input, Tensor *out, llvm::StringRef kind);

/// A helper method to insert a CompiledFunction \p func into the
/// DeviceManager \p device.
void insertCompiledFunction(llvm::StringRef name, CompiledFunction *func,
                            runtime::DeviceManager *device, Module *mod);

/// A helper method to run the specified function \p name with the provided
/// ExecutionContext \p context on the specified DeviceManager \p device.
void runOnDevice(ExecutionContext &context, llvm::StringRef name,
                 runtime::DeviceManager *device);

/// Returns a new Constant of type UInt8FusedQTy with fused rowwise
/// quantization scales and offsets (i.e. the last 8 bytes of each row
/// contain the scale and offset).
Constant *createRandomFusedRowwiseQuantizedConstant(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16 = false);

/// Returns a new Placeholder with fused rowwise quantization scales and
/// offsets (FP16 scales and offsets if \p useFusedFP16).
Placeholder *createFusedRowwiseQuantizedPlaceholder(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16 = false);

/// Returns a new Constant of the provided \p type and \p dims, initialized
/// with random data. If using floating point, then it is initialized via
/// Xavier with filterSize equal to twice the number of elements in \p dims.
/// Otherwise integer types are initialized via their min and max values.
Constant *createRandomizedConstant(Module &mod, TypeRef type,
                                   llvm::ArrayRef<dim_t> dims,
                                   llvm::StringRef name);
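
// For example (the shape and element kind are only illustrative), one might
// create a random FP16 weight Constant like so:
//
// auto *W = createRandomizedConstant(
//     mod, mod.uniqueType(ElemKind::Float16Ty, {16, 16}), {16, 16}, "W");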

/// Helper method to wrap dispatching an inference request for function
/// \p fname on a HostManager \p hostManager as a synchronous interface. If
/// \p concurrentRequestsOpt is set, the request is duplicated so that multiple
/// requests are sent concurrently.
void dispatchInference(const std::string &fname,
                       runtime::HostManager *hostManager,
                       ExecutionContext &context,
                       unsigned concurrentRequestsOpt,
                       bool useNewExecutionContext = false);
} // namespace glow

#endif // GLOW_TESTS_BACKENDTESTUTILS_H