1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "CPUBackend.h"
#include "CPUFunction.h"
#include "CPULLVMIRGen.h"

#include "glow/Backend/BackendUtils.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/Instrs.h"
#include "glow/LLVMIRCodeGen/LLVMIRGen.h"
#include "glow/Support/Debug.h"

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"

#include <numeric>
#include <thread>
33
34using namespace glow;
35
36CPUBackend::CPUBackend() {
37 /// If target is not explicitly given we use the host attributes.
38 auto &opts = getOptions();
39 if (opts.getTarget().empty()) {
40 opts.setTarget(LLVMBackend::getHostTarget());
41 opts.setCPU(LLVMBackend::getHostCPU());
42 opts.setTargetFeatures(LLVMBackend::getHostFeatures());
43 }
44}
45
/// We compile the standard library (libjit) to LLVM bitcode, and then convert
/// that binary data to an include file using an external utility (include-bin).
/// The resulting file is included here to compile the bitcode image into our
/// library. The generated .inc file expands to a comma-separated byte list,
/// which is why the #include sits inside the array initializer.
static const unsigned char libjit_bc[] = {
#include "glow/libjit/libjit_cpu.inc"
};
/// Size in bytes of the embedded libjit bitcode image.
static const size_t libjit_bc_size = sizeof(libjit_bc);
54
55bool CPUBackend::isOpSupported(const NodeInfo &NI) const {
56 switch (NI.getKind()) {
57
58 case Kinded::Kind::CPUMaxSplatNodeKind:
59 return NI.allInputsAndOutputsHaveSameElemKind(
60 {ElemKind::FloatTy, ElemKind::Int8QTy});
61
62 case Kinded::Kind::CPUConvDKKC8NodeKind:
63 return NI.allInputsAndOutputsHaveSameElemKind({ElemKind::FloatTy});
64
65 // Delegate everything else to the LLVM backend.
66 default:
67 return LLVMBackend::isOpSupported(NI);
68 }
69}
70
71bool CPUBackend::shouldLower(const Node *N) const {
72 switch (N->getKind()) {
73 case Kinded::Kind::ReluNodeKind:
74 case Kinded::Kind::ClipNodeKind:
75 case Kinded::Kind::LeakyReluNodeKind:
76 case Kinded::Kind::FullyConnectedNodeKind:
77 case Kinded::Kind::ConvolutionNodeKind:
78 case Kinded::Kind::SparseLengthsSumNodeKind:
79 return false;
80 default:
81 return true;
82 }
83}
84
85bool CPUBackend::supportsFusedActivation(Node *parent, Node *activation) const {
86 // CPU backend only supports fusing activations into Convolution and
87 // ChannelwiseQuantizedConvolution.
88 if (!llvm::isa<ConvolutionNode>(parent) &&
89 !llvm::isa<ChannelwiseQuantizedConvolutionNode>(parent)) {
90 return false;
91 }
92
93 // Only the following activations can be fused.
94 // Additionally Tanh/Sigmoid are fused only for floating-point type. For
95 // quantized type Lookup Tables should be used instead.
96 switch (activation->getKind()) {
97 case Kinded::Kind::ReluNodeKind:
98 case Kinded::Kind::ClipNodeKind:
99 case Kinded::Kind::LeakyReluNodeKind:
100 return true;
101 case Kinded::Kind::SigmoidNodeKind:
102 return llvm::cast<SigmoidNode>(activation)
103 ->getResult()
104 .getType()
105 ->isFPType();
106 case Kinded::Kind::TanhNodeKind:
107 return llvm::cast<TanhNode>(activation)->getResult().getType()->isFPType();
108 default:
109 return false;
110 }
111}
112
113unsigned CPUBackend::numDevices() {
114 return std::thread::hardware_concurrency();
115}
116
117std::vector<unsigned> CPUBackend::scanDeviceIDs() {
118 std::vector<unsigned> deviceIDs(CPUBackend::numDevices());
119 std::iota(std::begin(deviceIDs), std::end(deviceIDs), 0);
120 return deviceIDs;
121}
122
123std::unique_ptr<CompiledFunction> CPUBackend::createCompiledFunction(
124 std::unique_ptr<GlowJIT> JIT,
125 runtime::RuntimeBundle &&runtimeBundle) const {
126 return glow::make_unique<CPUFunction>(std::move(JIT),
127 std::move(runtimeBundle));
128}
129
130std::unique_ptr<LLVMIRGen>
131CPUBackend::createIRGen(const IRFunction *IR,
132 AllocationsInfo &allocationsInfo) const {
133 CPULLVMIRGen *irgen = new CPULLVMIRGen(
134 IR, allocationsInfo, "", getLibjitBitcode(), getObjectRegistry());
135 return std::unique_ptr<CPULLVMIRGen>(irgen);
136}
137
138llvm::StringRef CPUBackend::getLibjitBitcode() const {
139 return llvm::StringRef(reinterpret_cast<const char *>(libjit_bc),
140 libjit_bc_size);
141}
142
143/// \returns true if network supports Type Lowering from \p T1 to \p T2.
144/// Populates PrecisionConfiguration with black list of operations that can't be
145/// converted.
146bool CPUBackend::canDoIndexTypeDemotion(
147 ElemKind fromTy, ElemKind toTy, PrecisionConfiguration &precConfig) const {
148 precConfig.precisionModeKindSet.insert(Kinded::Kind::EmbeddingBagNodeKind);
149 precConfig.precisionModeKindSet.insert(
150 Kinded::Kind::EmbeddingBagByteRowwiseOffsetsNodeKind);
151 precConfig.precisionModeKindSet.insert(
152 Kinded::Kind::FusedRowwiseQuantizedSparseLengthsSumNodeKind);
153 precConfig.precisionModeKindSet.insert(
154 Kinded::Kind::FusedRowwiseQuantizedSparseLengthsWeightedSumNodeKind);
155 precConfig.precisionModeKindSet.insert(
156 Kinded::Kind::SparseToDenseMaskNodeKind);
157 return fromTy == ElemKind::Int64ITy && toTy == ElemKind::Int32ITy;
158}
159
#if FACEBOOK_INTERNAL
/// \returns the registry of precompiled object files to link into the JIT.
/// In this build configuration no objects are bundled, so the registry is
/// empty.
llvm::ArrayRef<llvm::MemoryBufferRef> CPUBackend::getObjectRegistry() const {
  return llvm::ArrayRef<llvm::MemoryBufferRef>();
}
#else
// cpuObjectRegistry.h is generated at build time and defines the
// cpuObjectRegistry array of object-file buffers returned below.
#include "cpuObjectRegistry.h"
/// \returns the registry of precompiled object files to link into the JIT.
llvm::ArrayRef<llvm::MemoryBufferRef> CPUBackend::getObjectRegistry() const {
  return cpuObjectRegistry;
}
#endif
170