1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "CPUBackend.h" |
18 | #include "CPUFunction.h" |
19 | #include "CPULLVMIRGen.h" |
20 | |
21 | #include "glow/Backend/BackendUtils.h" |
22 | #include "glow/Graph/Graph.h" |
23 | #include "glow/IR/Instrs.h" |
24 | #include "glow/LLVMIRCodeGen/LLVMIRGen.h" |
25 | #include "glow/Support/Debug.h" |
26 | |
27 | #include "llvm/ADT/STLExtras.h" |
28 | #include "llvm/ADT/SmallVector.h" |
29 | #include "llvm/IR/IRBuilder.h" |
30 | #include "llvm/IR/LLVMContext.h" |
31 | |
32 | #include <numeric> |
33 | |
34 | using namespace glow; |
35 | |
36 | CPUBackend::CPUBackend() { |
37 | /// If target is not explicitly given we use the host attributes. |
38 | auto &opts = getOptions(); |
39 | if (opts.getTarget().empty()) { |
40 | opts.setTarget(LLVMBackend::getHostTarget()); |
41 | opts.setCPU(LLVMBackend::getHostCPU()); |
42 | opts.setTargetFeatures(LLVMBackend::getHostFeatures()); |
43 | } |
44 | } |
45 | |
46 | /// We compile the standard library (libjit) to LLVM bitcode, and then convert |
47 | /// that binary data to an include file using an external utility (include-bin). |
48 | /// The resulting file is included here to compile the bitcode image into our |
49 | /// library. |
50 | static const unsigned char libjit_bc[] = { |
51 | #include "glow/libjit/libjit_cpu.inc" |
52 | }; |
53 | static const size_t libjit_bc_size = sizeof(libjit_bc); |
54 | |
55 | bool CPUBackend::isOpSupported(const NodeInfo &NI) const { |
56 | switch (NI.getKind()) { |
57 | |
58 | case Kinded::Kind::CPUMaxSplatNodeKind: |
59 | return NI.allInputsAndOutputsHaveSameElemKind( |
60 | {ElemKind::FloatTy, ElemKind::Int8QTy}); |
61 | |
62 | case Kinded::Kind::CPUConvDKKC8NodeKind: |
63 | return NI.allInputsAndOutputsHaveSameElemKind({ElemKind::FloatTy}); |
64 | |
65 | // Delegate everything else to the LLVM backend. |
66 | default: |
67 | return LLVMBackend::isOpSupported(NI); |
68 | } |
69 | } |
70 | |
71 | bool CPUBackend::shouldLower(const Node *N) const { |
72 | switch (N->getKind()) { |
73 | case Kinded::Kind::ReluNodeKind: |
74 | case Kinded::Kind::ClipNodeKind: |
75 | case Kinded::Kind::LeakyReluNodeKind: |
76 | case Kinded::Kind::FullyConnectedNodeKind: |
77 | case Kinded::Kind::ConvolutionNodeKind: |
78 | case Kinded::Kind::SparseLengthsSumNodeKind: |
79 | return false; |
80 | default: |
81 | return true; |
82 | } |
83 | } |
84 | |
85 | bool CPUBackend::supportsFusedActivation(Node *parent, Node *activation) const { |
86 | // CPU backend only supports fusing activations into Convolution and |
87 | // ChannelwiseQuantizedConvolution. |
88 | if (!llvm::isa<ConvolutionNode>(parent) && |
89 | !llvm::isa<ChannelwiseQuantizedConvolutionNode>(parent)) { |
90 | return false; |
91 | } |
92 | |
93 | // Only the following activations can be fused. |
94 | // Additionally Tanh/Sigmoid are fused only for floating-point type. For |
95 | // quantized type Lookup Tables should be used instead. |
96 | switch (activation->getKind()) { |
97 | case Kinded::Kind::ReluNodeKind: |
98 | case Kinded::Kind::ClipNodeKind: |
99 | case Kinded::Kind::LeakyReluNodeKind: |
100 | return true; |
101 | case Kinded::Kind::SigmoidNodeKind: |
102 | return llvm::cast<SigmoidNode>(activation) |
103 | ->getResult() |
104 | .getType() |
105 | ->isFPType(); |
106 | case Kinded::Kind::TanhNodeKind: |
107 | return llvm::cast<TanhNode>(activation)->getResult().getType()->isFPType(); |
108 | default: |
109 | return false; |
110 | } |
111 | } |
112 | |
113 | unsigned CPUBackend::numDevices() { |
114 | return std::thread::hardware_concurrency(); |
115 | } |
116 | |
117 | std::vector<unsigned> CPUBackend::scanDeviceIDs() { |
118 | std::vector<unsigned> deviceIDs(CPUBackend::numDevices()); |
119 | std::iota(std::begin(deviceIDs), std::end(deviceIDs), 0); |
120 | return deviceIDs; |
121 | } |
122 | |
123 | std::unique_ptr<CompiledFunction> CPUBackend::createCompiledFunction( |
124 | std::unique_ptr<GlowJIT> JIT, |
125 | runtime::RuntimeBundle &&runtimeBundle) const { |
126 | return glow::make_unique<CPUFunction>(std::move(JIT), |
127 | std::move(runtimeBundle)); |
128 | } |
129 | |
130 | std::unique_ptr<LLVMIRGen> |
131 | CPUBackend::createIRGen(const IRFunction *IR, |
132 | AllocationsInfo &allocationsInfo) const { |
133 | CPULLVMIRGen *irgen = new CPULLVMIRGen( |
134 | IR, allocationsInfo, "" , getLibjitBitcode(), getObjectRegistry()); |
135 | return std::unique_ptr<CPULLVMIRGen>(irgen); |
136 | } |
137 | |
138 | llvm::StringRef CPUBackend::getLibjitBitcode() const { |
139 | return llvm::StringRef(reinterpret_cast<const char *>(libjit_bc), |
140 | libjit_bc_size); |
141 | } |
142 | |
143 | /// \returns true if network supports Type Lowering from \p T1 to \p T2. |
144 | /// Populates PrecisionConfiguration with black list of operations that can't be |
145 | /// converted. |
146 | bool CPUBackend::canDoIndexTypeDemotion( |
147 | ElemKind fromTy, ElemKind toTy, PrecisionConfiguration &precConfig) const { |
148 | precConfig.precisionModeKindSet.insert(Kinded::Kind::EmbeddingBagNodeKind); |
149 | precConfig.precisionModeKindSet.insert( |
150 | Kinded::Kind::EmbeddingBagByteRowwiseOffsetsNodeKind); |
151 | precConfig.precisionModeKindSet.insert( |
152 | Kinded::Kind::FusedRowwiseQuantizedSparseLengthsSumNodeKind); |
153 | precConfig.precisionModeKindSet.insert( |
154 | Kinded::Kind::FusedRowwiseQuantizedSparseLengthsWeightedSumNodeKind); |
155 | precConfig.precisionModeKindSet.insert( |
156 | Kinded::Kind::SparseToDenseMaskNodeKind); |
157 | return fromTy == ElemKind::Int64ITy && toTy == ElemKind::Int32ITy; |
158 | } |
159 | |
/// \returns the object registry that createIRGen() hands to the IR generator.
/// Exactly one of the two definitions below is compiled, selected by the
/// FACEBOOK_INTERNAL preprocessor value.
#if FACEBOOK_INTERNAL
// FACEBOOK_INTERNAL builds: the registry is empty.
llvm::ArrayRef<llvm::MemoryBufferRef> CPUBackend::getObjectRegistry() const {
  return llvm::ArrayRef<llvm::MemoryBufferRef>();
}
#else
// All other builds: return cpuObjectRegistry.
// NOTE(review): cpuObjectRegistry is presumably provided by the header
// included right here - confirm.
#include "cpuObjectRegistry.h"
llvm::ArrayRef<llvm::MemoryBufferRef> CPUBackend::getObjectRegistry() const {
  return cpuObjectRegistry;
}
#endif
170 | |