1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | #if defined(TVM_LLVM_VERSION) && TVM_LLVM_VERSION >= 70 |
21 | |
22 | #include <llvm/ADT/ArrayRef.h> |
23 | #include <llvm/ADT/SmallString.h> |
24 | #include <llvm/ADT/StringRef.h> |
25 | #include <llvm/Bitcode/BitcodeWriter.h> |
26 | #include <llvm/IR/Constants.h> |
27 | #include <llvm/IR/DerivedTypes.h> |
28 | #include <llvm/IR/Function.h> |
29 | #include <llvm/IR/GlobalVariable.h> |
30 | #include <llvm/IR/Instructions.h> |
31 | #include <llvm/IR/Intrinsics.h> |
32 | #if TVM_LLVM_VERSION >= 100 |
33 | #include <llvm/IR/IntrinsicsHexagon.h> |
34 | #endif |
35 | #include <llvm/IR/LLVMContext.h> |
36 | #include <llvm/IR/LegacyPassManager.h> |
37 | #include <llvm/IR/MDBuilder.h> |
38 | #include <llvm/IR/Module.h> |
39 | #if TVM_LLVM_VERSION >= 100 |
40 | #include <llvm/Support/Alignment.h> |
41 | #endif |
42 | #include <llvm/Support/CodeGen.h> |
43 | #include <llvm/Support/CommandLine.h> |
44 | #include <llvm/Support/FileSystem.h> |
45 | #include <llvm/Support/raw_ostream.h> |
46 | #include <llvm/Target/TargetMachine.h> |
47 | #include <llvm/Transforms/Utils/Cloning.h> |
48 | #include <tvm/runtime/module.h> |
49 | #include <tvm/target/codegen.h> |
50 | #include <tvm/tir/analysis.h> |
51 | |
52 | #include <cstdio> |
53 | #include <cstdlib> |
54 | #include <map> |
55 | #include <sstream> |
56 | #include <string> |
57 | #include <unordered_map> |
58 | #include <utility> |
59 | #include <vector> |
60 | |
61 | #include "../../runtime/hexagon/hexagon_module.h" |
62 | #include "../build_common.h" |
63 | #include "codegen_cpu.h" |
64 | #include "llvm_instance.h" |
65 | |
66 | namespace tvm { |
67 | namespace codegen { |
68 | |
69 | // Hexagon code generation |
70 | class CodeGenHexagon final : public CodeGenCPU { |
71 | public: |
72 | void Init(const std::string& module_name, LLVMTarget* llvm_target, bool system_lib, |
73 | bool dynamic_lookup, bool target_c_runtime) override; |
74 | void InitTarget() final; |
75 | |
76 | using CodeGenCPU::VisitStmt_; |
77 | llvm::Value* VisitExpr_(const BufferLoadNode* op) override; |
78 | llvm::Value* CreateIntrinsic(const CallNode* op) override; |
79 | |
80 | llvm::Value* CreateCallExtern(Type ret_type, String global_symbol, const Array<PrimExpr>& args, |
81 | bool skip_first_arg) override; |
82 | llvm::Value* CreateCallExternQHL(Type ret_type, String global_symbol, const Array<PrimExpr>& args, |
83 | bool skip_first_arg); |
84 | |
85 | llvm::Module* GetModulePtr() const { return module_.get(); } |
86 | |
87 | uint64_t GetTypeSizeInBits(llvm::Type* type) const { |
88 | #if TVM_LLVM_VERSION >= 100 |
89 | return data_layout_->getTypeSizeInBits(type).getFixedSize(); |
90 | #else |
91 | return data_layout_->getTypeSizeInBits(type); |
92 | #endif |
93 | } |
94 | |
95 | protected: |
96 | void CreatePrintf(const std::string& format, llvm::ArrayRef<llvm::Value*> format_args) final; |
97 | |
98 | private: |
99 | TypedPointer CreateBufferPtr(llvm::Value* buffer_ptr, DataType buffer_element_dtype, |
100 | llvm::ArrayRef<llvm::Value*> indices, DataType value_dtype) final; |
101 | TypedPointer CreateStructRefPtr(DataType t, llvm::Value* buf, llvm::Value* index, int kind); |
102 | |
103 | bool IsQHLFunction(const std::string& func); |
104 | |
105 | llvm::Value* VectorLookupLoad(Buffer buffer, DataType buffer_type, Array<PrimExpr> indices); |
106 | llvm::Value* Intrinsic(llvm::Intrinsic::ID, llvm::ArrayRef<llvm::Value*> args); |
107 | std::vector<std::string> fqhl_list_ = { |
108 | "tvm_vect_qhmath_hvx_cos_ahf" , "tvm_vect_qhmath_hvx_tanh_ahf" , |
109 | "tvm_vect_qhmath_hvx_sigmoid_ahf" , "tvm_vect_qhmath_hvx_sin_ahf" , |
110 | "tvm_vect_qhmath_hvx_sqrt_ahf" , "tvm_vect_qhmath_hvx_exp_ahf" , |
111 | "tvm_vect_qhmath_hvx_tan_ahf" , "tvm_vect_qhmath_hvx_floor_ahf" , |
112 | "tvm_vect_qhmath_hvx_ceil_ahf" , "tvm_vect_qhmath_hvx_pow_ahf" }; |
113 | }; |
114 | |
115 | void CodeGenHexagon::Init(const std::string& module_name, LLVMTarget* llvm_target, bool system_lib, |
116 | bool dynamic_lookup, bool target_c_runtime) { |
117 | CodeGenCPU::Init(module_name, llvm_target, system_lib, dynamic_lookup, target_c_runtime); |
118 | } |
119 | |
120 | void CodeGenHexagon::InitTarget() { |
121 | native_vector_bits_ = 64; // Assume "scalar" vectors at first. |
122 | const auto hvx_length_feature = "+hvx-length" ; // +hvx-length{64|128}b |
123 | for (const std::string& f : llvm_target_->GetTargetFeatures()) { |
124 | llvm::StringRef fs(f); |
125 | if (!fs.startswith(hvx_length_feature)) continue; |
126 | |
127 | ICHECK(fs.endswith("b" )) << "malformed target feature: " << f; |
128 | int hvx_bytes = 0; |
129 | size_t len_begin = std::strlen(hvx_length_feature); |
130 | ICHECK(!fs.substr(len_begin, fs.size() - len_begin - 1).getAsInteger(10, hvx_bytes)) |
131 | << "invalid HVX length in feature string: " << f; |
132 | ICHECK(hvx_bytes == 64 || hvx_bytes == 128) |
133 | << "invalid HVX vector length: " << hvx_bytes << ", should be 64 or 128" ; |
134 | native_vector_bits_ = hvx_bytes * 8; |
135 | // There should only be one hvx-length... |
136 | break; |
137 | } |
138 | CodeGenCPU::InitTarget(); |
139 | } |
140 | |
141 | llvm::Value* CodeGenHexagon::CreateCallExternQHL(Type ret_type, String global_symbol, |
142 | const Array<PrimExpr>& args, bool skip_first_arg) { |
143 | int num_lanes = args[1].dtype().lanes(); |
144 | int vector_length = native_vector_bits_ / args[1].dtype().bits(); |
145 | num_lanes = ((num_lanes + vector_length - 1) / vector_length) * vector_length; |
146 | std::vector<llvm::Value*> vect_split; |
147 | for (int i = 0; i < num_lanes / vector_length; ++i) { |
148 | std::vector<llvm::Value*> sub_vect_val; |
149 | std::vector<llvm::Type*> arg_types; |
150 | for (size_t k = skip_first_arg; k < args.size(); ++k) |
151 | sub_vect_val.push_back( |
152 | CodeGenCPU::CreateVecSlice(MakeValue(args[k]), i * vector_length, vector_length)); |
153 | for (llvm::Value* v : sub_vect_val) { |
154 | arg_types.push_back(v->getType()); |
155 | } |
156 | llvm::FunctionType* ftype = llvm::FunctionType::get(arg_types[0], arg_types, false); |
157 | llvm::Function* f = module_->getFunction(MakeStringRef(global_symbol)); |
158 | if (f == nullptr) { |
159 | f = llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, |
160 | MakeStringRef(global_symbol), module_.get()); |
161 | } |
162 | #if TVM_LLVM_VERSION >= 90 |
163 | auto ext_callee = llvm::FunctionCallee(f); |
164 | #else |
165 | auto ext_callee = f; |
166 | #endif |
167 | vect_split.push_back(builder_->CreateCall(ext_callee, sub_vect_val)); |
168 | } |
169 | return CodeGenCPU::CreateVecConcat(vect_split); |
170 | } |
171 | |
172 | bool CodeGenHexagon::IsQHLFunction(const std::string& func) { |
173 | return std::find(fqhl_list_.begin(), fqhl_list_.end(), func) != fqhl_list_.end(); |
174 | } |
175 | |
176 | llvm::Value* CodeGenHexagon::CreateCallExtern(Type ret_type, String global_symbol, |
177 | const Array<PrimExpr>& args, bool skip_first_arg) { |
178 | int num_lanes = args[1].dtype().lanes(); |
179 | int vector_length = native_vector_bits_ / args[1].dtype().bits(); |
180 | if (IsQHLFunction(global_symbol) && (num_lanes > vector_length)) |
181 | return CreateCallExternQHL(ret_type, global_symbol, args, skip_first_arg); |
182 | return CodeGenCPU::CreateCallExtern(ret_type, global_symbol, args, skip_first_arg); |
183 | } |
184 | |
185 | llvm::Value* CodeGenHexagon::VisitExpr_(const BufferLoadNode* op) { |
186 | if (!op->buffer.same_as(op->buffer->data)) { |
187 | // Check if we can generate a vector lookup. |
188 | if (!op->indices[0].as<RampNode>()) { |
189 | if (auto* vlut = VectorLookupLoad(op->buffer, op->dtype, op->indices)) { |
190 | return vlut; |
191 | } |
192 | } |
193 | } |
194 | return CodeGenCPU::VisitExpr_(op); |
195 | } |
196 | |
197 | llvm::Value* CodeGenHexagon::CreateIntrinsic(const CallNode* op) { |
198 | #if TVM_LLVM_VERSION >= 150 |
199 | if (op->op.same_as(builtin::start_profile_intrinsic()) || |
200 | op->op.same_as(builtin::end_profile_intrinsic())) { |
201 | llvm::Value* id = MakeValue(op->args[0]); |
202 | auto instrprof_id = llvm::Intrinsic::hexagon_instrprof_custom; |
203 | llvm::Function* func = llvm::Intrinsic::getDeclaration(module_.get(), instrprof_id); |
204 | llvm::GlobalVariable* name_var = module_->getGlobalVariable("handler_name" ); |
205 | if (!name_var) { |
206 | llvm::StringRef init_str = "lwp_handler" ; |
207 | llvm::Constant* init = llvm::ConstantDataArray::getString(module_->getContext(), init_str); |
208 | |
209 | name_var = new llvm::GlobalVariable(*module_, init->getType(), true, |
210 | llvm::GlobalValue::InternalLinkage, init, "handler_name" ); |
211 | } |
212 | llvm::Type* t_int8_p_ = t_int8_->getPointerTo(); |
213 | return builder_->CreateCall(func, {llvm::ConstantExpr::getBitCast(name_var, t_int8_p_), id}); |
214 | } |
215 | #endif |
216 | return CodeGenCPU::CreateIntrinsic(op); |
217 | } |
218 | |
219 | void CodeGenHexagon::CreatePrintf(const std::string& format, |
220 | llvm::ArrayRef<llvm::Value*> format_args) { |
221 | // This function generates LLVM instructions to call HAP_debug_v2, |
222 | // as if the FARF macro in `HAP_farf.h` were called as |
223 | // FARF(ALWAYS, format, format_args[0], format_args[1], ...) |
224 | std::string func_name = "HAP_debug_v2" ; |
225 | |
226 | llvm::Function* func = module_->getFunction(func_name); |
227 | if (func == nullptr) { |
228 | llvm::FunctionType* ftype = llvm::FunctionType::get( |
229 | t_void_, {t_int32_, t_char_->getPointerTo(), t_int32_, t_char_->getPointerTo()}, true); |
230 | func = llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, func_name, module_.get()); |
231 | } |
232 | |
233 | llvm::Value* format_str = builder_->CreateGlobalStringPtr(format, "printf_format_str" ); |
234 | |
235 | // The value of FARF_ALWAYS_LEVEL, defined as HAP_LEVEL_HIGH |
236 | llvm::Value* level = ConstInt32(2); |
237 | |
238 | // There is no such filename/line number for this print statement |
239 | llvm::Value* filename = builder_->CreateGlobalStringPtr("generated-LLVM-code" , "dummy_filename" ); |
240 | llvm::Value* line_number = ConstInt32(1); |
241 | |
242 | std::vector<llvm::Value*> func_args = {level, filename, line_number, format_str}; |
243 | func_args.insert(func_args.end(), format_args.begin(), format_args.end()); |
244 | |
245 | builder_->CreateCall(func, func_args); |
246 | } |
247 | |
248 | CodeGenLLVM::TypedPointer CodeGenHexagon::CreateBufferPtr(llvm::Value* buffer_ptr, |
249 | DataType buffer_element_dtype, |
250 | llvm::ArrayRef<llvm::Value*> indices, |
251 | DataType value_dtype) { |
252 | // Flat indices get delegated to the LLVM codegen. |
253 | if (indices.size() == 1) { |
254 | return CodeGenCPU::CreateBufferPtr(buffer_ptr, buffer_element_dtype, indices, value_dtype); |
255 | } |
256 | |
257 | ICHECK_EQ(indices.size(), 2) << "CodegenHexagon supports 1-d and 2-d physical buffers, received " |
258 | << indices.size() << "-d buffer indices" ; |
259 | |
260 | // Use the first index to identify the pointer. |
261 | DataType dtype_void_ptr = DataType::Handle(); |
262 | CodeGenLLVM::TypedPointer buffer_chunk_ptr_ptr = |
263 | CodeGenCPU::CreateBufferPtr(buffer_ptr, dtype_void_ptr, {indices[0]}, dtype_void_ptr); |
264 | llvm::Value* buffer_chunk_ptr = |
265 | builder_->CreateLoad(buffer_chunk_ptr_ptr.type, buffer_chunk_ptr_ptr.addr); |
266 | |
267 | // Then delegate the CodeGenLLVM to find the value from the second |
268 | // index. |
269 | return CodeGenCPU::CreateBufferPtr(buffer_chunk_ptr, buffer_element_dtype, {indices[1]}, |
270 | value_dtype); |
271 | } |
272 | |
273 | CodeGenLLVM::TypedPointer CodeGenHexagon::CreateStructRefPtr(DataType t, llvm::Value* buf, |
274 | llvm::Value* index, int kind) { |
275 | static const std::map<int, int> field_index = { |
276 | {builtin::kArrData, 0}, {builtin::kArrDeviceType, 1}, {builtin::kArrDeviceId, 1}, |
277 | {builtin::kArrNDim, 2}, {builtin::kArrTypeCode, 3}, {builtin::kArrTypeBits, 3}, |
278 | {builtin::kArrTypeLanes, 3}, {builtin::kArrShape, 4}, {builtin::kArrStrides, 5}, |
279 | {builtin::kArrByteOffset, 6}}; |
280 | static const std::map<int, int> subfield_index = { |
281 | {builtin::kArrDeviceType, 0}, {builtin::kArrDeviceId, 1}, {builtin::kArrTypeCode, 0}, |
282 | {builtin::kArrTypeBits, 1}, {builtin::kArrTypeLanes, 2}, |
283 | }; |
284 | |
285 | if (kind < builtin::kArrKindBound_) { |
286 | if (buf->getType() == t_void_p_) { |
287 | buf = builder_->CreatePointerCast(buf, t_tvm_array_->getPointerTo()); |
288 | } else { |
289 | ICHECK_EQ(buf->getType(), t_tvm_array_->getPointerTo()); |
290 | } |
291 | /* The following "kinds" are accessing the members of DLTensor: |
292 | typedef struct { |
293 | void* data; kArrData |
294 | DLDevice device; kArrDeviceType (device.device_type) |
295 | kArrDeviceId (device.device_id) |
296 | int ndim; kArrNDim |
297 | DLDataType dtype; kArrTypeCode (dtype.code) |
298 | kArrTypeBits (dtype.bits) |
299 | kArrTypeLanes (dtype.lanes) |
300 | int64_t* shape; kArrShape |
301 | int64_t* strides; kArrStrides |
302 | uint64_t byte_offset; kArrByteOffset |
303 | } DLTensor; |
304 | */ |
305 | llvm::Value* base_gep = builder_->CreateInBoundsGEP(t_tvm_array_, buf, index, "base_gep" ); |
306 | if (kind == builtin::kArrAddr) { |
307 | return TypedPointer(t_void_p_, base_gep); |
308 | } |
309 | llvm::Value* field_gep = builder_->CreateInBoundsGEP( |
310 | t_tvm_array_, base_gep, {ConstInt32(0), ConstInt32(field_index.at(kind))}, "field_gep" ); |
311 | llvm::Type* field_type = t_tvm_array_->getStructElementType(field_index.at(kind)); |
312 | switch (kind) { |
313 | // These fields have no sub-fields. |
314 | case builtin::kArrData: |
315 | case builtin::kArrNDim: |
316 | case builtin::kArrShape: |
317 | case builtin::kArrStrides: |
318 | case builtin::kArrByteOffset: |
319 | return TypedPointer(field_type, field_gep); |
320 | } |
321 | llvm::Value* subfield_gep = builder_->CreateInBoundsGEP( |
322 | field_type, field_gep, {ConstInt32(0), ConstInt32(subfield_index.at(kind))}, |
323 | "subfield_gep" ); |
324 | llvm::Type* subfield_type = field_type->getStructElementType(subfield_index.at(kind)); |
325 | return TypedPointer(subfield_type, subfield_gep); |
326 | } |
327 | |
328 | if (kind == builtin::kTVMValueContent) { |
329 | /* TVMValue is a union: |
330 | typedef union { |
331 | int64_t v_int64; |
332 | double v_float64; |
333 | void* v_handle; |
334 | const char* v_str; |
335 | TVMType v_type; |
336 | DLDevice v_device; |
337 | } TVMValue; |
338 | */ |
339 | ICHECK_EQ(t.lanes(), 1); |
340 | ICHECK(t.is_handle() || t.bits() == 64); |
341 | if (t.is_int()) { |
342 | buf = builder_->CreatePointerCast(buf, t_int64_->getPointerTo()); |
343 | return TypedPointer(t_int64_, builder_->CreateInBoundsGEP(t_int64_, buf, index)); |
344 | } else if (t.is_float()) { |
345 | buf = builder_->CreatePointerCast(buf, t_float64_->getPointerTo()); |
346 | return TypedPointer(t_float64_, builder_->CreateInBoundsGEP(t_float64_, buf, index)); |
347 | } else { |
348 | ICHECK(t.is_handle()); |
349 | buf = builder_->CreatePointerCast(buf, t_tvm_value_->getPointerTo()); |
350 | buf = builder_->CreateInBoundsGEP(t_tvm_value_, buf, index); |
351 | return TypedPointer(t_void_p_, builder_->CreatePointerCast(buf, t_void_p_->getPointerTo())); |
352 | } |
353 | } |
354 | |
355 | assert(!"Unknown kind" ); |
356 | return TypedPointer(); |
357 | } |
358 | |
359 | llvm::Value* CodeGenHexagon::Intrinsic(llvm::Intrinsic::ID IntID, |
360 | llvm::ArrayRef<llvm::Value*> args) { |
361 | llvm::Function* intf = llvm::Intrinsic::getDeclaration(module_.get(), IntID); |
362 | #if TVM_LLVM_VERSION >= 90 |
363 | auto intf_callee = llvm::FunctionCallee(intf); |
364 | #else |
365 | auto intf_callee = intf; |
366 | #endif |
367 | std::vector<llvm::Value*> conv_args; |
368 | llvm::FunctionType* intf_type = intf->getFunctionType(); |
369 | ICHECK(args.size() == intf_type->getNumParams()); |
370 | |
371 | for (int i = 0, e = args.size(); i != e; ++i) { |
372 | llvm::Value* arg = args[i]; |
373 | auto* need_type = llvm::dyn_cast<llvm::VectorType>(intf_type->getParamType(i)); |
374 | auto* have_type = llvm::dyn_cast<llvm::VectorType>(arg->getType()); |
375 | if (need_type != nullptr && have_type != nullptr && need_type != have_type) { |
376 | int need_width = GetTypeSizeInBits(need_type); |
377 | int have_width = GetTypeSizeInBits(have_type); |
378 | if (need_width == have_width) { |
379 | if (need_width == native_vector_bits_ || need_width == 2 * native_vector_bits_) { |
380 | arg = builder_->CreateBitCast(arg, need_type); |
381 | } |
382 | } // TODO(joshherr-quic): add handling of v128i1 <-> v1024i1 |
383 | } |
384 | conv_args.push_back(arg); |
385 | } |
386 | return builder_->CreateCall(intf_callee, conv_args); |
387 | } |
388 | |
389 | llvm::Value* CodeGenHexagon::VectorLookupLoad(Buffer buffer, DataType buffer_type, |
390 | Array<PrimExpr> indices) { |
391 | PrimExpr index = indices[0]; |
392 | if (!index.dtype().is_vector()) { |
393 | return nullptr; |
394 | } |
395 | |
396 | if (buffer_type.bits() != 8) return nullptr; |
397 | |
398 | int table_elem_count = arith::Analyzer().Simplify(buffer->shape[0]).as<IntImmNode>()->value; |
399 | if (table_elem_count <= 0 || table_elem_count > 256) return nullptr; |
400 | |
401 | auto int32 = DataType::Int(32); |
402 | auto native_vector_bytes = native_vector_bits_ / 8; |
403 | |
404 | // Indexes |
405 | llvm::Value* trunc = MakeValue(Cast(index.dtype().with_bits(8), index)); |
406 | llvm::Value* index_pad = CreateVecPad(trunc, native_vector_bytes); |
407 | |
408 | // Values |
409 | std::vector<llvm::Value*> vloads; |
410 | DataType table_type = buffer_type.with_lanes(table_elem_count); |
411 | |
412 | auto table_all = |
413 | MakeValue(BufferLoad(buffer, { |
414 | Ramp(IntImm(int32, 0), IntImm(int32, 1), table_elem_count), |
415 | })); |
416 | |
417 | // The number of value vectors should be a power of 2. |
418 | int table_vec_count = llvm::PowerOf2Ceil(GetVectorBytes(table_type) / native_vector_bytes); |
419 | int table_vec_length = native_vector_bytes / buffer_type.bytes(); |
420 | for (int i = 0; i != table_vec_count; ++i) { |
421 | // CreateVecSlice will generate undefs for elements outside the source vector. |
422 | vloads.push_back(CreateVecSlice(table_all, i * table_vec_length, table_vec_length)); |
423 | } |
424 | |
425 | #define VLO(x) Intrinsic(llvm::Intrinsic::hexagon_V6_lo_128B, {x}) |
426 | #define VHI(x) Intrinsic(llvm::Intrinsic::hexagon_V6_hi_128B, {x}) |
427 | #define VXOR(x, y) Intrinsic(llvm::Intrinsic::hexagon_V6_vxor_128B, {x, y}) |
428 | #define VSHUFF(x) Intrinsic(llvm::Intrinsic::hexagon_V6_vshuffb_128B, {x}) |
429 | #define VSPLATB(x) Intrinsic(llvm::Intrinsic::hexagon_V6_lvsplatb_128B, {x}) |
430 | #define VLUT32(x, y, z) Intrinsic(llvm::Intrinsic::hexagon_V6_vlutvvbi_128B, {x, y, z}) |
431 | #define VLUT32_OR(v, x, y, z) \ |
432 | Intrinsic(llvm::Intrinsic::hexagon_V6_vlutvvb_oracci_128B, {v, x, y, z}) |
433 | |
434 | // Shuffle table bytes: |
435 | // 127, 63, 126, 62,........68, 4, 67, 3, 66, 2, 65, 1, 64, 0 |
436 | std::vector<llvm::Value*> table; |
437 | for (int i = 0; i != table_vec_count; ++i) table.push_back(VSHUFF(vloads[i])); |
438 | |
439 | // Get each 32 byte sub-table's output |
440 | std::vector<llvm::Value*> results; |
441 | int table_iters = table_elem_count / 32; |
442 | for (int i = 0; i < table_iters; ++i) |
443 | results.push_back(VLUT32(index_pad, table[i / 4], ConstInt32(i % 8))); |
444 | |
445 | // Combine outputs |
446 | llvm::Value* result = results[0]; |
447 | for (int i = 1; i < table_iters; ++i) result = VXOR(result, results[i]); |
448 | |
449 | llvm::Type* res_type = result->getType(); |
450 | llvm::Type* ret_type = DTypeToLLVMType(buffer_type); |
451 | if (res_type == ret_type) { |
452 | return result; |
453 | } |
454 | |
455 | int res_bits = GetTypeSizeInBits(res_type); |
456 | int ret_bits = GetTypeSizeInBits(ret_type); |
457 | ICHECK_GE(res_bits, ret_bits); |
458 | if (ret_bits < res_bits) { |
459 | #if TVM_LLVM_VERSION >= 110 |
460 | llvm::Type* res_byte_type = llvm::VectorType::get(t_int8_, res_bits / 8, /*Scalable*/ false); |
461 | #else |
462 | llvm::Type* res_byte_type = llvm::VectorType::get(t_int8_, res_bits / 8); |
463 | #endif |
464 | result = CreateVecSlice(builder_->CreateBitCast(result, res_byte_type), 0, ret_bits / 8); |
465 | } |
466 | if (result->getType() != ret_type) { |
467 | return builder_->CreateBitCast(result, ret_type); |
468 | } |
469 | return result; |
470 | |
471 | #undef VLUT32_OR |
472 | #undef VLUT32 |
473 | #undef VSPLATB |
474 | #undef VSHUFF |
475 | #undef VXOR |
476 | #undef VHI |
477 | #undef VLO |
478 | } |
479 | |
480 | namespace { |
481 | DMLC_ATTRIBUTE_UNUSED std::ostream& operator<<(std::ostream& os, const llvm::Module& m) { |
482 | std::string ms; |
483 | llvm::raw_string_ostream sos(ms); |
484 | sos << m; |
485 | os << sos.str(); |
486 | return os; |
487 | } |
488 | |
489 | void ProcessLLVMOptions(const std::vector<std::string>& llvm_vec) { |
490 | if (llvm_vec.empty()) return; |
491 | |
492 | // LLVM options. |
493 | std::vector<const char*> starts; |
494 | std::transform(llvm_vec.begin(), llvm_vec.end(), std::back_inserter(starts), |
495 | std::mem_fn(&std::string::c_str)); |
496 | const char** args = &starts.front(); |
497 | |
498 | llvm::cl::ParseCommandLineOptions(llvm_vec.size(), args); |
499 | } |
500 | } // namespace |
501 | |
502 | runtime::Module BuildHexagon(IRModule mod, Target target) { |
503 | LLVMInstance llvm_instance; |
504 | With<LLVMTarget> llvm_target(llvm_instance, target); |
505 | |
506 | auto split = [](const std::string& str, char delim = ' ') { |
507 | std::vector<std::string> vec; |
508 | std::string tmp; |
509 | for (std::istringstream iss(str); std::getline(iss, tmp, delim);) { |
510 | vec.push_back(tmp); |
511 | } |
512 | return vec; |
513 | }; |
514 | std::string llvm_options_str = "llvm" ; |
515 | if (const auto& llvm_options = target->GetAttr<Array<String>>("llvm-options" )) { |
516 | for (const String& s : llvm_options.value()) llvm_options_str += "," + s; |
517 | } |
518 | // Postprocess the LLVM options string: replace '@' with '=', and ',' with ' '. |
519 | for (int i = 0, e = llvm_options_str.size(); i != e; ++i) { |
520 | switch (llvm_options_str[i]) { |
521 | case '@': |
522 | llvm_options_str[i] = '='; |
523 | break; |
524 | case ',': |
525 | llvm_options_str[i] = ' '; |
526 | break; |
527 | } |
528 | } |
529 | |
530 | // The vector of LLVM options is treated at "argv" from "main(argc, argv)". The entry at |
531 | // position 0 is the name of the executable, and is ignored by the LLVM cl::option parser. |
532 | // Make sure it's set to "llvm" (tvm.target.hexagon does that). |
533 | std::vector<std::string> llvm_options_vec = split(llvm_options_str); |
534 | assert(llvm_options_vec.size() >= 1 && llvm_options_vec[0] == "llvm" ); |
535 | llvm_options_vec.insert(std::next(llvm_options_vec.begin()), |
536 | {"-hexagon-small-data-threshold=0" , |
537 | "-force-target-max-vector-interleave=1" , "-hexagon-autohvx=1" }); |
538 | |
539 | // Process extra command line options for LLVM. Make sure it's only |
540 | // done once. |
541 | static bool CallOnce = (ProcessLLVMOptions(llvm_options_vec), true); |
542 | (void)CallOnce; |
543 | |
544 | auto cg = std::make_unique<CodeGenHexagon>(); |
545 | |
546 | std::vector<PrimFunc> funcs; |
547 | std::string entry_func; |
548 | |
549 | for (auto kv : mod->functions) { |
550 | if (!kv.second->IsInstance<PrimFuncNode>()) { |
551 | // (@jroesch): we relax constraints here, Relay functions will just be ignored. |
552 | DLOG(INFO) << "Can only lower IR Module with PrimFuncs, but got " << kv.second->GetTypeKey(); |
553 | continue; |
554 | } |
555 | auto f = Downcast<PrimFunc>(kv.second); |
556 | if (f->HasNonzeroAttr(tir::attr::kIsEntryFunc)) { |
557 | auto global_symbol = f->GetAttr<String>(tvm::attr::kGlobalSymbol); |
558 | ICHECK(global_symbol.defined()); |
559 | entry_func = global_symbol.value(); |
560 | } |
561 | funcs.emplace_back(f); |
562 | } |
563 | |
564 | cg->Init("TVMHexagonModule" , llvm_target.get(), false, false, false); |
565 | cg->AddFunctionsOrdered(funcs.begin(), funcs.end()); |
566 | if (entry_func.length() != 0) { |
567 | cg->AddMainFunction(entry_func); |
568 | } |
569 | |
570 | // Uncomment to get the LLVM module right out of codegen, before optimizations. |
571 | // std::cerr << "HexagonModule.0 {\n" << *cg->GetModulePtr() << "}\n"; |
572 | std::unique_ptr<llvm::Module> module = cg->Finish(); |
573 | |
574 | enum CodeGenFileType { Asm, Obj, IR, BC }; |
575 | |
576 | auto EmitToString = [&llvm_target](const llvm::Module& m, CodeGenFileType cgft) { |
577 | std::string out; |
578 | |
579 | if (cgft == IR || cgft == BC) { |
580 | llvm::raw_string_ostream os(out); |
581 | if (cgft == IR) |
582 | m.print(os, nullptr); |
583 | else |
584 | llvm::WriteBitcodeToFile(m, os); |
585 | } else if (cgft == Asm || cgft == Obj) { |
586 | #if TVM_LLVM_VERSION <= 90 |
587 | auto ft = cgft == Asm ? llvm::TargetMachine::CodeGenFileType::CGFT_AssemblyFile |
588 | : llvm::TargetMachine::CodeGenFileType::CGFT_ObjectFile; |
589 | #else |
590 | auto ft = cgft == Asm ? llvm::CGFT_AssemblyFile : llvm::CGFT_ObjectFile; |
591 | #endif |
592 | |
593 | llvm::SmallString<16384> ss; // Will grow on demand. |
594 | llvm::raw_svector_ostream os(ss); |
595 | std::unique_ptr<llvm::Module> cm = llvm::CloneModule(m); |
596 | llvm::legacy::PassManager pass; |
597 | llvm::TargetMachine* tm = llvm_target->GetOrCreateTargetMachine(); |
598 | ICHECK(tm->addPassesToEmitFile(pass, os, nullptr, ft) == 0) << "Cannot emit target code" ; |
599 | pass.run(*cm.get()); |
600 | out.assign(ss.c_str(), ss.size()); |
601 | } |
602 | |
603 | return out; |
604 | }; |
605 | |
606 | auto SaveToFile = [](const std::string& data, const std::string& suffix) { |
607 | llvm::SmallString<64> file_name; |
608 | int fd; |
609 | std::error_code ec = llvm::sys::fs::createTemporaryFile("tvm" , suffix, fd, file_name); |
610 | ICHECK_EQ(static_cast<bool>(ec), false) << ec.message(); |
611 | llvm::raw_fd_ostream file(fd, true); |
612 | file << data; |
613 | ICHECK(!file.has_error()) << file.error().message(); |
614 | // If there is an error, execution will never get here, but return |
615 | // {ec, name} anyway to allow caller to handle error conditions. |
616 | // This way the "ICHECK" above can be removed with minimal effort. |
617 | return std::make_pair(file.error(), std::string(file_name.c_str())); |
618 | }; |
619 | |
620 | std::string asm_str = EmitToString(*module.get(), Asm); |
621 | std::string obj_str = EmitToString(*module.get(), Obj); |
622 | std::string ir_str = EmitToString(*module.get(), IR); |
623 | std::string bc_str = EmitToString(*module.get(), BC); |
624 | |
625 | std::string o_name = SaveToFile(obj_str, "o" ).second; |
626 | std::string so_name(o_name, 0, o_name.size() - 1); |
627 | so_name += "so" ; |
628 | |
629 | const auto* f = tvm::runtime::Registry::Get("tvm.contrib.hexagon.link_shared" ); |
630 | ICHECK(f != nullptr) << "tvm.contrib.hexagon.link_shared does not to exist, " |
631 | "do import tvm.contrib.hexagon" ; |
632 | |
633 | Array<PrimExpr> o_names = {StringImm(o_name)}; |
634 | Map<String, String> ; |
635 | if (target->attrs.count("mcpu" )) { |
636 | std::string mcpu = Downcast<String>(target->attrs.at("mcpu" )); |
637 | ICHECK(llvm::StringRef(mcpu).startswith("hexagon" )) |
638 | << "unexpected -mcpu value in target:" << mcpu; |
639 | extra_args.Set("hex_arch" , llvm::StringRef(mcpu).drop_front(strlen("hexagon" )).str()); |
640 | } |
641 | int rc = (*f)(so_name, o_names, extra_args); |
642 | ICHECK(rc == 0) << "Failed to link " << so_name; |
643 | |
644 | return HexagonModuleCreate(so_name, "so" , ExtractFuncInfo(mod), asm_str, obj_str, ir_str, bc_str); |
645 | } |
646 | |
647 | TVM_REGISTER_GLOBAL("target.build.hexagon" ).set_body_typed(BuildHexagon); |
648 | |
649 | TVM_REGISTER_GLOBAL("tvm.codegen.llvm.target_hexagon" ) |
650 | .set_body([](const TVMArgs& targs, TVMRetValue* rv) { |
651 | *rv = static_cast<void*>(new CodeGenHexagon()); |
652 | }); |
653 | |
654 | } // namespace codegen |
655 | } // namespace tvm |
656 | |
657 | #endif // TVM_LLVM_VERSION |
658 | |