1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file intrin_rule_opencl.cc |
22 | * \brief OpenCL intrinsic rules. |
23 | */ |
24 | #include <tvm/arith/analyzer.h> |
25 | #include <tvm/tir/op_attr_types.h> |
26 | |
27 | #include "../intrin_rule.h" |
28 | |
29 | namespace tvm { |
30 | namespace codegen { |
31 | namespace intrin { |
32 | using tir::FLowerIntrinsic; |
33 | |
34 | TVM_REGISTER_OP("tir.floor" ) |
35 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
36 | |
37 | TVM_REGISTER_OP("tir.ceil" ) |
38 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
39 | |
40 | TVM_REGISTER_OP("tir.trunc" ) |
41 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
42 | |
43 | TVM_REGISTER_OP("tir.fabs" ) |
44 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
45 | |
46 | TVM_REGISTER_OP("tir.round" ) |
47 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
48 | |
49 | TVM_REGISTER_OP("tir.nearbyint" ) |
50 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
51 | |
52 | TVM_REGISTER_OP("tir.exp" ).set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , |
53 | DispatchPureExtern<Direct>); |
54 | |
55 | TVM_REGISTER_OP("tir.erf" ).set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , |
56 | DispatchPureExtern<Direct>); |
57 | |
58 | TVM_REGISTER_OP("tir.exp2" ) |
59 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
60 | |
61 | TVM_REGISTER_OP("tir.exp10" ) |
62 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
63 | |
64 | TVM_REGISTER_OP("tir.log" ).set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , |
65 | DispatchPureExtern<Direct>); |
66 | |
67 | TVM_REGISTER_OP("tir.log2" ) |
68 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
69 | |
70 | TVM_REGISTER_OP("tir.log10" ) |
71 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
72 | |
73 | TVM_REGISTER_OP("tir.tanh" ) |
74 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
75 | |
76 | TVM_REGISTER_OP("tir.sqrt" ) |
77 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
78 | |
79 | TVM_REGISTER_OP("tir.pow" ).set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , |
80 | DispatchPureExtern<Direct>); |
81 | |
82 | TVM_REGISTER_OP("tir.popcount" ) |
83 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
84 | |
85 | TVM_REGISTER_OP("tir.fmod" ) |
86 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
87 | |
88 | TVM_REGISTER_OP("tir.sin" ).set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , |
89 | DispatchPureExtern<Direct>); |
90 | |
91 | TVM_REGISTER_OP("tir.sinh" ) |
92 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
93 | |
94 | TVM_REGISTER_OP("tir.cos" ).set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , |
95 | DispatchPureExtern<Direct>); |
96 | |
97 | TVM_REGISTER_OP("tir.cosh" ) |
98 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchPureExtern<Direct>); |
99 | |
100 | // There is no warp shuffle instruction in standard OpenCL |
101 | // When shuffle is used, we assume it is intel's shuffle extension |
102 | static PrimExpr DispatchIntelShuffle(const PrimExpr& e) { |
103 | const CallNode* call = e.as<CallNode>(); |
104 | ICHECK(call != nullptr); |
105 | ICHECK_EQ(call->args.size(), 5); // mask, value, warp_id, width, warp_size |
106 | arith::Analyzer analyzer; |
107 | ICHECK(analyzer.CanProve(call->args[3] == call->args[4])) |
108 | << "Intel warp shuffle dose not support width != warp_size" ; |
109 | Array<PrimExpr> opencl_args{{StringImm("intel_sub_group_shuffle" ), call->args[1], call->args[2]}}; |
110 | return Call(call->dtype, builtin::call_pure_extern(), opencl_args); |
111 | } |
112 | |
113 | TVM_REGISTER_OP("tir.tvm_warp_shuffle" ) |
114 | .set_attr<FLowerIntrinsic>("opencl.FLowerIntrinsic" , DispatchIntelShuffle); |
115 | |
116 | } // namespace intrin |
117 | } // namespace codegen |
118 | } // namespace tvm |
119 | |