/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \brief layer normalization op constructions
 * \file nn/layer_norm.h
 */
#ifndef TVM_TOPI_NN_LAYER_NORM_H_
#define TVM_TOPI_NN_LAYER_NORM_H_

#include <tvm/te/operation.h>
#include <tvm/topi/broadcast.h>  // topi::add, topi::multiply
#include <tvm/topi/reduction.h>  // GetRealAxis, MakeReduceAxes, MakeTupleSumReducer
#include <tvm/topi/tags.h>

#include <algorithm>  // std::find
#include <string>
namespace tvm {
namespace topi {
namespace nn {

using namespace tvm::te;

/*!
 * \brief Layer normalization.
 * \param data N-D tensor with shape [d_0, d_1, ..., d_{N-1}]
 * \param gamma K-D tensor with shape [r_0, r_1, ..., r_{K-1}] where K == len(axis) and
 *              d_{axis_k} == r_k
 * \param beta Optional, K-D tensor with shape [r_0, r_1, ..., r_{K-1}] where
 *             d_{axis_k} == r_k
 * \param axis The axes along which to normalize.
 * \param epsilon The epsilon value to avoid division by zero.
 * \param name The name of the operation.
 * \param tag The tag to mark the operation.
 * \return The normalized tensor, with the same shape as data.
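 *
 * The computation, with mean and variance taken over the given axes, is:
 *   out = (data - mean) / sqrt(var + epsilon) * gamma + beta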
 */
inline Tensor layer_norm(const Tensor& data, const Tensor& gamma, const Tensor& beta,
                         const Array<Integer>& axis, double epsilon,
                         std::string name = "T_layer_norm", std::string tag = kInjective) {
  // Compute sum(x) and sum(x^2) over the normalized axes in a single
  // fused (tuple) reduction pass.
  auto ndim = data->shape.size();
  ICHECK_NE(ndim, 0) << "Cannot reduce a 0 dim Tensor";
  auto real_axis = GetRealAxis(static_cast<int>(ndim), axis);
  auto reduce_axes = MakeReduceAxes(real_axis, data);
  auto target_shape =
      MakeReduceTargetShape(real_axis, data, /*keepdims=*/false, /*atleast1d=*/true);
  auto func = MakeTupleSumReducer();

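  // The reducer body: map each output coordinate, plus a fresh itervar for every
  // reduced axis, back to a full input coordinate, and feed the (x, x^2) pair to
  // the tuple sum reducer.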
  auto compute = [ndim, &real_axis, &reduce_axes, &func, &data](const Array<Var>& indices) {
    Array<PrimExpr> eval_range;
    int arg_counter = 0;
    int red_counter = 0;

    for (size_t i = 0; i < ndim; ++i) {
      if (std::find(real_axis.begin(), real_axis.end(), i) != real_axis.end()) {
        // real_axis contains i
        eval_range.push_back(reduce_axes[red_counter]);
        red_counter++;
      } else {
        eval_range.push_back(indices[arg_counter]);
        arg_counter++;
      }
    }
    auto square = [](const PrimExpr& x) { return x * x; };
    return func({data(eval_range), square(data(eval_range))}, reduce_axes, nullptr);
  };

  auto temp_x_x2 =
      tvm::te::compute(target_shape, compute, data->op->name + "_red_temp", kCommReduce);

  auto temp_x = temp_x_x2[0];   // sum(x) over the normalized axes
  auto temp_x2 = temp_x_x2[1];  // sum(x^2) over the normalized axes

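  // reduce_extent is N, the number of elements normalized over (the product of
  // the reduced axes' extents); it converts the sums above into means.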
  auto reduce_extent = make_const(data->dtype, 1);
  for (int i : real_axis) {
    reduce_extent *= data->shape[i];
  }
  auto layer_norm_func = [&](const Array<Var>& indices) {
    Array<Var> reduce_indices, non_reduce_indices;
    for (int i = 0, n = static_cast<int>(indices.size()); i < n; ++i) {
      if (std::find(real_axis.begin(), real_axis.end(), i) != real_axis.end()) {
        reduce_indices.push_back(indices[i]);
      } else {
        non_reduce_indices.push_back(indices[i]);
      }
    }
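    // Recover the moments from the two sums: mean = sum(x) / N and
    // var = sum(x^2) / N - mean^2 (the biased estimator), then normalize.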
    auto mean = temp_x(non_reduce_indices) / reduce_extent;
    auto var = temp_x2(non_reduce_indices) / reduce_extent - mean * mean;
    auto layer_norm = (data(indices) - mean) * tvm::rsqrt(var + make_const(var->dtype, epsilon));
    layer_norm = topi::multiply(layer_norm, gamma(reduce_indices));
    if (beta.defined()) {
      layer_norm = topi::add(layer_norm, beta(reduce_indices));
    }
    return layer_norm;
  };
  return tvm::te::compute(data->shape, layer_norm_func, name, tag);
}
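
// Example usage (a minimal sketch; the shapes, dtype, and tensor names below are
// illustrative, not prescribed by this API):
//
//   // Normalize a [batch, hidden] input over its last axis.
//   te::Tensor data = te::placeholder({32, 128}, DataType::Float(32), "data");
//   te::Tensor gamma = te::placeholder({128}, DataType::Float(32), "gamma");
//   te::Tensor beta = te::placeholder({128}, DataType::Float(32), "beta");
//   te::Tensor out = layer_norm(data, gamma, beta, {Integer(1)}, /*epsilon=*/1e-5);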

}  // namespace nn
}  // namespace topi
}  // namespace tvm

#endif  // TVM_TOPI_NN_LAYER_NORM_H_