mul_add.cpp source code [oneDNN/src/gpu/jit/ir/mul_add.cpp]

1	/*******************************************************************************
2	* Copyright 2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#include "gpu/jit/ir/mul_add.hpp"
18
19	namespace dnnl {
20	namespace impl {
21	namespace gpu {
22	namespace jit {
23
24	// Performs the following operation:
25	// buf = alpha buf + beta*
26	stmt_t create_mul_add_stmt(ir_context_t &ir_ctx, const expr_t &buf, int size,
27	const type_t &type, float alpha, float beta) {
28	if (alpha == `1` && beta == `0`) return stmt_t ();
29
30	stmt_t ret;
31	int step_bytes = `2` * ir_ctx.hw_cfg().grf_size();
32	for (int i = `0`; i < size; i += step_bytes) {
33	auto elems = std::min(step_bytes, size - i) / type.size();
34	auto e_alpha = shuffle_t::make_broadcast(alpha, elems);
35	auto e_beta = shuffle_t::make_broadcast(beta, elems);
36	auto e = load_t::make(type.with_elems(elems), buf, i);
37	// Avoid extra IR expressions when not needed.
38	if (alpha == `0`)
39	e = shuffle_t::make_broadcast(expr_t(`0.0f`), elems);
40	else if (alpha != `1`)
41	e *= e_alpha;
42	if (beta != `0`) e += e_beta;
43	ir_assert(e.type().scalar() == type);
44	ret = ret.append(store_t::make(buf, i, e));
45	}
46	return ret;
47	}
48
49	} // namespace jit
50	} // namespace gpu
51	} // namespace impl
52	} // namespace dnnl
53

Browse the source code of oneDNN/src/gpu/jit/ir/mul_add.cpp