/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file cuda/dense.h
 * \brief CUDA schedule for dense operation
 */
#ifndef TVM_TOPI_CUDA_DENSE_H_
#define TVM_TOPI_CUDA_DENSE_H_

#include <tvm/target/generic_func.h>
#include <tvm/te/operation.h>
#include <tvm/te/schedule_pass.h>
#include <tvm/topi/contrib/cublas.h>
#include <tvm/topi/detail/array_utils.h>
#include <tvm/topi/generic/extern.h>
#include <tvm/topi/nn/dense.h>
#include <tvm/topi/tags.h>

namespace tvm {
namespace topi {

using namespace tvm::te;

namespace cuda {
/*!
 * \brief Implementation of dense for CUDA backend
 *
 * \param target The target device
 * \param data Tensor with shape [batch, in_dim]
 * \param weight Tensor with shape [out_dim, in_dim]
 * \param bias Tensor with shape [out_dim]. Optional; to omit bias, pass Tensor()
 * \param out_dtype Output data type. Used for mixed precision.
 *
 * \return Tensor with shape [batch, out_dim]
 */
inline tvm::te::Tensor dense_cuda(const Target& target, const tvm::te::Tensor& data,
                                  const tvm::te::Tensor& weight, const tvm::te::Tensor& bias,
                                  const DataType& out_dtype) {
  ICHECK_EQ(data->shape.size(), 2) << "dense requires 2-D data";
  ICHECK_EQ(weight->shape.size(), 2) << "dense requires 2-D weight";
  if (bias.defined()) {
    ICHECK_EQ(bias->shape.size(), 1) << "dense requires 1-D bias";
  }

  auto batch = data->shape[0];
  auto in_dim = data->shape[1];
  auto out_dim = weight->shape[0];

  if (target->GetLibs().count("cublas")) {
    // Offload the matrix multiply to cuBLAS when the target enables the library.
    ICHECK_EQ(data->dtype, out_dtype) << "Mixed precision not supported.";
    auto mm = topi::contrib::cublas_matmul(data, weight, false, true);
    if (bias.defined()) {
      // Add the bias in a trailing broadcast stage.
      mm = tvm::te::compute(
          {batch, out_dim}, [&](Var i, Var j) { return mm(i, j) + bias(j); }, "tensor", kBroadcast);
    }

    return mm;
  } else {
    // Fall back to the generic TOPI dense compute.
    return topi::nn::dense(data, weight, bias, out_dtype);
  }
}
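// A minimal usage sketch (illustrative only): the tensor names and shapes below
// are hypothetical. The cuBLAS branch is taken only when the target enables the
// library, e.g. "cuda -libs=cublas"; otherwise the generic dense compute is used.
//
//   Target target("cuda -libs=cublas");
//   te::Tensor data = te::placeholder({16, 64}, DataType::Float(32), "data");
//   te::Tensor weight = te::placeholder({32, 64}, DataType::Float(32), "weight");
//   te::Tensor bias = te::placeholder({32}, DataType::Float(32), "bias");
//   te::Tensor out = dense_cuda(target, data, weight, bias, DataType::Float(32));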

/*!
 * \brief Create a CUDA schedule for dense
 *
 * \param target The target to generate a schedule for.
 * \param outs The output tensors.
 *
 * \return A schedule for the given ops.
 */
inline Schedule schedule_dense(const Target& target, const Array<Tensor>& outs) {
  if (target->kind->name == "cuda" && target->GetLibs().count("cublas")) {
    return topi::generic::schedule_extern(target, outs);
  }

  Array<Operation> out_ops;
  for (auto t : outs) {
    out_ops.push_back(t->op);
  }
  auto s = create_schedule(out_ops);
  auto _schedule = [&](const Tensor& dense) {
    auto num_thread = 64;
    // Split the reduction axis and rfactor it so the partial sums can be
    // reduced cooperatively across threadIdx.x.
    auto k = dense->op.as<ComputeOpNode>()->reduce_axis[0];
    IterVar ko, kf;
    s[dense].split(k, num_thread, &ko, &kf);
    auto dense_f = s.rfactor(dense, kf)[0];

    Tensor out;
    if (detail::contains(s->outputs, dense->op)) {
      out = dense;
    } else {
      // dense is fused into a later elementwise stage; compute it at that stage.
      out = outs[0]->op.output(0);
      s[dense].compute_at(s[out], s[out]->op.as<ComputeOpNode>()->axis[1]);
    }
    // Map the batch and output dimensions onto the block grid.
    s[out].bind(s[out]->op.as<ComputeOpNode>()->axis[0],
                tvm::te::thread_axis(Range(), "blockIdx.y"));
    s[out].bind(s[out]->op.as<ComputeOpNode>()->axis[1],
                tvm::te::thread_axis(Range(), "blockIdx.x"));

    // Bind the remaining reduction axis to threadIdx.x; only thread 0 stores
    // the final reduced value.
    auto tx = s[dense]->op.as<ComputeOpNode>()->reduce_axis[0];
    auto thread_x = tvm::te::thread_axis(Range(), "threadIdx.x");
    s[dense].bind(tx, thread_x);
    s[dense_f].compute_at(s[dense], tx);
    s[dense].set_store_predicate(static_cast<PrimExpr>(thread_x) == 0);
    s[out].set_store_predicate(static_cast<PrimExpr>(thread_x) == 0);
  };

  std::function<void(Operation)> traverse;
  traverse = [&](const Operation& op) {
    // Inline all one-to-one-mapping operators except the last stage (output)
    if (is_broadcast(op->tag)) {
      if (!detail::contains(s->outputs, op)) {
        s[op].compute_inline();
      }
      for (auto tensor : op->InputTensors()) {
        if (tensor->op->InputTensors().size() > 0) {
          traverse(tensor->op);
        }
      }
    } else if (op->tag == "dense") {
      // Apply the dense-specific schedule defined above.
      auto dense = op.output(0);
      _schedule(dense);
    } else {
      LOG(ERROR) << "Unsupported operator " << op->tag;
    }
  };

  traverse(outs[0]->op);
  return s;
}
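// A minimal usage sketch (hypothetical tensors, continuing the example above):
// on a cuBLAS-enabled CUDA target this returns the generic extern schedule;
// otherwise it builds the cross-thread reduction schedule defined above.
//
//   te::Tensor out = dense_cuda(target, data, weight, bias, DataType::Float(32));
//   te::Schedule s = schedule_dense(target, {out});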

}  // namespace cuda
}  // namespace topi
}  // namespace tvm
#endif  // TVM_TOPI_CUDA_DENSE_H_