compute.h source code [tensorflow/external/gemmlowp/internal/compute.h]

1	// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	// compute.h: the central stage of the Gemm computation, operates
16	// on already-packed LHS and RHS blocks and calls the Gemm kernel
17	// to compute a block of the product.
18
19	#ifndef GEMMLOWP_INTERNAL_COMPUTE_H_
20	#define GEMMLOWP_INTERNAL_COMPUTE_H_
21
22	#include "block_params.h"
23	#include "kernel.h"
24	#include "pack.h"
25
26	namespace gemmlowp {
27
28	template <typename PackedLhs, typename PackedRhs, typename PackedResult>
29	class ComputeImpl {
30	typedef typename PackedLhs::KernelSideFormat KernelLhsFormat;
31	typedef typename PackedRhs::KernelSideFormat KernelRhsFormat;
32	typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format;
33
34	const KernelBase& kernel_;
35	const BlockParams& block_params_;
36
37	PackedResult* const packed_result_;
38	const PackedLhs& packed_lhs_;
39	const PackedRhs& packed_rhs_;
40
41	public:
42	ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params,
43	PackedResult* _packed_result, const PackedLhs& _packed_lhs,
44	const PackedRhs& _packed_rhs)
45	: kernel_(_kernel),
46	block_params_(_block_params),
47	packed_result_(_packed_result),
48	packed_lhs_(_packed_lhs),
49	packed_rhs_(_packed_rhs) {}
50
51	void Compute(int depth) {
52	depth = RoundUp<Format::kDepth>(depth);
53	assert(depth <= block_params_.l2_depth);
54	for (int d = `0`; d < depth; d += block_params_.l1_depth) {
55	int ds = std::min(block_params_.l1_depth, depth - d);
56
57	for (int r = `0`; r < block_params_.l2_rows; r += block_params_.l1_rows) {
58	int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r);
59
60	ComputeL1(r, rs, `0`, block_params_.l2_cols, d, ds);
61	}
62	}
63	}
64
65	private:
66	static void MarkPackedResultBlockAsInitialized(
67	const MatrixMap<std::int32_t, MapOrder::ColMajor>& packed_result_block) {
68	#ifdef GEMMLOWP_MARK_MEMORY_AS_INITIALIZED
69	for (int col = `0`; col < packed_result_block.cols(); col++) {
70	MarkMemoryAsInitialized(
71	packed_result_block.data() + col * packed_result_block.cols_stride(),
72	packed_result_block.rows());
73	}
74	#else
75	(void)packed_result_block;
76	#endif
77	}
78
79	void ComputeRun(int start_row, int start_col, int start_depth,
80	int depth) GEMMLOWP_NOINLINE {
81	packed_lhs_.seek_run(start_row, start_depth);
82	packed_rhs_.seek_run(start_col, start_depth);
83	auto packed_result_block = packed_result_->Map().block(
84	start_row, start_col, Format::kRows, Format::kCols);
85	kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(),
86	packed_result_block.cols_stride(), packed_lhs_.current_data(),
87	packed_rhs_.current_data(), start_depth, depth);
88	MarkPackedResultBlockAsInitialized(packed_result_block);
89	}
90
91	void ComputeL1(int start_row, int rows, int start_col, int cols,
92	int start_depth, int depth) {
93	assert(rows % Format::kRows == `0`);
94	assert(cols % Format::kCols == `0`);
95	assert(depth % Format::kDepth == `0`);
96
97	for (int c = `0`; c < cols; c += Format::kCols) {
98	for (int r = `0`; r < rows; r += Format::kRows) {
99	ComputeRun(start_row + r, start_col + c, start_depth, depth);
100	}
101	}
102	}
103	};
104
105	template <typename PackedLhs, typename PackedRhs, typename PackedResult>
106	void Compute(const KernelBase& kernel, const BlockParams& block_params,
107	PackedResult* packed_result, const PackedLhs& packed_lhs,
108	const PackedRhs& packed_rhs, int depth) {
109	ScopedProfilingLabel label("compute");
110	ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl(
111	kernel, block_params, packed_result, packed_lhs, packed_rhs);
112
113	impl.Compute(depth);
114	}
115
116	} // namespace gemmlowp
117
118	#endif // GEMMLOWP_INTERNAL_COMPUTE_H_
119

Browse the source code of tensorflow/external/gemmlowp/internal/compute.h