prepare_packed_matrices.cc source code [tensorflow/external/ruy/ruy/prepare_packed_matrices.cc]

1	/ Copyright 2020 Google LLC. All Rights Reserved.*
2
3	Licensed under the Apache License, Version 2.0 (the "License");
4	you may not use this file except in compliance with the License.
5	You may obtain a copy of the License at
6
7	http://www.apache.org/licenses/LICENSE-2.0
8
9	Unless required by applicable law or agreed to in writing, software
10	distributed under the License is distributed on an "AS IS" BASIS,
11	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	See the License for the specific language governing permissions and
13	limitations under the License.
14	==============================================================================/*
15
16	#include "ruy/prepare_packed_matrices.h"
17
18	#include "ruy/allocator.h"
19	#include "ruy/ctx.h"
20	#include "ruy/matrix.h"
21	#include "ruy/prepacked_cache.h"
22	#include "ruy/side_pair.h"
23	#include "ruy/trace.h"
24	#include "ruy/trmul_params.h"
25
26	namespace ruy {
27	namespace {
28
29	// Returns true if the operand on the given side should use caching of the
30	// packed form. This may either be explicitly dictated by its cache_policy
31	// (if it is kNeverCache, the default, or kAlwaysCache), or it may depend
32	// on a heuristic decision based on the other operand's width. For example,
33	// in a matrixvector product, for the LHS matrix operand, the other side is*
34	// the RHS vector, with a width of 1, causing the packing of the LHS to be
35	// a large fraction of the overall work, so a heuristic would typically
36	// decide in favor of caching, if permitted at all by the cache_policy.
37	bool ShouldCache(const TrMulParams& params, Side side) {
38	const CachePolicy cache_policy = params.src [side].cache_policy;
39	// The width that matters is that of the other side, it is what determines
40	// the amortization of the packing work done on the present side.
41	const Side other_side = OtherSide(side);
42	const int other_width = params.src [other_side].layout.cols;
43	const int other_kernel_width =
44	params.packed_matrix [other_side].layout.kernel.cols;
45	switch (cache_policy) {
46	case CachePolicy::kNeverCache:
47	return false;
48	case CachePolicy::kAlwaysCache:
49	return true;
50	case CachePolicy::kCacheIfLargeSpeedup:
51	// The condition (other_width <= other_kernel_width) means that the kernel
52	// will traverse each value of the present side only once, meaning that
53	// the overhead of the packing work will be maximal, hence maximally
54	// worth caching.
55	return (other_width <= other_kernel_width);
56	case CachePolicy::kCacheIfSignificantSpeedup:
57	// Variant of the heuristic used in the kCacheIfLargeSpeedup case. The
58	// kernel will run on each value of the present side only a few times,
59	// so packing overhead will be significant.
60	return (other_width <= `4` * other_kernel_width);
61	default:
62	RUY_DCHECK(false);
63	return false;
64	}
65	}
66
67	} // namespace
68
69	void PreparePackedMatrices(Ctx* ctx, TrMulParams* params) {
70	RUY_TRACE_SCOPE;
71	for (Side side : {Side::kLhs, Side::kRhs}) {
72	PEMat& packed_matrix = params->packed_matrix [side];
73	if (ShouldCache(*params, side)) {
74	// Use a cached packed matrix (possibly packing and caching now).
75	auto* cache = ctx->GetPrepackedCache();
76	auto action = cache->Get(params->src [side].data, &packed_matrix);
77	RUY_TRACE_INFO(PREPARE_PACKED_MATRICES_SHOULD_CACHE);
78	if (action == PrepackedCache::Action::kInsertedNewEntry) {
79	params->RunPack(side, ctx->GetMainThreadTuning(), `0`,
80	packed_matrix.layout.cols);
81	}
82	params->is_prepacked [side] = true;
83	} else {
84	RUY_TRACE_INFO(PREPARE_PACKED_MATRICES_NO_CACHE);
85	// Do not use a cached packed matrix. Only need to allocate buffers now.
86	Allocator* allocator = ctx->GetMainAllocator();
87	packed_matrix.data = allocator->AllocateBytesAvoidingAliasingWith(
88	DataBytes(packed_matrix), params->src [side].data);
89	packed_matrix.sums = allocator->AllocateBytes(SumsBytes(packed_matrix));
90	}
91	}
92	}
93
94	} // namespace ruy
95

Browse the source code of tensorflow/external/ruy/ruy/prepare_packed_matrices.cc