/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_
#define TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

// Note: the following header is used in both TF and TFLite. In particular,
// it is used for the float TFLite Conv2D kernel.
#include "tensorflow/core/kernels/eigen_spatial_convolutions-inl.h"

#if defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)
#include "tensorflow/core/kernels/eigen_contraction_kernel.h"

namespace Eigen {
namespace internal {

// After vectorizing all loads from the underlying tensor with Packet ops, we
// still have to finalize the trailing coefficients that do not fit into a
// full packet.
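//
// A small worked example (illustrative numbers): with packet_size = 4,
// depth = 9 and max_depth = 12 there are num_coeffs = 3 trailing
// coefficients. The generic implementation below writes them one by one via
// coeffNoPadding and returns 3; the masked specialization further down
// writes all three at once with a single masked store.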
template <typename Scalar, typename DataMapper, int packet_size,
          bool masked_load_store>
struct FinalizeDataMapperCoeffs {
  EIGEN_ALWAYS_INLINE static Index finalize(Scalar* block,
                                            const DataMapper& rhs,
                                            Index base_idx, Index depth,
                                            Index max_depth, bool pad = false) {
    const Index num_coeffs = max_depth - depth;
    eigen_assert(num_coeffs <= packet_size);

    for (; depth < max_depth; ++depth) {
      *block = pad ? Scalar(0) : rhs.coeffNoPadding(depth, base_idx);
      ++block;
    }

    return num_coeffs;
  }
};

template <typename Scalar, typename DataMapper, int packet_size>
struct FinalizeDataMapperCoeffs<Scalar, DataMapper, packet_size,
                                /*masked_load_store=*/true> {
  EIGEN_ALWAYS_INLINE static Index finalize(Scalar* block,
                                            const DataMapper& rhs,
                                            Index base_idx, Index depth,
                                            Index max_depth, bool pad = false) {
    Index num_coeffs = max_depth - depth;
    eigen_assert(num_coeffs <= packet_size);
    if (num_coeffs == 0) return 0;

    using Packet = typename packet_traits<Scalar>::type;
    Packet p = pad ? pset1<Packet>(Scalar(0))
                   : rhs.partialPacketNoPadding(depth, base_idx, num_coeffs);
    internal::pstoreu(block, p, mask<Packet>(0, num_coeffs));

    return num_coeffs;
  }
};
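
// Continuing the example above (num_coeffs = 3, packet_size = 4): the masked
// specialization loads a partial packet of three coefficients (or uses a zero
// packet when padding) and stores it with mask<Packet>(0, 3), which writes
// only the first three lanes and leaves the memory past the block untouched.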

// Pack a block of the right input matrix (in our case it is always a
// "virtual matrix" constructed from extracted image patches) into a
// contiguous block in column-major storage order. Knowing the properties of
// the original patch op, we can do this more efficiently than the default
// gemm_pack_colmajor_block.
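//
// As a rough sketch of the shapes involved (assuming a single NHWC input
// image and a square P x P kernel): the "virtual matrix" produced by
// TensorImagePatchOp + reshape has
//
//   rows = patch_depth * patch_rows * patch_cols  (= channels * P * P)
//   cols = number of extracted patches            (= out_height * out_width)
//
// so each column holds one flattened image patch, and this specialization
// copies a [rows x cols] sub-block of it into `block` column by column.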
template <typename NewDimension, Index Rows, Index Cols, typename ArgType,
          typename Device, typename Scalar, typename StorageIndex,
          typename nocontract_t, typename contract_t, int packet_size,
          bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
struct gemm_pack_colmajor_block<
    Scalar, StorageIndex,
    TensorContractionSubMapper<
        Scalar, StorageIndex, Rhs,
        TensorEvaluator<
            const TensorReshapingOp<
                NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >,
            Device>,
        nocontract_t, contract_t, packet_size, inner_dim_contiguous,
        inner_dim_reordered, Alignment>,
    ColMajor> {
  typedef TensorContractionSubMapper<
      Scalar, StorageIndex, Rhs,
      TensorEvaluator<
          const TensorReshapingOp<
              NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >,
          Device>,
      nocontract_t, contract_t, packet_size, inner_dim_contiguous,
      inner_dim_reordered, Alignment>
      SubMapper;

  typedef SubMapper DataMapper;
  typedef typename packet_traits<Scalar>::type Packet;

  using CoeffFinalizer = FinalizeDataMapperCoeffs<
      Scalar, DataMapper, packet_size,
      TensorEvaluatorHasPartialPacket<typename DataMapper::TensorEvaluatorT,
                                      Packet, Index>::value &&
          unpacket_traits<Packet>::masked_store_available>;
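  // The masked CoeffFinalizer above is selected only when the tensor
  // evaluator can produce partial packets and the packet type supports
  // masked stores; otherwise the scalar fallback is used.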

  EIGEN_DONT_INLINE
  void operator()(Scalar* block, const DataMapper& rhs, StorageIndex rows,
                  StorageIndex cols) {
    const bool standard_patches = !rhs.nonStandardPatches();

    if (standard_patches && (rhs.patchDepth() % packet_size == 0)) {
      // A single packet always belongs to a single patch (row, col).
      if (rhs.hasPadding()) {
        packStandardPatches</*patch_depth_is_multiple_of_packet_size=*/true,
                            /*has_padding=*/true>(block, rhs, rows, cols);
      } else {
        packStandardPatches</*patch_depth_is_multiple_of_packet_size=*/true,
                            /*has_padding=*/false>(block, rhs, rows, cols);
      }

    } else if (standard_patches) {
      // A single packet can span multiple patch rows or columns.
      if (rhs.hasPadding()) {
        packStandardPatches</*patch_depth_is_multiple_of_packet_size=*/false,
                            /*has_padding=*/true>(block, rhs, rows, cols);
      } else {
        packStandardPatches</*patch_depth_is_multiple_of_packet_size=*/false,
                            /*has_padding=*/false>(block, rhs, rows, cols);
      }

    } else if (rhs.patchDepth() % packet_size == 0) {
      // A single packet always belongs to a single patch (row, col).
      packNonStandardPatches</*patch_depth_is_multiple_of_packet_size=*/
                             true>(block, rhs, rows, cols);

    } else {
      // A single packet can span multiple patch rows or columns.
      packNonStandardPatches</*patch_depth_is_multiple_of_packet_size=*/
                             false>(block, rhs, rows, cols);
    }
  }

 private:
  // (A) Standard image patches:
  //
  //  (1) patch_row_inflate_strides == 1    AND
  //  (2) patch_col_inflate_strides == 1
  //
  // Standard patches guarantee that the two innermost dimensions (depth and
  // rows) are contiguous in memory, so we can try to squeeze reads from them.
  //
  // (B) Non-standard image patches: the in_row/in_col and patch_row/patch_col
  // strides can differ from 1, and for each [row, col] inside a patch we have
  // to do additional computations to find the corresponding row and col in the
  // input tensor. We also can no longer squeeze reads from the inner
  // dimensions.
  //
  // Additional parameters:
  // - patch_depth_is_multiple_of_packet_size=true: the depth dimension size is
  //   guaranteed to be a multiple of the packet size, so we can skip all
  //   non-vectorized loads and checks, because it is guaranteed that the block
  //   size is a multiple of the packet size (see TensorContractionBlocking).
  //
  // - has_padding: the input tensor has non-zero padding. In this case, for
  //   each patch col and row we need to check that it does not correspond to
  //   the padded region of the original input.
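  //
  // For example (illustrative values): with float AVX packets
  // (packet_size = 8), a patch depth of 32 selects the
  // patch_depth_is_multiple_of_packet_size=true path, where every depth run is
  // handled purely with packet loads, while an RGB input (patch depth 3) must
  // take the mixed packet + scalar path.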
  template <bool patch_depth_is_multiple_of_packet_size, bool has_padding>
  EIGEN_ALWAYS_INLINE void packStandardPatches(Scalar* __restrict block,
                                               const DataMapper& rhs,
                                               StorageIndex rows,
                                               StorageIndex cols) {
    eigen_assert(!rhs.nonStandardPatches());

    // `peeled_k` is the number of rows that we can fill with vectorized packet
    // loads (the name matches the one used in the gemm_pack_rhs
    // specializations defined above).
    const StorageIndex peeled_k = (rows / packet_size) * packet_size;
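    // E.g. with rows = 35 and packet_size = 8, peeled_k = 32: the first 32
    // elements of each column are filled with packet (or squeezed) loads, and
    // the remaining 3 by the scalar tail loop at the end of this function.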

    const StorageIndex start_col = rhs.colOffset();
    const StorageIndex max_col = rhs.maxCol(peeled_k);
    const StorageIndex rhs_depth_offset = rhs.depthOffset();

    for (StorageIndex col = 0; col < cols; ++col) {
      SubMapper lm = rhs.getLinearMapper(0, col);

      StorageIndex k = 0;
      for (Index c = start_col; c < max_col; ++c) {
        eigen_assert(k <= peeled_k);

        const StorageIndex start_row = (c == start_col) ? rhs.rowOffset() : 0;
        const StorageIndex max_row = rhs.maxRow(peeled_k, c);
        const bool pad_col = has_padding && lm.padCol(c);

        eigen_assert(has_padding || !lm.padCol(c));
        eigen_assert(has_padding || !lm.padAnyRow(start_row, max_row - 1));

        // We can squeeze reads from all rows in the [start_row, max_row)
        // range.
        if (!has_padding ||
            (!pad_col && !lm.padAnyRow(start_row, max_row - 1))) {
          const StorageIndex start_depth =
              (c == start_col) ? rhs_depth_offset : 0;

          const StorageIndex max_depth =
              std::min<StorageIndex>(start_depth + (peeled_k - k),
                                     (max_row - start_row) * rhs.patchDepth());

          const StorageIndex base_idx = lm.baseIndex(start_row, c);

          if (patch_depth_is_multiple_of_packet_size) {
            // If the patch depth is a multiple of the packet size, we are
            // guaranteed to be able to process all values in the depth
            // dimension with packets.
            eigen_assert((max_depth - start_depth) % packet_size == 0);
            StorageIndex d = start_depth;

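            // The 4x unrolled loop below runs while d + 4 * packet_size <=
            // max_depth: e.g. for 24 remaining float values with
            // packet_size = 4 it covers the first 16, and the single-packet
            // loop after it handles the last 8.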
            const StorageIndex unrolled_depth = max_depth - 4 * packet_size;
            for (; d <= unrolled_depth; d += 4 * packet_size) {
              eigen_assert(k < peeled_k);

              Packet p0 = rhs.packetNoPadding(d + 0 * packet_size, base_idx);
              Packet p1 = rhs.packetNoPadding(d + 1 * packet_size, base_idx);
              Packet p2 = rhs.packetNoPadding(d + 2 * packet_size, base_idx);
              Packet p3 = rhs.packetNoPadding(d + 3 * packet_size, base_idx);

              internal::pstoreu(block + 0 * packet_size, p0);
              internal::pstoreu(block + 1 * packet_size, p1);
              internal::pstoreu(block + 2 * packet_size, p2);
              internal::pstoreu(block + 3 * packet_size, p3);

              block += 4 * packet_size;
              k += 4 * packet_size;
            }

            for (; d < max_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              internal::pstoreu(block, rhs.packetNoPadding(d, base_idx));
              block += packet_size;
              k += packet_size;
            }

          } else {
            StorageIndex d = start_depth;

            const StorageIndex unrolled_depth = max_depth - 4 * packet_size;
            for (; d <= unrolled_depth; d += 4 * packet_size) {
              eigen_assert(k < peeled_k);

              Packet p0 = rhs.packetNoPadding(d + 0 * packet_size, base_idx);
              Packet p1 = rhs.packetNoPadding(d + 1 * packet_size, base_idx);
              Packet p2 = rhs.packetNoPadding(d + 2 * packet_size, base_idx);
              Packet p3 = rhs.packetNoPadding(d + 3 * packet_size, base_idx);

              internal::pstoreu(block + 0 * packet_size, p0);
              internal::pstoreu(block + 1 * packet_size, p1);
              internal::pstoreu(block + 2 * packet_size, p2);
              internal::pstoreu(block + 3 * packet_size, p3);

              block += 4 * packet_size;
              k += 4 * packet_size;
            }

            const StorageIndex vectorized_depth = max_depth - packet_size;
            for (; d <= vectorized_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              internal::pstoreu(block, rhs.packetNoPadding(d, base_idx));
              block += packet_size;
              k += packet_size;
            }

            eigen_assert(k <= peeled_k);
            const Index num_coeffs =
                CoeffFinalizer::finalize(block, rhs, base_idx, d, max_depth);

            k += num_coeffs;
            block += num_coeffs;
            eigen_assert(k <= peeled_k);
          }

          // Go to the next column.
          continue;
        }

        // If we are not allowed to squeeze reads along the `row` and `depth`
        // dimensions, we must process rows one by one.
        for (StorageIndex r = start_row; r < max_row; ++r) {
          eigen_assert(k <= peeled_k);

          const StorageIndex start_depth =
              ((c == start_col) && (r == start_row)) ? rhs_depth_offset : 0;
          const StorageIndex max_depth =
              rhs.maxDepth(peeled_k - k, start_depth);

          const bool pad = has_padding && (pad_col || lm.padRow(r));
          eigen_assert(has_padding || !lm.padRow(r));

          const StorageIndex base_idx = lm.baseIndex(r, c);

          if (patch_depth_is_multiple_of_packet_size) {
            // If the patch depth is a multiple of the packet size, we are
            // guaranteed to be able to process all values in the depth
            // dimension with packets.
            eigen_assert((max_depth - start_depth) % packet_size == 0);
            StorageIndex d = start_depth;

            for (; d < max_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              const Packet p = (has_padding && pad)
                                   ? pset1<Packet>(Scalar(0))
                                   : rhs.packetNoPadding(d, base_idx);
              internal::pstoreu(block, p);
              block += packet_size;
              k += packet_size;
            }

          } else {
            StorageIndex d = start_depth;

            const StorageIndex vectorized_depth = max_depth - packet_size;
            for (; d <= vectorized_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              const Packet p = (has_padding && pad)
                                   ? pset1<Packet>(Scalar(0))
                                   : rhs.packetNoPadding(d, base_idx);
              internal::pstoreu(block, p);
              block += packet_size;
              k += packet_size;
            }

            eigen_assert(k <= peeled_k);
            const Index num_coeffs = CoeffFinalizer::finalize(
                block, rhs, base_idx, d, max_depth, has_padding && pad);

            k += num_coeffs;
            block += num_coeffs;
            eigen_assert(k <= peeled_k);
          }
        }
      }

      // The loops above should have filled exactly peeled_k elements.
      eigen_assert(peeled_k == k);

      // Fill the remaining elements using loadCoeffStandard.
      for (; k < rows; ++k) {
        *block = lm.loadCoeffStandard(k);
        ++block;
      }
    }
  }

  template <bool patch_depth_is_multiple_of_packet_size>
  EIGEN_ALWAYS_INLINE void packNonStandardPatches(Scalar* __restrict block,
                                                  const DataMapper& rhs,
                                                  StorageIndex rows,
                                                  StorageIndex cols) {
    eigen_assert(rhs.nonStandardPatches());

    // `peeled_k` is the number of rows that we can fill with vectorized packet
    // loads (the name matches the one used in the gemm_pack_rhs
    // specializations defined above).
    const StorageIndex peeled_k = (rows / packet_size) * packet_size;

    const StorageIndex start_col = rhs.colOffset();
    const StorageIndex max_col = rhs.maxCol(peeled_k);
    const StorageIndex rhs_depth_offset = rhs.depthOffset();

    // The original input column and row after applying all non-standard
    // strides and dilations; computed by padOrSkip{Row,Col}.
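    // (For instance, with patch_row_inflate_strides == 2 every other row of
    // the inflated input is an implicitly inserted zero row, and padOrSkipRow
    // reports such rows so the packing below fills them with zeros instead of
    // reading memory.)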
    Index orig_c = 0;
    Index orig_r = 0;

    for (StorageIndex col = 0; col < cols; ++col) {
      SubMapper lm = rhs.getLinearMapper(0, col);

      StorageIndex k = 0;
      for (Index c = start_col; c < max_col; ++c) {
        eigen_assert(k <= peeled_k);

        const StorageIndex start_row = (c == start_col) ? rhs.rowOffset() : 0;
        const StorageIndex max_row = rhs.maxRow(peeled_k, c);
        const bool pad_or_skip_col = lm.padOrSkipCol(c, &orig_c);

        for (StorageIndex r = start_row; r < max_row; ++r) {
          eigen_assert(k <= peeled_k);

          const StorageIndex start_depth =
              ((c == start_col) && (r == start_row)) ? rhs_depth_offset : 0;
          const StorageIndex max_depth =
              rhs.maxDepth(peeled_k - k, start_depth);

          const bool pad_or_skip =
              pad_or_skip_col || lm.padOrSkipRow(r, &orig_r);
          const StorageIndex base_idx = lm.origBaseIndex(orig_r, orig_c);

          if (patch_depth_is_multiple_of_packet_size) {
            // If the patch depth is a multiple of the packet size, we are
            // guaranteed to be able to process all values in the depth
            // dimension with packets.
            eigen_assert((max_depth - start_depth) % packet_size == 0);
            StorageIndex d = start_depth;

            for (; d < max_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              const Packet p = pad_or_skip ? pset1<Packet>(Scalar(0))
                                           : rhs.packetNoPadding(d, base_idx);
              internal::pstoreu(block, p);
              block += packet_size;
              k += packet_size;
            }

          } else {
            const StorageIndex vectorized_depth = max_depth - packet_size;
            StorageIndex d = start_depth;
            for (; d <= vectorized_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              const Packet p = pad_or_skip ? pset1<Packet>(Scalar(0))
                                           : rhs.packetNoPadding(d, base_idx);
              internal::pstoreu(block, p);
              block += packet_size;
              k += packet_size;
            }

            eigen_assert(k <= peeled_k);
            const Index num_coeffs = CoeffFinalizer::finalize(
                block, rhs, base_idx, d, max_depth, pad_or_skip);

            k += num_coeffs;
            block += num_coeffs;
            eigen_assert(k <= peeled_k);
          }
        }
      }

      // The loops above should have filled exactly peeled_k elements.
      eigen_assert(peeled_k == k);

      // Fill the remaining elements using loadCoeff.
      for (; k < rows; ++k) {
        *block = lm(k);
        ++block;
      }
    }
  }
};
}  // namespace internal
}  // namespace Eigen
#endif  // defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)
#endif  // TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_