1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "BackendTestUtils.h" |
18 | #include "glow/Graph/Graph.h" |
19 | #include "glow/IR/IR.h" |
20 | #include "glow/IR/IRBuilder.h" |
21 | #include "glow/IR/IRUtils.h" |
22 | #include "glow/IR/Instrs.h" |
23 | #include "glow/Optimizer/IROptimizer/IRFunctionPassManager.h" |
24 | #include "glow/Optimizer/IROptimizer/IROptimizer.h" |
25 | |
26 | #include "llvm/Support/Casting.h" |
27 | |
28 | #include "gtest/gtest.h" |
29 | |
30 | #include <algorithm> |
31 | #include <cassert> |
32 | #include <cstddef> |
33 | #include <cstdint> |
34 | #include <iostream> |
35 | #include <string> |
36 | |
37 | using namespace glow; |
38 | using llvm::cast; |
39 | using llvm::dyn_cast; |
40 | using llvm::isa; |
41 | |
/// Basic test of DSE (Dead Store Elimination).
TEST(Optimizer, dseBasic) {
  Module mod;
  Function *F = mod.createFunction("DeadStoreElimination");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input1 = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input1",
                                    WeightVar::MutabilityKind::Constant);
  auto *input2 = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input2",
                                    WeightVar::MutabilityKind::Constant);
  auto *input3 = bb.createWeightVar(glow::ElemKind::BoolTy, {1}, "input3",
                                    WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  bb.createElementAddInst("elem_add1", output, input1, input1);
  bb.createElementSelectInst("select", output, input3, output, input2);
  bb.createElementAddInst("elem_add2", output, input2, input2);
61 | |
62 | optimize(M, MockBackend().shouldShareBuffers()); |
63 | |
  // Check that the first add and the select are eliminated: the select's
  // result is overwritten before it is read, and once the select is dead the
  // first add's store becomes dead too.
66 | EXPECT_EQ(M.getInstrs().size(), 1); |
67 | } |
68 | |
69 | /// Check that DSE does not remove the last write into a WeightVar. |
TEST(Optimizer, dseDoNotRemoveLastWriteIntoWeightVar) {
  Module mod;
  Function *F = mod.createFunction("DeadStoreElimination");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input1 = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input1",
                                    WeightVar::MutabilityKind::Constant);
  auto *input2 = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input2",
                                    WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  // Last write into a WeightVar should not be removed even if there is
  // no instruction that reads it, because it is an observable side-effect.
  bb.createElementAddInst("elem_add", output, input1, input2);
  bb.createTensorViewInst(
      "cast", output, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 1, 1})),
      {0});
89 | |
90 | optimize(M, MockBackend().shouldShareBuffers()); |
91 | |
  // Check that the dead tensor view is eliminated, while the add remains
  // because it is the last write into the externally visible output.
94 | EXPECT_EQ(M.getInstrs().size(), 1); |
95 | } |
96 | |
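/// Check that buffer sharing removes the dead stores and coalesces the
/// result activation with the output, eliminating the final copy.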
97 | TEST(Optimizer, shareBuffers) { |
98 | Module mod; |
99 | Function *F = mod.createFunction("ShareBuffers" ); |
100 | IRFunction M(F); |
101 | IRBuilder bb(&M); |
102 | |
103 | auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input" , |
104 | WeightVar::MutabilityKind::Constant); |
105 | auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output" , |
106 | WeightVar::MutabilityKind::Mutable); |
107 | |
108 | auto *alloc1 = |
109 | bb.createAllocActivationInst("alloc1" , glow::ElemKind::FloatTy, 1); |
110 | auto *alloc2 = |
111 | bb.createAllocActivationInst("alloc2" , glow::ElemKind::FloatTy, 1); |
112 | auto *alloc3 = |
113 | bb.createAllocActivationInst("alloc3" , glow::ElemKind::FloatTy, 1); |
114 | bb.createSplatInst("splat1" , alloc1, 0.0); |
115 | bb.createSplatInst("splat2" , alloc2, 1.0); |
116 | bb.createElementAddInst("elem_add1" , alloc3, alloc1, input); |
117 | bb.createElementAddInst("elem_add2" , alloc2, input, input); |
118 | // alloc1 and alloc2 are not live after this instruction. |
119 | bb.createElementAddInst("elem_add3" , alloc1, alloc2, input); |
120 | bb.createCopyInst("copy" , output, alloc3); |
121 | bb.createDeallocActivationInst("dealloc3" , alloc3); |
122 | bb.createDeallocActivationInst("dealloc2" , alloc2); |
123 | bb.createDeallocActivationInst("dealloc1" , alloc1); |
124 | |
125 | optimize(M, MockBackend().shouldShareBuffers()); |
126 | |
127 | // Check that the first relu instruction and select are eliminated, because |
128 | // their outputs are never read. |
129 | EXPECT_EQ(M.getInstrs().size(), 2); |
130 | } |
131 | |
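/// Check that tensor views whose results are never used are deleted.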
132 | TEST(Optimizer, deleteDeadViews) { |
133 | Module mod; |
134 | Function *F = mod.createFunction("DeleteDeadViews" ); |
135 | IRFunction M(F); |
136 | IRBuilder bb(&M); |
137 | |
138 | auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input" , |
139 | WeightVar::MutabilityKind::Constant); |
140 | auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output" , |
141 | WeightVar::MutabilityKind::Mutable); |
142 | |
143 | auto *tensorView1 = bb.createTensorViewInst( |
144 | "tensor_view1" , input, |
145 | mod.uniqueType(Type{glow::ElemKind::FloatTy, {1, 1}}), {0}); |
146 | |
147 | bb.createTensorViewInst("tensor_view2" , tensorView1, |
148 | mod.uniqueType(Type{glow::ElemKind::FloatTy, {1}}), |
149 | {0, 0}); |
150 | bb.createCopyInst("copy" , output, input); |
151 | |
152 | optimize(M, MockBackend().shouldShareBuffers()); |
153 | |
154 | // Check that all tensor_view instructions are eliminated, because they are |
155 | // never used. |
156 | EXPECT_EQ(M.getInstrs().size(), 1); |
157 | } |
158 | |
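/// Check that copy propagation removes a copy even when the copied buffer is
/// redefined while the copy is still live.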
159 | TEST(Optimizer, copyPropagation) { |
160 | Module mod; |
161 | Function *F = mod.createFunction("ShareBuffers" ); |
162 | IRFunction M(F); |
163 | IRBuilder bb(&M); |
164 | |
165 | auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input" , |
166 | WeightVar::MutabilityKind::Constant); |
167 | auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output" , |
168 | WeightVar::MutabilityKind::Mutable); |
169 | |
170 | auto *alloc1 = |
171 | bb.createAllocActivationInst("alloc1" , glow::ElemKind::FloatTy, 1); |
172 | auto *alloc2 = |
173 | bb.createAllocActivationInst("alloc2" , glow::ElemKind::FloatTy, 1); |
174 | auto *alloc3 = |
175 | bb.createAllocActivationInst("alloc3" , glow::ElemKind::FloatTy, 1); |
176 | bb.createSplatInst("splat1" , alloc1, 1.0); |
177 | bb.createCopyInst("copy1" , alloc2, alloc1); |
178 | bb.createElementAddInst("elem_add1" , output, alloc2, input); |
179 | bb.createSplatInst("splat2" , alloc1, 0.0); |
180 | bb.createElementAddInst("elem_add2" , output, alloc2, alloc1); |
181 | bb.createDeallocActivationInst("dealloc3" , alloc3); |
182 | bb.createDeallocActivationInst("dealloc2" , alloc2); |
183 | bb.createDeallocActivationInst("dealloc1" , alloc1); |
184 | |
185 | optimize(M, MockBackend().shouldShareBuffers()); |
186 | |
187 | EXPECT_EQ(M.getInstrs().size(), 5); |
188 | |
189 | auto &instrs = M.getInstrs(); |
190 | EXPECT_TRUE(std::none_of( |
191 | instrs.begin(), instrs.end(), [](const Instruction &I) -> bool { |
192 | return I.getKind() == Instruction::Kind::CopyInstKind; |
193 | })); |
194 | } |
195 | |
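/// Check that a copy of a splat into a temporary buffer is folded away along
/// with all allocs and deallocs, leaving just the splat and the add.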
196 | TEST(Optimizer, copyPropagationSimple) { |
197 | Module mod; |
  auto *F = mod.createFunction("CopyPropagationSimple");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input",
                                   WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  auto *alloc1 =
      bb.createAllocActivationInst("alloc1", glow::ElemKind::FloatTy, 1);
  auto *alloc2 =
      bb.createAllocActivationInst("alloc2", glow::ElemKind::FloatTy, 1);
  bb.createSplatInst("splat1", alloc1, 1.0);
  bb.createCopyInst("copy1", alloc2, alloc1);
  bb.createElementAddInst("elem_add1", output, alloc2, input);
  bb.createDeallocActivationInst("dealloc2", alloc2);
  bb.createDeallocActivationInst("dealloc1", alloc1);
216 | |
217 | optimize(M, MockBackend().shouldShareBuffers()); |
218 | |
219 | EXPECT_EQ(M.getInstrs().size(), 2); |
220 | |
221 | auto &instrs = M.getInstrs(); |
222 | EXPECT_TRUE(std::none_of( |
223 | instrs.begin(), instrs.end(), [](const Instruction &I) -> bool { |
224 | return isa<AllocActivationInst>(&I) || isa<DeallocActivationInst>(&I) || |
225 | isa<CopyInst>(&I); |
226 | })); |
227 | } |
228 | |
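/// Check that a transpose feeding only element-wise uses of a splat is
/// eliminated, along with all allocs and deallocs.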
229 | TEST(Optimizer, copyPropagationTranspose) { |
230 | Module mod; |
231 | Function *F = mod.createFunction("ShareBuffers" ); |
232 | IRFunction M(F); |
233 | IRBuilder bb(&M); |
234 | |
235 | auto *output1 = |
236 | bb.createWeightVar(glow::ElemKind::FloatTy, {3, 1, 1}, "output1" , |
237 | WeightVar::MutabilityKind::Mutable); |
238 | auto *output2 = |
239 | bb.createWeightVar(glow::ElemKind::FloatTy, {1, 1, 3}, "output2" , |
240 | WeightVar::MutabilityKind::Mutable); |
241 | |
242 | auto *alloc1 = bb.createAllocActivationInst("alloc1" , glow::ElemKind::FloatTy, |
243 | {1, 1, 3}); |
244 | auto *alloc2 = bb.createAllocActivationInst("alloc2" , glow::ElemKind::FloatTy, |
245 | {3, 1, 1}); |
246 | bb.createSplatInst("splat1" , alloc1, 1.0); |
  bb.createTransposeInst("transpose", alloc2, alloc1, {2, 0, 1});
  bb.createElementAddInst("elem_add2", output1, alloc2, alloc2);
  bb.createElementAddInst("elem_add3", output2, alloc1, alloc1);
  bb.createDeallocActivationInst("dealloc2", alloc2);
  bb.createDeallocActivationInst("dealloc1", alloc1);
252 | |
253 | optimize(M, MockBackend().shouldShareBuffers()); |
254 | |
255 | EXPECT_EQ(M.getInstrs().size(), 5); |
256 | |
257 | auto &instrs = M.getInstrs(); |
258 | EXPECT_TRUE(std::none_of( |
259 | instrs.begin(), instrs.end(), [](const Instruction &I) -> bool { |
260 | return isa<TransposeInst>(&I) || isa<AllocActivationInst>(&I) || |
261 | isa<DeallocActivationInst>(&I); |
262 | })); |
263 | } |
264 | |
265 | /// Test the isSliceContiguous utility function. |
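/// A slice is contiguous when every dimension after the outermost dimension
/// with a slice size greater than one spans the full source dimension.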
266 | TEST(Optimizer, isSliceContiguous) { |
267 | EXPECT_EQ(isSliceContiguous({1, 1, 1}, {3, 3, 3}), true); |
268 | EXPECT_EQ(isSliceContiguous({1, 1, 2}, {3, 3, 3}), true); |
269 | EXPECT_EQ(isSliceContiguous({1, 1, 3}, {3, 3, 3}), true); |
270 | EXPECT_EQ(isSliceContiguous({1, 2, 1}, {3, 3, 3}), false); |
271 | EXPECT_EQ(isSliceContiguous({1, 2, 2}, {3, 3, 3}), false); |
272 | EXPECT_EQ(isSliceContiguous({1, 2, 3}, {3, 3, 3}), true); |
273 | EXPECT_EQ(isSliceContiguous({1, 3, 1}, {3, 3, 3}), false); |
274 | EXPECT_EQ(isSliceContiguous({1, 3, 2}, {3, 3, 3}), false); |
275 | EXPECT_EQ(isSliceContiguous({1, 3, 3}, {3, 3, 3}), true); |
276 | EXPECT_EQ(isSliceContiguous({2, 1, 1}, {3, 3, 3}), false); |
277 | EXPECT_EQ(isSliceContiguous({2, 1, 2}, {3, 3, 3}), false); |
278 | EXPECT_EQ(isSliceContiguous({2, 1, 3}, {3, 3, 3}), false); |
279 | EXPECT_EQ(isSliceContiguous({2, 2, 1}, {3, 3, 3}), false); |
280 | EXPECT_EQ(isSliceContiguous({2, 2, 2}, {3, 3, 3}), false); |
281 | EXPECT_EQ(isSliceContiguous({2, 2, 3}, {3, 3, 3}), false); |
282 | EXPECT_EQ(isSliceContiguous({2, 3, 1}, {3, 3, 3}), false); |
283 | EXPECT_EQ(isSliceContiguous({2, 3, 2}, {3, 3, 3}), false); |
284 | EXPECT_EQ(isSliceContiguous({2, 3, 3}, {3, 3, 3}), true); |
285 | EXPECT_EQ(isSliceContiguous({3, 1, 1}, {3, 3, 3}), false); |
286 | EXPECT_EQ(isSliceContiguous({3, 1, 2}, {3, 3, 3}), false); |
287 | EXPECT_EQ(isSliceContiguous({3, 1, 3}, {3, 3, 3}), false); |
288 | EXPECT_EQ(isSliceContiguous({3, 2, 1}, {3, 3, 3}), false); |
289 | EXPECT_EQ(isSliceContiguous({3, 2, 2}, {3, 3, 3}), false); |
290 | EXPECT_EQ(isSliceContiguous({3, 2, 3}, {3, 3, 3}), false); |
291 | EXPECT_EQ(isSliceContiguous({3, 3, 1}, {3, 3, 3}), false); |
292 | EXPECT_EQ(isSliceContiguous({3, 3, 2}, {3, 3, 3}), false); |
293 | EXPECT_EQ(isSliceContiguous({3, 3, 3}, {3, 3, 3}), true); |
294 | } |
295 | |
/// Utility function for testing the optimization of an InsertTensorInst into
/// a TensorViewInst when the inserted tensor (slice) is contiguous.
298 | static void testInsertOptimizer(llvm::ArrayRef<dim_t> srcShape, |
299 | llvm::ArrayRef<dim_t> destShape, |
300 | llvm::ArrayRef<dim_t> offsets) { |
301 | Module mod; |
302 | Function *F = mod.createFunction("InsertOptimizer" ); |
303 | IRFunction M(F); |
304 | IRBuilder bb(&M); |
305 | |
306 | auto *dest = bb.createWeightVar(glow::ElemKind::FloatTy, destShape, "dest" , |
307 | WeightVar::MutabilityKind::Mutable); |
308 | auto *srcAlloc = bb.createAllocActivationInst( |
309 | "srcAlloc" , glow::ElemKind::FloatTy, srcShape); |
310 | bb.createSplatInst("srcSplat" , srcAlloc, 1.0); |
311 | bb.createSplatInst("destSplat" , dest, 2.0); |
312 | bb.createInsertTensorInst("insert" , dest, srcAlloc, offsets, 1, 0); |
313 | bb.createDeallocActivationInst("deallocSrc" , srcAlloc); |
314 | |
315 | optimize(M, MockBackend().shouldShareBuffers()); |
316 | |
317 | auto &instrs = M.getInstrs(); |
  if (srcShape == destShape) {
    // If the slice was fully inserted then we should be left with only the
    // source Splat.
    EXPECT_EQ(instrs.size(), 1);
    EXPECT_EQ(instrs.begin()->getName().str(), std::string("srcSplat"));
323 | } else if (isSliceContiguous(srcShape, destShape)) { |
324 | // If the slice is contiguous then we should be left with 2 Splats and a |
325 | // TensorView. The Insert, Alloc and Dealloc should be gone. |
326 | EXPECT_EQ(instrs.size(), 3); |
327 | EXPECT_TRUE(std::all_of( |
328 | instrs.begin(), instrs.end(), [](const Instruction &I) -> bool { |
329 | return isa<SplatInst>(&I) || isa<TensorViewInst>(&I); |
330 | })); |
331 | } else { |
332 | // If the slice is not contiguous, we should be left with the original |
333 | // instructions: Alloc, 2 Splats, Insert, Dealloc. |
334 | EXPECT_EQ(instrs.size(), 5); |
335 | } |
336 | } |
337 | |
338 | /// Simple test where a single Insert is replaced by a TensorView with offsets. |
339 | TEST(Optimizer, insertOptimizer) { |
340 | testInsertOptimizer({1, 1, 1}, {3, 3, 3}, {0, 0, 0}); |
341 | testInsertOptimizer({1, 1, 2}, {3, 3, 3}, {1, 1, 1}); |
342 | testInsertOptimizer({1, 1, 3}, {3, 3, 3}, {2, 2, 0}); |
343 | testInsertOptimizer({1, 2, 1}, {3, 3, 3}, {0, 0, 1}); |
344 | testInsertOptimizer({1, 2, 2}, {3, 3, 3}, {1, 1, 0}); |
345 | testInsertOptimizer({1, 2, 3}, {3, 3, 3}, {2, 0, 0}); |
346 | testInsertOptimizer({1, 3, 1}, {3, 3, 3}, {0, 0, 0}); |
347 | testInsertOptimizer({1, 3, 2}, {3, 3, 3}, {1, 0, 1}); |
348 | testInsertOptimizer({1, 3, 3}, {3, 3, 3}, {2, 0, 0}); |
349 | testInsertOptimizer({2, 1, 1}, {3, 3, 3}, {0, 0, 1}); |
350 | testInsertOptimizer({2, 1, 2}, {3, 3, 3}, {1, 1, 0}); |
351 | testInsertOptimizer({2, 1, 3}, {3, 3, 3}, {0, 2, 0}); |
352 | testInsertOptimizer({2, 2, 1}, {3, 3, 3}, {1, 0, 0}); |
353 | testInsertOptimizer({2, 2, 2}, {3, 3, 3}, {0, 1, 1}); |
354 | testInsertOptimizer({2, 2, 3}, {3, 3, 3}, {1, 0, 0}); |
355 | testInsertOptimizer({2, 3, 1}, {3, 3, 3}, {0, 0, 1}); |
356 | testInsertOptimizer({2, 3, 2}, {3, 3, 3}, {1, 0, 0}); |
357 | testInsertOptimizer({2, 3, 3}, {3, 3, 3}, {0, 0, 0}); |
358 | testInsertOptimizer({3, 1, 1}, {3, 3, 3}, {0, 0, 0}); |
359 | testInsertOptimizer({3, 1, 2}, {3, 3, 3}, {0, 1, 1}); |
360 | testInsertOptimizer({3, 1, 3}, {3, 3, 3}, {0, 2, 0}); |
361 | testInsertOptimizer({3, 2, 1}, {3, 3, 3}, {0, 0, 1}); |
362 | testInsertOptimizer({3, 2, 2}, {3, 3, 3}, {0, 1, 0}); |
363 | testInsertOptimizer({3, 2, 3}, {3, 3, 3}, {0, 0, 0}); |
364 | testInsertOptimizer({3, 3, 1}, {3, 3, 3}, {0, 0, 0}); |
365 | testInsertOptimizer({3, 3, 2}, {3, 3, 3}, {0, 0, 1}); |
366 | testInsertOptimizer({3, 3, 3}, {3, 3, 3}, {0, 0, 0}); |
367 | } |
368 | |
/// Utility function for testing the optimization of an ExtractTensorInst into
/// a TensorViewInst when the extracted tensor (slice) is contiguous.
static void testExtractOptimizer(llvm::ArrayRef<dim_t> destShape,
                                 llvm::ArrayRef<dim_t> srcShape,
                                 llvm::ArrayRef<dim_t> offsets) {
  Module mod;
  Function *F = mod.createFunction("ExtractOptimizer");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *src = bb.createWeightVar(glow::ElemKind::FloatTy, srcShape, "src",
                                 WeightVar::MutabilityKind::Mutable);
  auto *dest = bb.createWeightVar(glow::ElemKind::FloatTy, destShape, "dest",
                                  WeightVar::MutabilityKind::Mutable);
  bb.createSplatInst("srcSplat", src, 1.0);
  auto *destAlloc = bb.createAllocActivationInst(
      "destAlloc", glow::ElemKind::FloatTy, destShape);
  bb.createExtractTensorInst("extract", destAlloc, src, offsets);
  bb.createCopyInst("save", dest, destAlloc);
  bb.createDeallocActivationInst("deallocDest", destAlloc);
389 | |
390 | optimize(M, MockBackend().shouldShareBuffers()); |
391 | |
392 | auto &instrs = M.getInstrs(); |
393 | if (destShape == srcShape) { |
394 | // If the slice was fully extracted then we should be left with a Splat |
395 | // and a Copy. The Alloc, Extract and Dealloc should be gone. |
396 | EXPECT_EQ(instrs.size(), 2); |
397 | EXPECT_TRUE(std::all_of(instrs.begin(), instrs.end(), |
398 | [](const Instruction &I) -> bool { |
399 | return isa<SplatInst>(&I) || isa<CopyInst>(&I); |
400 | })); |
401 | } else if (isSliceContiguous(destShape, srcShape)) { |
402 | // If the extracted slice is contiguous then we should be left with a Splat, |
403 | // a TensorView and a Copy. The Extract, Alloc and Dealloc should be gone. |
404 | EXPECT_EQ(instrs.size(), 3); |
405 | EXPECT_TRUE(std::all_of( |
406 | instrs.begin(), instrs.end(), [](const Instruction &I) -> bool { |
407 | return isa<SplatInst>(&I) || isa<TensorViewInst>(&I) || |
408 | isa<CopyInst>(&I); |
409 | })); |
410 | } else { |
411 | // If the slice is not contiguous, we should be left with a Splat and an |
412 | // Extract. The Alloc, Copy and Dealloc should be gone. |
413 | EXPECT_EQ(instrs.size(), 2); |
414 | EXPECT_TRUE(std::all_of( |
415 | instrs.begin(), instrs.end(), [](const Instruction &I) -> bool { |
416 | return isa<SplatInst>(&I) || isa<ExtractTensorInst>(&I); |
417 | })); |
418 | } |
419 | } |
420 | |
421 | /// Simple test where a single Extract is replaced by a TensorView with offsets. |
422 | TEST(Optimizer, extractOptimizer) { |
423 | testExtractOptimizer({1, 1, 1}, {3, 3, 3}, {0, 0, 0}); |
424 | testExtractOptimizer({1, 1, 2}, {3, 3, 3}, {1, 1, 1}); |
425 | testExtractOptimizer({1, 1, 3}, {3, 3, 3}, {2, 2, 0}); |
426 | testExtractOptimizer({1, 2, 1}, {3, 3, 3}, {0, 0, 1}); |
427 | testExtractOptimizer({1, 2, 2}, {3, 3, 3}, {1, 1, 0}); |
428 | testExtractOptimizer({1, 2, 3}, {3, 3, 3}, {2, 0, 0}); |
429 | testExtractOptimizer({1, 3, 1}, {3, 3, 3}, {0, 0, 0}); |
430 | testExtractOptimizer({1, 3, 2}, {3, 3, 3}, {1, 0, 1}); |
431 | testExtractOptimizer({1, 3, 3}, {3, 3, 3}, {2, 0, 0}); |
432 | testExtractOptimizer({2, 1, 1}, {3, 3, 3}, {0, 0, 1}); |
433 | testExtractOptimizer({2, 1, 2}, {3, 3, 3}, {1, 1, 0}); |
434 | testExtractOptimizer({2, 1, 3}, {3, 3, 3}, {0, 2, 0}); |
435 | testExtractOptimizer({2, 2, 1}, {3, 3, 3}, {1, 0, 0}); |
436 | testExtractOptimizer({2, 2, 2}, {3, 3, 3}, {0, 1, 1}); |
437 | testExtractOptimizer({2, 2, 3}, {3, 3, 3}, {1, 0, 0}); |
438 | testExtractOptimizer({2, 3, 1}, {3, 3, 3}, {0, 0, 1}); |
439 | testExtractOptimizer({2, 3, 2}, {3, 3, 3}, {1, 0, 0}); |
440 | testExtractOptimizer({2, 3, 3}, {3, 3, 3}, {0, 0, 0}); |
441 | testExtractOptimizer({3, 1, 1}, {3, 3, 3}, {0, 0, 0}); |
442 | testExtractOptimizer({3, 1, 2}, {3, 3, 3}, {0, 1, 1}); |
443 | testExtractOptimizer({3, 1, 3}, {3, 3, 3}, {0, 2, 0}); |
444 | testExtractOptimizer({3, 2, 1}, {3, 3, 3}, {0, 0, 1}); |
445 | testExtractOptimizer({3, 2, 2}, {3, 3, 3}, {0, 1, 0}); |
446 | testExtractOptimizer({3, 2, 3}, {3, 3, 3}, {0, 0, 0}); |
447 | testExtractOptimizer({3, 3, 1}, {3, 3, 3}, {0, 0, 0}); |
448 | testExtractOptimizer({3, 3, 2}, {3, 3, 3}, {0, 0, 1}); |
449 | testExtractOptimizer({3, 3, 3}, {3, 3, 3}, {0, 0, 0}); |
450 | } |
451 | |
452 | /// This is representative of what a ConcatNode is IRGen'd into: src1 and src2 |
453 | /// represent the two tensors that are being concatenated, and dest represents |
454 | /// the resulting concatenated tensor. |
455 | TEST(Optimizer, twoInsertsWithBuffersOptimizer) { |
456 | Module mod; |
457 | Function *F = mod.createFunction("InsertWithBufferOptimizer" ); |
458 | IRFunction M(F); |
459 | IRBuilder bb(&M); |
460 | |
461 | auto *output = |
462 | bb.createWeightVar(glow::ElemKind::FloatTy, {4, 4, 5}, "output" , |
463 | WeightVar::MutabilityKind::Mutable); |
464 | |
465 | auto *allocSrc1 = bb.createAllocActivationInst( |
466 | "allocSrc1" , glow::ElemKind::FloatTy, {2, 4, 5}); |
467 | auto *allocSrc2 = bb.createAllocActivationInst( |
468 | "allocSrc2" , glow::ElemKind::FloatTy, {2, 4, 5}); |
469 | auto *allocDest = bb.createAllocActivationInst( |
470 | "allocDest" , glow::ElemKind::FloatTy, {4, 4, 5}); |
471 | |
472 | bb.createSplatInst("splatSrc1" , allocSrc1, 1.0); |
473 | bb.createSplatInst("splatSrc2" , allocSrc2, 2.0); |
474 | bb.createSplatInst("splatDest" , allocDest, 3.0); |
475 | |
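  // The two inserts together cover allocDest completely along dimension 0
  // (rows 0-1 and 2-3), mirroring how a ConcatNode is lowered.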
476 | bb.createInsertTensorInst("insert1" , allocDest, allocSrc1, {0, 0, 0}, 1, 0); |
477 | bb.createInsertTensorInst("insert2" , allocDest, allocSrc2, {2, 0, 0}, 1, 0); |
478 | |
479 | bb.createCopyInst("copy" , output, allocDest); |
480 | |
481 | bb.createDeallocActivationInst("deallocDest" , allocDest); |
482 | bb.createDeallocActivationInst("deallocSrc2" , allocSrc2); |
483 | bb.createDeallocActivationInst("deallocSrc1" , allocSrc1); |
484 | |
485 | optimize(M, MockBackend().shouldShareBuffers()); |
486 | |
487 | // After optimization, should be left with three splats and two tensorviews; |
488 | // the inserts, allocs, and deallocs should be gone. |
489 | auto &instrs = M.getInstrs(); |
490 | EXPECT_EQ(instrs.size(), 5); |
491 | EXPECT_TRUE(std::all_of( |
492 | instrs.begin(), instrs.end(), [](const Instruction &I) -> bool { |
493 | return isa<SplatInst>(&I) || isa<TensorViewInst>(&I); |
494 | })); |
495 | } |
496 | |
497 | /// This is representative of what a SliceNode is IRGen'd into: src is the |
498 | /// original source tensor, and then two slices are created into dest1 and |
499 | /// dest2. |
500 | TEST(Optimizer, twoExtractsWithBuffersOptimizer) { |
501 | Module mod; |
502 | Function *F = mod.createFunction("ExtractWithBufferOptimizer" ); |
503 | IRFunction M(F); |
504 | IRBuilder bb(&M); |
505 | |
506 | auto *output1 = |
507 | bb.createWeightVar(glow::ElemKind::FloatTy, {2, 4, 5}, "output1" , |
508 | WeightVar::MutabilityKind::Mutable); |
509 | auto *output2 = |
510 | bb.createWeightVar(glow::ElemKind::FloatTy, {2, 4, 5}, "output2" , |
511 | WeightVar::MutabilityKind::Mutable); |
512 | |
513 | auto *allocSrc = bb.createAllocActivationInst( |
514 | "allocSrc" , glow::ElemKind::FloatTy, {4, 4, 5}); |
515 | auto *allocDest1 = bb.createAllocActivationInst( |
516 | "allocDest1" , glow::ElemKind::FloatTy, {2, 4, 5}); |
517 | auto *allocDest2 = bb.createAllocActivationInst( |
518 | "allocDest2" , glow::ElemKind::FloatTy, {2, 4, 5}); |
519 | |
520 | bb.createSplatInst("splatSrc" , allocSrc, 3.0); |
521 | |
522 | bb.createExtractTensorInst("extract1" , allocDest1, allocSrc, {0, 0, 0}); |
523 | bb.createExtractTensorInst("extract2" , allocDest2, allocSrc, {2, 0, 0}); |
524 | |
525 | bb.createCopyInst("copy" , output1, allocDest1); |
526 | bb.createCopyInst("copy" , output2, allocDest2); |
527 | |
528 | bb.createDeallocActivationInst("deallocSrc" , allocSrc); |
529 | bb.createDeallocActivationInst("deallocDest2" , allocDest2); |
530 | bb.createDeallocActivationInst("deallocDest1" , allocDest1); |
531 | |
532 | optimize(M, MockBackend().shouldShareBuffers()); |
533 | |
534 | // After optimization, the extracts should be gone, as well as both allocDests |
535 | // and their deallocs. Should be left with splatSrc, allocSrc, deallocSrc, two |
536 | // tensorviews, and two copies from the tensorviews into the outputs. |
537 | auto &instrs = M.getInstrs(); |
538 | EXPECT_EQ(instrs.size(), 7); |
539 | EXPECT_TRUE(std::none_of( |
540 | instrs.begin(), instrs.end(), |
541 | [](const Instruction &I) -> bool { return isa<ExtractTensorInst>(&I); })); |
542 | } |
543 | |
/// Check that we are able to coalesce a copy forward from the input.
/// This test consists of a copy from the input variable.
/// Its main characteristic is that this copy cannot be coalesced with
/// the output (otherwise it would be a backward chain of
/// copies from output).
/// The shareBuffers optimization works backward, so as long as
/// it manages to coalesce things with the output one by one, we
/// cannot see whether forward copies are properly handled.
552 | TEST(Optimizer, forwardCopy) { |
553 | Module mod; |
554 | Function *F = mod.createFunction("forwardCopy" ); |
555 | IRFunction M(F); |
556 | IRBuilder bb(&M); |
557 | |
558 | auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {64}, "input" , |
559 | WeightVar::MutabilityKind::Mutable); |
560 | auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output" , |
561 | WeightVar::MutabilityKind::Mutable); |
562 | auto *tmp1 = |
563 | bb.createAllocActivationInst("tmp1" , glow::ElemKind::FloatTy, {64}); |
564 | bb.createCopyInst("copy1" , tmp1, input); |
565 | |
566 | auto *view = bb.createTensorViewInst( |
567 | "view" , tmp1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 64})), |
568 | {0}); |
569 | bb.createInsertTensorInst("copyOutput" , output, view, {0, 0}, 1, 0); |
570 | |
571 | bb.createDeallocActivationInst("dealloc1" , tmp1); |
572 | |
573 | auto &instrs = M.getInstrs(); |
574 | auto nbInstrsBeforeOpt = instrs.size(); |
575 | optimize(M, MockBackend().shouldShareBuffers()); |
576 | |
  // After optimization, the copy should have been coalesced with input.
  // Expected count: nbInstrsBeforeOpt - 1 copy - 1 alloc - 1 dealloc.
  EXPECT_EQ(instrs.size(),
            nbInstrsBeforeOpt - 1 /*copy*/ - 1 /*alloc*/ - 1 /*dealloc*/);
581 | EXPECT_TRUE(std::none_of(instrs.begin(), instrs.end(), |
582 | [](const Instruction &I) -> bool { |
583 | return isa<AllocActivationInst>(&I); |
584 | })); |
585 | EXPECT_TRUE(std::none_of( |
586 | instrs.begin(), instrs.end(), |
587 | [](const Instruction &I) -> bool { return isa<CopyInst>(&I); })); |
588 | } |
589 | |
590 | /// Check that we are able to coalesce chain of copies |
591 | /// forward from the input. |
/// This test is similar to forwardCopy, except it uses a chain of copies
/// (more than one) instead of just one copy from input.
594 | TEST(Optimizer, chainOfTwoForwardCopies) { |
595 | Module mod; |
596 | Function *F = mod.createFunction("chainOfTwoForwardCopies" ); |
597 | IRFunction M(F); |
598 | IRBuilder bb(&M); |
599 | |
600 | auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {64}, "input" , |
601 | WeightVar::MutabilityKind::Mutable); |
602 | auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output" , |
603 | WeightVar::MutabilityKind::Mutable); |
604 | auto *tmp1 = |
605 | bb.createAllocActivationInst("tmp1" , glow::ElemKind::FloatTy, {64}); |
606 | bb.createCopyInst("copy1" , tmp1, input); |
607 | |
608 | auto *tmp2 = |
609 | bb.createAllocActivationInst("tmp2" , glow::ElemKind::FloatTy, {64}); |
610 | bb.createCopyInst("copy2" , tmp2, tmp1); |
611 | auto *view = bb.createTensorViewInst( |
612 | "view" , tmp2, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 64})), |
613 | {0}); |
614 | bb.createInsertTensorInst("copyOutput" , output, view, {0, 0}, 1, 0); |
615 | |
616 | bb.createDeallocActivationInst("dealloc1" , tmp1); |
617 | bb.createDeallocActivationInst("dealloc2" , tmp2); |
618 | |
619 | auto &instrs = M.getInstrs(); |
620 | auto nbInstrsBeforeOpt = instrs.size(); |
621 | optimize(M, MockBackend().shouldShareBuffers()); |
622 | |
  // After optimization, the copies should have been coalesced with
  // input.
  // Ideally, we should get rid of the 2 copies and the related 2 alloc
  // activations and 2 deallocations.
  // Therefore the expected instruction count is
  // nbInstrsBeforeOpt - 2 copies - 2 allocs - 2 deallocs.
  EXPECT_EQ(instrs.size(),
            nbInstrsBeforeOpt - 2 /*copies*/ - 2 /*allocs*/ - 2 /*deallocs*/);
631 | EXPECT_TRUE(std::none_of(instrs.begin(), instrs.end(), |
632 | [](const Instruction &I) -> bool { |
633 | return isa<AllocActivationInst>(&I); |
634 | })); |
635 | EXPECT_TRUE(std::none_of( |
636 | instrs.begin(), instrs.end(), |
637 | [](const Instruction &I) -> bool { return isa<CopyInst>(&I); })); |
638 | } |
639 | |
640 | /// The idea of this test is to have live intervals looking like this: |
641 | /// A B |
642 | /// | <-copy | |
643 | /// inout | |
644 | /// | | |
645 | /// Because of the inout on A, A and B interfere. |
646 | /// Make sure we don't coalesce such buffers. |
647 | TEST(Optimizer, inoutCopy) { |
648 | Module mod; |
649 | Function *F = mod.createFunction("inoutCopy" ); |
650 | IRFunction M(F); |
651 | IRBuilder bb(&M); |
652 | |
653 | auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "input" , |
654 | WeightVar::MutabilityKind::Mutable); |
655 | auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {3, 64}, "output" , |
656 | WeightVar::MutabilityKind::Mutable); |
657 | auto *output2 = |
658 | bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output2" , |
659 | WeightVar::MutabilityKind::Mutable); |
660 | // This copy cannot be eliminated because input must not be changed. |
661 | // Indeed, this is an observable variable plus it is used as a source |
662 | // for a copy to output2. |
663 | auto *tmp1 = |
664 | bb.createAllocActivationInst("tmp1" , glow::ElemKind::FloatTy, {2, 64}); |
665 | bb.createCopyInst("copy1" , tmp1, input); |
666 | |
667 | auto *tmp2 = |
668 | bb.createAllocActivationInst("tmp2" , glow::ElemKind::FloatTy, {64}); |
669 | bb.createSplatInst("splat" , tmp2, 3.0); |
670 | auto *view = |
671 | bb.createTensorView(ElemKind::FloatTy, {1, 64}, tmp2, "view" , {0}); |
672 | bb.createInsertTensorInst("insertTmp1" , tmp1, view, {0, 0}, 1, 0); |
673 | bb.createInsertTensorInst("insertOutput" , output, tmp1, {1, 0}, 1, 0); |
674 | bb.createCopyInst("copyOutput2" , output2, input); |
675 | |
676 | bb.createDeallocActivationInst("dealloc1" , tmp1); |
677 | bb.createDeallocActivationInst("dealloc2" , tmp2); |
678 | |
679 | optimize(M, MockBackend().shouldShareBuffers()); |
680 | |
681 | // After optimization, the copies shouldn't have been touched. |
682 | // tmp1 = copy input cannot be coalesced because tmp1 is inout. |
683 | // output2 = copy input cannot be coalesced because they are both |
684 | // externally visible. |
685 | EXPECT_EQ(input->getNumUsers(), 2); |
686 | EXPECT_TRUE( |
687 | std::all_of(input->getUsers().begin(), input->getUsers().end(), |
688 | [](const Use &I) -> bool { return isa<CopyInst>(I.get()); })); |
689 | const Value *expectedDest[] = {tmp1, output2}; |
690 | unsigned idx = 0; |
691 | for (const Use &use : input->getUsers()) { |
692 | if (idx == sizeof(expectedDest) / sizeof(expectedDest[0])) { |
693 | // If we end up here that means that input has too many users. |
694 | EXPECT_FALSE(true); |
695 | break; |
696 | } |
697 | EXPECT_EQ(use.get()->getOperand(0).first, expectedDest[idx++]); |
698 | } |
699 | } |
700 | |
701 | /// Check that we properly define a buffer when we extend its live-range |
702 | /// on a segment of the source that does not have any definition. |
703 | /// A source live-range without any definition can happen when this |
704 | /// is the first use of a WeightVar. |
705 | /// At the high level, this test looks like this: |
706 | /// WeightVar Buffer |
707 | /// | useA |
708 | /// | useB | def |
709 | /// | redef | save to output |
710 | /// - UseA is the first use of WeightVar and we want it to be replaced by |
711 | /// a use of Buffer. I.e., Buffer live-range is extended toward the top. |
712 | /// - UseB involves both WeightVar and Buffer. It exposes the buffer sharing |
713 | /// opportunity between these two variables. It must happen after useA |
714 | /// to expose the case of extending the live-range of a buffer toward |
715 | /// the top where no definition exists. |
/// - redef redefines WeightVar. It is necessary because otherwise useA and
///   useB could share the same buffer and thus, we would extend the live-range
///   of the buffer in useA downward (or the use of Buffer up to the
///   definition of the buffer in useA), which is not what we want to test.
/// - save to output is required to keep the def of Buffer alive. Moreover,
///   the save must be done in such a way that output and Buffer cannot
///   share the same buffer. Otherwise, the live-range of output would be
///   extended upward to useB and, given that output and WeightVar are both
///   externally observable, output could not be merged with WeightVar.
///   We would therefore not expose an extension of output up to useA
///   and would not test the case where the replaced buffer doesn't have any
///   definition.
729 | /// |
730 | /// The expected result at a high level looks like this: |
731 | /// WeightVar Buffer |
732 | /// | copy | <- Buffer gets WeightVar |
733 | /// useA | <- Buffer is used instead of WeightVar |
734 | /// useB | def <- ditto |
735 | /// | redef | save to output |
736 | TEST(Optimizer, bufferReuseWithoutDefs) { |
737 | Module mod; |
738 | Function *F = mod.createFunction("bufferReuseWithoutDefs" ); |
739 | IRFunction M(F); |
740 | IRBuilder bb(&M); |
741 | |
742 | auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {64}, "input" , |
743 | WeightVar::MutabilityKind::Mutable); |
744 | auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output" , |
745 | WeightVar::MutabilityKind::Mutable); |
746 | auto *tmp1 = |
747 | bb.createAllocActivationInst("tmp1" , glow::ElemKind::FloatTy, {64}); |
748 | |
749 | auto *tmp2 = |
750 | bb.createAllocActivationInst("tmp2" , glow::ElemKind::FloatTy, {64}); |
751 | auto *tmp3 = |
752 | bb.createAllocActivationInst("tmp3" , glow::ElemKind::FloatTy, {64}); |
753 | |
754 | bb.createSplatInst("tmp2init" , tmp2, 1.0); |
755 | // use input for some stuff. |
756 | auto *useA = bb.createElementAddInst("useA" , tmp3, tmp2, input); |
757 | // Make the first user of input a dependency of the definition |
758 | // of tmp1 that way the scheduler cannot mess with the layout |
759 | // we want for the instructions ordering. |
760 | bb.createElementAddInst("useB" , tmp1, input, tmp3); |
761 | bb.createCopyInst("redef" , input, tmp3); |
762 | auto *view = bb.createTensorViewInst( |
763 | "view" , tmp1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 64})), |
764 | {0}); |
765 | bb.createInsertTensorInst("save" , output, view, {0, 0}, 1, 0); |
766 | |
767 | bb.createDeallocActivationInst("dealloc1" , tmp1); |
768 | bb.createDeallocActivationInst("dealloc2" , tmp2); |
769 | bb.createDeallocActivationInst("dealloc2" , tmp3); |
770 | |
771 | optimize(M, MockBackend().shouldShareBuffers()); |
772 | |
  // Check that we manage to expose the problematic case we wanted:
  // tmp1 is extended upward and replaces the use of input.
775 | EXPECT_EQ(useA->getRHS(), tmp1); |
776 | // Check that tmp1 is properly defined before useA. |
777 | Instruction *instBeforeUseA = &*std::prev(useA->getIterator()); |
778 | |
779 | EXPECT_TRUE(isa<CopyInst>(instBeforeUseA)); |
780 | // The somewhat complicated check is to make sure we don't crash the test |
781 | // when instBeforeUseA is not a copy. |
782 | // I.e., this test was failing (instead of crashing) when the |
783 | // bug was present. |
784 | EXPECT_EQ(instBeforeUseA->getNumOperands() > 0 |
785 | ? instBeforeUseA->getOperand(0).first |
786 | : nullptr, |
787 | tmp1); |
788 | EXPECT_EQ(instBeforeUseA->getNumOperands() > 1 |
789 | ? instBeforeUseA->getOperand(1).first |
790 | : nullptr, |
791 | input); |
792 | } |
793 | |
794 | /// Same as bufferReuseWithoutDefs but with casts in the middle. |
795 | /// This makes sure that we properly set the types for whatever fixup |
796 | /// code we will insert. |
797 | /// The high level view of the test is: |
798 | /// WeightVar Buffer |
799 | /// | useA |
800 | /// | useB(cast)| def |
801 | /// | redef | save to output |
802 | /// |
803 | /// The expected result at a high level looks like this: |
804 | /// WeightVar Buffer |
805 | /// | copy(cast)| <- Buffer gets WeightVar |
806 | /// useA(cast)| <- Buffer is used instead of WeightVar |
807 | /// useB | def <- ditto |
808 | /// | redef | save to output |
809 | TEST(Optimizer, bufferReuseWithoutDefsPlusCasts) { |
810 | Module mod; |
811 | Function *F = mod.createFunction("bufferReuseWithoutDefsPlusCasts" ); |
812 | IRFunction M(F); |
813 | IRBuilder bb(&M); |
814 | |
815 | auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1, 64}, "input" , |
816 | WeightVar::MutabilityKind::Mutable); |
817 | auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output" , |
818 | WeightVar::MutabilityKind::Mutable); |
819 | auto *tmp1 = |
820 | bb.createAllocActivationInst("tmp1" , glow::ElemKind::FloatTy, {64}); |
821 | |
822 | auto *tmp2 = |
823 | bb.createAllocActivationInst("tmp2" , glow::ElemKind::FloatTy, {1, 64}); |
824 | auto *tmp3 = |
825 | bb.createAllocActivationInst("tmp3" , glow::ElemKind::FloatTy, {1, 64}); |
826 | |
827 | bb.createSplatInst("tmp2init" , tmp2, 1.0); |
828 | auto *useA = bb.createElementAddInst("useA" , tmp3, tmp2, input); |
829 | auto *inputView = bb.createTensorViewInst( |
830 | "inputView" , input, mod.uniqueType(Type(glow::ElemKind::FloatTy, {64})), |
831 | {0, 0}); |
832 | auto *tmp3View = bb.createTensorViewInst( |
833 | "tmp3View" , tmp3, mod.uniqueType(Type(glow::ElemKind::FloatTy, {64})), |
834 | {0, 0}); |
835 | |
836 | bb.createElementAddInst("useB" , tmp1, inputView, tmp3View); |
837 | bb.createCopyInst("redef" , input, tmp3); |
838 | auto *view = bb.createTensorViewInst( |
839 | "view" , tmp1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 64})), |
840 | {0}); |
841 | bb.createInsertTensorInst("save" , output, view, {0, 0}, 1, 0); |
842 | |
843 | bb.createDeallocActivationInst("dealloc1" , tmp1); |
844 | bb.createDeallocActivationInst("dealloc2" , tmp2); |
845 | bb.createDeallocActivationInst("dealloc2" , tmp3); |
846 | |
847 | optimize(M, MockBackend().shouldShareBuffers()); |
848 | |
  // Check that we manage to expose the problematic case we wanted:
  // tmp1 is extended upward and replaces the use of input.
851 | Value *useARHS = useA->getRHS(); |
852 | EXPECT_EQ(getOrigin(useARHS), tmp1); |
853 | Instruction *tmp1TensorView = dyn_cast<TensorViewInst>(useARHS); |
854 | EXPECT_TRUE(tmp1TensorView && tmp1TensorView->getOperand(0).first == tmp1); |
855 | // Check that tmp1 is properly defined before useA. |
856 | Instruction *tmp1Fixup = |
857 | tmp1TensorView ? &*std::prev(tmp1TensorView->getIterator()) : nullptr; |
858 | EXPECT_TRUE(tmp1Fixup && isa<CopyInst>(tmp1Fixup)); |
859 | // The somewhat complicated check is to make sure we don't crash the test |
860 | // when instBeforeUseA is not a copy. |
861 | EXPECT_EQ((tmp1Fixup && tmp1Fixup->getNumOperands() > 0) |
862 | ? getOrigin(tmp1Fixup->getOperand(0).first) |
863 | : nullptr, |
864 | tmp1); |
865 | // Now check that input feeds tmp1Fixup and was properly casted. |
866 | Instruction *inputCast = |
867 | (tmp1Fixup && tmp1Fixup->getNumOperands() > 1) |
868 | ? dyn_cast<TensorViewInst>(tmp1Fixup->getOperand(1).first) |
869 | : nullptr; |
870 | EXPECT_EQ(inputCast ? getOrigin(inputCast) : nullptr, input); |
871 | EXPECT_EQ(inputCast ? inputCast->getOperand(0).first : nullptr, input); |
872 | } |
873 | |
874 | /// Check that a copy from a buffer to itself is |
875 | /// detected when both src and dest are hidden under TensorView |
876 | /// instructions and eliminated if the linearized offsets of the src and dest |
877 | /// are equal. |
878 | TEST(Optimizer, copyEliminationTensorViewToTensorView) { |
879 | Module mod; |
880 | Function *F = mod.createFunction("copyEliminationTensorViewToTensorView" ); |
881 | IRFunction M(F); |
882 | IRBuilder bb(&M); |
883 | |
884 | // Test that a copy between tensorviews with identical offsets which have |
885 | // different src operands with different offsets into the same underlying |
  // buffer is not optimized away.
887 | |
888 | // Create a WeightVar for TensorViews to use as their source operand. |
  auto *A = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "A",
                               WeightVar::MutabilityKind::Mutable);

  // Create a view into A.
  auto *view1 = bb.createTensorViewInst(
      "view1", A, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 2, 1})),
      {0, 0});

  // Create another view into A with the same shape as view1 but different
  // offsets.
  auto *view2 = bb.createTensorViewInst(
      "view2", A, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 2, 1})),
      {1, 1});
902 | |
903 | // Create views into view1 and view2 with identical offsets. |
  auto *view3 = bb.createTensorViewInst(
      "view3", view1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {2, 1})),
      {0, 0, 0});

  auto *view4 = bb.createTensorViewInst(
      "view4", view2, mod.uniqueType(Type(glow::ElemKind::FloatTy, {2, 1})),
      {0, 0, 0});

  // Create a copy from view3 to view4. Although their own offsets are
  // identical, they point at different linearized offsets into A (0 and 3),
  // so this copy must not be optimized out.
  bb.createCopyInst("copyViewToView", view3, view4);
915 | |
916 | auto &instrs = M.getInstrs(); |
917 | optimize(M, MockBackend().shouldShareBuffers()); |
918 | |
  // All instructions should remain because the linearized offsets of the two
  // final tensorviews are not the same.
921 | EXPECT_EQ(instrs.size(), 5); |
922 | EXPECT_FALSE(std::none_of( |
923 | instrs.begin(), instrs.end(), |
924 | [](const Instruction &I) -> bool { return isa<CopyInst>(&I); })); |
925 | EXPECT_FALSE(std::none_of( |
926 | instrs.begin(), instrs.end(), |
927 | [](const Instruction &I) -> bool { return isa<TensorViewInst>(&I); })); |
928 | |
929 | // Reset state for next test. |
930 | M.clear(); |
931 | M.setGraph(F); |
932 | |
933 | // Test that a copy between tensorviews with different offsets which have |
934 | // different src operands with different offsets but have the same linearized |
935 | // offset into the same underlying buffer is optimized away. |
936 | |
937 | // Create a WeightVar for TensorViews to use as their source operand. |
  auto *D = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "D",
                               WeightVar::MutabilityKind::Mutable);

  // Create another WeightVar. D will be inserted into this to avoid
  // optimizing all instructions away.
  auto *E = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "E",
                               WeightVar::MutabilityKind::Mutable);

  // Create a view into D. The linearized offset of this TensorView is 0 and
  // the size is 8.
  auto *view7 = bb.createTensorViewInst(
      "view7", D, mod.uniqueType(Type(glow::ElemKind::FloatTy, {4, 2, 1})),
      {0, 0});

  // Create a view into view7. The linearized offset of this TensorView is
  // 4 and the size is 2.
  auto *view8 = bb.createTensorViewInst(
      "view8", view7, mod.uniqueType(Type(glow::ElemKind::FloatTy, {2})),
      {2, 0, 0});

  // Create a view into D. The linearized offset of this TensorView is 4 and
  // the size is 4.
  auto *view9 = bb.createTensorViewInst(
      "view9", D, mod.uniqueType(Type(glow::ElemKind::FloatTy, {4})), {2, 0});

  // Create a view into view9. The linearized offset of this TensorView is 4
  // and the size is 2.
  auto *view10 = bb.createTensorViewInst(
      "view10", view9, mod.uniqueType(Type(glow::ElemKind::FloatTy, {2})), {0});

  // Create a copy from view8 to view10. Since the linearized offsets and
  // types of the two views are identical, this copy should be optimized out.
  bb.createCopyInst("copyViewToView", view8, view10);

  // Insert D into E just to make sure the IR isn't empty after optimization.
  bb.createInsertTensorInst("copyOutput", E, D, /*Offsets=*/{0, 0},
                            /*Count=*/1, /*Axis=*/0);
975 | |
976 | optimize(M, MockBackend().shouldShareBuffers()); |
977 | |
978 | // Only one instruction (the InsertTensor) should remain. |
979 | EXPECT_EQ(instrs.size(), 1); |
980 | EXPECT_TRUE(std::none_of( |
981 | instrs.begin(), instrs.end(), |
982 | [](const Instruction &I) -> bool { return isa<CopyInst>(&I); })); |
983 | EXPECT_TRUE(std::none_of( |
984 | instrs.begin(), instrs.end(), |
985 | [](const Instruction &I) -> bool { return isa<TensorViewInst>(&I); })); |
986 | } |
987 | |
988 | /// Check that a copy from a buffer to itself is |
989 | /// detected when the src is hidden under a layer of TensorView instructions and |
990 | /// eliminated if the linearized offsets of the src and dest are equal. |
991 | TEST(Optimizer, copyEliminationTensorViewBuffer) { |
992 | Module mod; |
993 | Function *F = mod.createFunction("copyEliminationTensorViewToBuffer" ); |
994 | IRFunction M(F); |
995 | IRBuilder bb(&M); |
996 | |
997 | // Create a WeightVar for TensorViews to use as their source operand. |
998 | auto *B = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "B" , |
999 | WeightVar::MutabilityKind::Mutable); |
1000 | |
1001 | // Create another WeightVar. B will be copied into this to avoid |
1002 | // optimizing all instructions away. |
1003 | auto *C = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "C" , |
1004 | WeightVar::MutabilityKind::Mutable); |
1005 | |
1006 | // Create two stacked views into A. Two are required because a tensorview |
1007 | // that has the same type as its src is eliminated before copy elimination is |
1008 | // applied. |
1009 | auto *view1 = bb.createTensorViewInst( |
1010 | "view1" , B, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 4, 2})), |
1011 | {0, 0}); |
1012 | |
1013 | auto *view2 = bb.createTensorViewInst( |
1014 | "view2" , view1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {4, 2})), |
1015 | {0, 0, 0}); |
1016 | |
1017 | // Create a copy from view2 to B. This view points to the start of A and has |
1018 | // the same type, so this should be optimized out. |
1019 | bb.createCopyInst("copyViewToBuf" , view2, B); |
1020 | |
1021 | // Create a copy from B to view2. This should also be optimized out for the |
1022 | // same reason. |
1023 | bb.createCopyInst("copyBufToView" , B, view2); |
1024 | |
1025 | // Insert B into C. This exists just to make sure the optimised IR isn't |
1026 | // empty. |
1027 | bb.createInsertTensorInst("copyOutput" , C, B, /*Offsets=*/{0, 0}, |
1028 | /*Count=*/1, /*Axis=*/0); |
1029 | |
1030 | auto &instrs = M.getInstrs(); |
1031 | optimize(M, MockBackend().shouldShareBuffers()); |
1032 | |
1033 | // Only one instruction (the InsertTensor) should remain. |
1034 | EXPECT_EQ(instrs.size(), 1); |
1035 | EXPECT_TRUE(std::none_of( |
1036 | instrs.begin(), instrs.end(), |
1037 | [](const Instruction &I) -> bool { return isa<CopyInst>(&I); })); |
1038 | EXPECT_TRUE(std::none_of( |
1039 | instrs.begin(), instrs.end(), |
1040 | [](const Instruction &I) -> bool { return isa<TensorViewInst>(&I); })); |
1041 | } |
1042 | |
1043 | /// Check if dump functions work for Value and IRFunction. |
1044 | TEST(Optimizer, dumpDataStructure) { |
1045 | Module mod; |
1046 | Function *F = mod.createFunction("inoutCopy" ); |
1047 | IRFunction M(F); |
1048 | IRBuilder bb(&M); |
1049 | |
1050 | Value *input = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "input" , |
1051 | WeightVar::MutabilityKind::Mutable); |
1052 | // Dump Value. |
1053 | std::string storageV1; |
1054 | llvm::raw_string_ostream osV1(storageV1); |
1055 | input->dump(osV1); |
1056 | std::string mesV = input->toString(); |
1057 | std::string expectMesV = R"(%input = WeightVar float<2 x 64> mutable)" ; |
1058 | EXPECT_EQ(mesV, expectMesV); |
1059 | EXPECT_EQ(mesV, osV1.str()); |
1060 | std::string storageV2; |
1061 | llvm::raw_string_ostream osV2(storageV2); |
1062 | osV2 << input; |
1063 | EXPECT_EQ(mesV, osV2.str()); |
1064 | // Dump IRFunction. |
1065 | std::string storageIRF1; |
1066 | llvm::raw_string_ostream osIRF1(storageIRF1); |
1067 | M.dump(osIRF1); |
1068 | std::string mesI = M.toString(); |
1069 | std::string expectMesI = R"(function inoutCopy |
1070 | declare { |
1071 | %input = WeightVar float<2 x 64> mutable // size: 512 |
1072 | |
1073 | ; size = 512 bytes |
1074 | } |
1075 | |
1076 | code { |
1077 | } |
1078 | )" ; |
1079 | EXPECT_EQ(mesI, expectMesI); |
1080 | EXPECT_EQ(mesI, osIRF1.str()); |
1081 | std::string storageIRF2; |
1082 | llvm::raw_string_ostream osIRF2(storageIRF2); |
1083 | osIRF2 << M; |
1084 | EXPECT_EQ(mesI, osIRF2.str()); |
1085 | } |
1086 | |