/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendTestUtils.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/IR.h"
#include "glow/IR/IRBuilder.h"
#include "glow/IR/IRUtils.h"
#include "glow/IR/Instrs.h"
#include "glow/Optimizer/IROptimizer/IRFunctionPassManager.h"
#include "glow/Optimizer/IROptimizer/IROptimizer.h"

#include "llvm/Support/Casting.h"

#include "gtest/gtest.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>

using namespace glow;
using llvm::cast;
using llvm::dyn_cast;
using llvm::isa;
/// Basic test of DSE (Dead Store Elimination).
TEST(Optimizer, dseBasic) {
  Module mod;
  Function *F = mod.createFunction("DeadStoreElimination");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input1 = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input1",
                                    WeightVar::MutabilityKind::Constant);
  auto *input2 = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input2",
                                    WeightVar::MutabilityKind::Constant);
  auto *input3 = bb.createWeightVar(glow::ElemKind::BoolTy, {1}, "input3",
                                    WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  bb.createElementAddInst("elem_add1", output, input1, input1);
  bb.createElementSelectInst("select", output, input3, output, input2);
  bb.createElementAddInst("elem_add2", output, input2, input2);

  optimize(M, MockBackend().shouldShareBuffers());
  // Check that the first add and the select are eliminated, because their
  // results are overwritten by the final add before ever being read.
  EXPECT_EQ(M.getInstrs().size(), 1);
}
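
// A hedged sketch of what DSE does in dseBasic above (schematic, not the exact
// IR printer output):
//   elem_add    output, input1, input1  ; read only by the select below
//   elem_select output, input3, output, input2  ; overwritten before any read
//   elem_add    output, input2, input2  ; last, observable write -- kept
// The select's result is overwritten by the final add without ever being read,
// so it is removed; once it is gone, the first add's result is unread as well,
// leaving only the final add.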

/// Check that DSE does not remove the last write into a WeightVar.
TEST(Optimizer, dseDoNotRemoveLastWriteIntoWeightVar) {
  Module mod;
  Function *F = mod.createFunction("DeadStoreElimination");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input1 = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input1",
                                    WeightVar::MutabilityKind::Constant);
  auto *input2 = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input2",
                                    WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  // Last write into a WeightVar should not be removed even if there is
  // no instruction that reads it, because it is an observable side-effect.
  bb.createElementAddInst("elem_add", output, input1, input2);
  bb.createTensorViewInst(
      "cast", output, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 1, 1})),
      {0});

  optimize(M, MockBackend().shouldShareBuffers());
  // Check that the dead tensorview is eliminated because its result is never
  // used, while the add that performs the last write into the output WeightVar
  // is kept.
  EXPECT_EQ(M.getInstrs().size(), 1);
}

TEST(Optimizer, shareBuffers) {
  Module mod;
  Function *F = mod.createFunction("ShareBuffers");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input",
                                   WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  auto *alloc1 =
      bb.createAllocActivationInst("alloc1", glow::ElemKind::FloatTy, 1);
  auto *alloc2 =
      bb.createAllocActivationInst("alloc2", glow::ElemKind::FloatTy, 1);
  auto *alloc3 =
      bb.createAllocActivationInst("alloc3", glow::ElemKind::FloatTy, 1);
  bb.createSplatInst("splat1", alloc1, 0.0);
  bb.createSplatInst("splat2", alloc2, 1.0);
  bb.createElementAddInst("elem_add1", alloc3, alloc1, input);
  bb.createElementAddInst("elem_add2", alloc2, input, input);
  // alloc1 and alloc2 are not live after this instruction.
  bb.createElementAddInst("elem_add3", alloc1, alloc2, input);
  bb.createCopyInst("copy", output, alloc3);
  bb.createDeallocActivationInst("dealloc3", alloc3);
  bb.createDeallocActivationInst("dealloc2", alloc2);
  bb.createDeallocActivationInst("dealloc1", alloc1);

  optimize(M, MockBackend().shouldShareBuffers());

  // Check that the dead splat and the dead stores are eliminated and buffers
  // are shared, leaving only the two instructions needed to produce "output".
  EXPECT_EQ(M.getInstrs().size(), 2);
}

TEST(Optimizer, deleteDeadViews) {
  Module mod;
  Function *F = mod.createFunction("DeleteDeadViews");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input",
                                   WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  auto *tensorView1 = bb.createTensorViewInst(
      "tensor_view1", input,
      mod.uniqueType(Type{glow::ElemKind::FloatTy, {1, 1}}), {0});

  bb.createTensorViewInst("tensor_view2", tensorView1,
                          mod.uniqueType(Type{glow::ElemKind::FloatTy, {1}}),
                          {0, 0});
  bb.createCopyInst("copy", output, input);

  optimize(M, MockBackend().shouldShareBuffers());

  // Check that all tensor_view instructions are eliminated, because they are
  // never used.
  EXPECT_EQ(M.getInstrs().size(), 1);
}

TEST(Optimizer, copyPropagation) {
  Module mod;
  Function *F = mod.createFunction("ShareBuffers");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input",
                                   WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  auto *alloc1 =
      bb.createAllocActivationInst("alloc1", glow::ElemKind::FloatTy, 1);
  auto *alloc2 =
      bb.createAllocActivationInst("alloc2", glow::ElemKind::FloatTy, 1);
  auto *alloc3 =
      bb.createAllocActivationInst("alloc3", glow::ElemKind::FloatTy, 1);
  bb.createSplatInst("splat1", alloc1, 1.0);
  bb.createCopyInst("copy1", alloc2, alloc1);
  bb.createElementAddInst("elem_add1", output, alloc2, input);
  bb.createSplatInst("splat2", alloc1, 0.0);
  bb.createElementAddInst("elem_add2", output, alloc2, alloc1);
  bb.createDeallocActivationInst("dealloc3", alloc3);
  bb.createDeallocActivationInst("dealloc2", alloc2);
  bb.createDeallocActivationInst("dealloc1", alloc1);

  optimize(M, MockBackend().shouldShareBuffers());

  EXPECT_EQ(M.getInstrs().size(), 5);

  auto &instrs = M.getInstrs();
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(), [](const Instruction &I) -> bool {
        return I.getKind() == Instruction::Kind::CopyInstKind;
      }));
}

TEST(Optimizer, copyPropagationSimple) {
  Module mod;
  auto *F = mod.createFunction("ShareBuffers");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "input",
                                   WeightVar::MutabilityKind::Constant);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {1}, "output",
                                    WeightVar::MutabilityKind::Mutable);

  auto *alloc1 =
      bb.createAllocActivationInst("alloc1", glow::ElemKind::FloatTy, 1);
  auto *alloc2 =
      bb.createAllocActivationInst("alloc2", glow::ElemKind::FloatTy, 1);
  bb.createSplatInst("splat1", alloc1, 1.0);
  bb.createCopyInst("copy1", alloc2, alloc1);
  bb.createElementAddInst("elem_add1", output, alloc2, input);
  bb.createDeallocActivationInst("dealloc2", alloc2);
  bb.createDeallocActivationInst("dealloc1", alloc1);

  optimize(M, MockBackend().shouldShareBuffers());

  EXPECT_EQ(M.getInstrs().size(), 2);

  auto &instrs = M.getInstrs();
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(), [](const Instruction &I) -> bool {
        return isa<AllocActivationInst>(&I) || isa<DeallocActivationInst>(&I) ||
               isa<CopyInst>(&I);
      }));
}
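
// A hedged sketch of the copyPropagationSimple flow above (schematic, not the
// exact IR printer output). Before optimization:
//   splat alloc1, 1.0
//   copy alloc2, alloc1
//   elem_add output, alloc2, input
// Propagating the copy lets the add read the splatted buffer directly, and
// buffer sharing then removes the remaining temporary, so only a splat and an
// add are left, as the EXPECTs above assert.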

TEST(Optimizer, copyPropagationTranspose) {
  Module mod;
  Function *F = mod.createFunction("ShareBuffers");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *output1 =
      bb.createWeightVar(glow::ElemKind::FloatTy, {3, 1, 1}, "output1",
                         WeightVar::MutabilityKind::Mutable);
  auto *output2 =
      bb.createWeightVar(glow::ElemKind::FloatTy, {1, 1, 3}, "output2",
                         WeightVar::MutabilityKind::Mutable);

  auto *alloc1 = bb.createAllocActivationInst("alloc1", glow::ElemKind::FloatTy,
                                              {1, 1, 3});
  auto *alloc2 = bb.createAllocActivationInst("alloc2", glow::ElemKind::FloatTy,
                                              {3, 1, 1});
  bb.createSplatInst("splat1", alloc1, 1.0);
  bb.createTransposeInst("transpose", alloc2, alloc1, {2, 0, 1});
  bb.createElementAddInst("elem_add2", output1, alloc2, alloc2);
  bb.createElementAddInst("elem_add2", output2, alloc1, alloc1);
  bb.createDeallocActivationInst("dealloc2", alloc2);
  bb.createDeallocActivationInst("dealloc1", alloc1);

  optimize(M, MockBackend().shouldShareBuffers());

  EXPECT_EQ(M.getInstrs().size(), 5);

  auto &instrs = M.getInstrs();
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(), [](const Instruction &I) -> bool {
        return isa<TransposeInst>(&I) || isa<AllocActivationInst>(&I) ||
               isa<DeallocActivationInst>(&I);
      }));
}

/// Test the isSliceContiguous utility function.
TEST(Optimizer, isSliceContiguous) {
  EXPECT_EQ(isSliceContiguous({1, 1, 1}, {3, 3, 3}), true);
  EXPECT_EQ(isSliceContiguous({1, 1, 2}, {3, 3, 3}), true);
  EXPECT_EQ(isSliceContiguous({1, 1, 3}, {3, 3, 3}), true);
  EXPECT_EQ(isSliceContiguous({1, 2, 1}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({1, 2, 2}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({1, 2, 3}, {3, 3, 3}), true);
  EXPECT_EQ(isSliceContiguous({1, 3, 1}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({1, 3, 2}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({1, 3, 3}, {3, 3, 3}), true);
  EXPECT_EQ(isSliceContiguous({2, 1, 1}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({2, 1, 2}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({2, 1, 3}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({2, 2, 1}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({2, 2, 2}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({2, 2, 3}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({2, 3, 1}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({2, 3, 2}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({2, 3, 3}, {3, 3, 3}), true);
  EXPECT_EQ(isSliceContiguous({3, 1, 1}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({3, 1, 2}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({3, 1, 3}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({3, 2, 1}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({3, 2, 2}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({3, 2, 3}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({3, 3, 1}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({3, 3, 2}, {3, 3, 3}), false);
  EXPECT_EQ(isSliceContiguous({3, 3, 3}, {3, 3, 3}), true);
}
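
// The cases above exercise the row-major contiguity rule: scanning a slice's
// dimensions from innermost to outermost, the dimensions must first be taken
// in full, and once a dimension is only partially taken, every remaining
// (outer) dimension must have size 1. For example, in a {3, 3, 3} tensor a
// {1, 2, 3} slice covers 6 consecutive elements (two full 3-element rows back
// to back), whereas a {1, 2, 1} slice picks 2 elements that are 3 elements
// apart, so it is not contiguous.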

/// Utility function for testing the optimization of an InsertTensorInstruction
/// to a TensorViewInstruction when the inserted tensor (slice) is contiguous.
static void testInsertOptimizer(llvm::ArrayRef<dim_t> srcShape,
                                llvm::ArrayRef<dim_t> destShape,
                                llvm::ArrayRef<dim_t> offsets) {
  Module mod;
  Function *F = mod.createFunction("InsertOptimizer");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *dest = bb.createWeightVar(glow::ElemKind::FloatTy, destShape, "dest",
                                  WeightVar::MutabilityKind::Mutable);
  auto *srcAlloc = bb.createAllocActivationInst(
      "srcAlloc", glow::ElemKind::FloatTy, srcShape);
  bb.createSplatInst("srcSplat", srcAlloc, 1.0);
  bb.createSplatInst("destSplat", dest, 2.0);
  bb.createInsertTensorInst("insert", dest, srcAlloc, offsets, 1, 0);
  bb.createDeallocActivationInst("deallocSrc", srcAlloc);

  optimize(M, MockBackend().shouldShareBuffers());

  auto &instrs = M.getInstrs();
  if (srcShape == destShape) {
    // If the slice was fully inserted then we should be left with only the
    // source Splat.
    EXPECT_EQ(instrs.size(), 1);
    EXPECT_EQ(instrs.begin()->getName().str(), std::string("srcSplat"));
  } else if (isSliceContiguous(srcShape, destShape)) {
    // If the slice is contiguous then we should be left with 2 Splats and a
    // TensorView. The Insert, Alloc and Dealloc should be gone.
    EXPECT_EQ(instrs.size(), 3);
    EXPECT_TRUE(std::all_of(
        instrs.begin(), instrs.end(), [](const Instruction &I) -> bool {
          return isa<SplatInst>(&I) || isa<TensorViewInst>(&I);
        }));
  } else {
    // If the slice is not contiguous, we should be left with the original
    // instructions: Alloc, 2 Splats, Insert, Dealloc.
    EXPECT_EQ(instrs.size(), 5);
  }
}

/// Simple test where a single Insert is replaced by a TensorView with offsets.
TEST(Optimizer, insertOptimizer) {
  testInsertOptimizer({1, 1, 1}, {3, 3, 3}, {0, 0, 0});
  testInsertOptimizer({1, 1, 2}, {3, 3, 3}, {1, 1, 1});
  testInsertOptimizer({1, 1, 3}, {3, 3, 3}, {2, 2, 0});
  testInsertOptimizer({1, 2, 1}, {3, 3, 3}, {0, 0, 1});
  testInsertOptimizer({1, 2, 2}, {3, 3, 3}, {1, 1, 0});
  testInsertOptimizer({1, 2, 3}, {3, 3, 3}, {2, 0, 0});
  testInsertOptimizer({1, 3, 1}, {3, 3, 3}, {0, 0, 0});
  testInsertOptimizer({1, 3, 2}, {3, 3, 3}, {1, 0, 1});
  testInsertOptimizer({1, 3, 3}, {3, 3, 3}, {2, 0, 0});
  testInsertOptimizer({2, 1, 1}, {3, 3, 3}, {0, 0, 1});
  testInsertOptimizer({2, 1, 2}, {3, 3, 3}, {1, 1, 0});
  testInsertOptimizer({2, 1, 3}, {3, 3, 3}, {0, 2, 0});
  testInsertOptimizer({2, 2, 1}, {3, 3, 3}, {1, 0, 0});
  testInsertOptimizer({2, 2, 2}, {3, 3, 3}, {0, 1, 1});
  testInsertOptimizer({2, 2, 3}, {3, 3, 3}, {1, 0, 0});
  testInsertOptimizer({2, 3, 1}, {3, 3, 3}, {0, 0, 1});
  testInsertOptimizer({2, 3, 2}, {3, 3, 3}, {1, 0, 0});
  testInsertOptimizer({2, 3, 3}, {3, 3, 3}, {0, 0, 0});
  testInsertOptimizer({3, 1, 1}, {3, 3, 3}, {0, 0, 0});
  testInsertOptimizer({3, 1, 2}, {3, 3, 3}, {0, 1, 1});
  testInsertOptimizer({3, 1, 3}, {3, 3, 3}, {0, 2, 0});
  testInsertOptimizer({3, 2, 1}, {3, 3, 3}, {0, 0, 1});
  testInsertOptimizer({3, 2, 2}, {3, 3, 3}, {0, 1, 0});
  testInsertOptimizer({3, 2, 3}, {3, 3, 3}, {0, 0, 0});
  testInsertOptimizer({3, 3, 1}, {3, 3, 3}, {0, 0, 0});
  testInsertOptimizer({3, 3, 2}, {3, 3, 3}, {0, 0, 1});
  testInsertOptimizer({3, 3, 3}, {3, 3, 3}, {0, 0, 0});
}

/// Utility function for testing the optimization of an ExtractTensorInstruction
/// to a TensorViewInstruction when the extracted tensor (slice) is contiguous.
static void testExtractOptimizer(llvm::ArrayRef<dim_t> destShape,
                                 llvm::ArrayRef<dim_t> srcShape,
                                 llvm::ArrayRef<dim_t> offsets) {
  Module mod;
  Function *F = mod.createFunction("ExtractOptimizer");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *src = bb.createWeightVar(glow::ElemKind::FloatTy, srcShape, "src",
                                 WeightVar::MutabilityKind::Mutable);
  auto *dest = bb.createWeightVar(glow::ElemKind::FloatTy, destShape, "dest",
                                  WeightVar::MutabilityKind::Mutable);
  bb.createSplatInst("srcSplat", src, 1.0);
  auto *destAlloc =
      bb.createAllocActivationInst("dest", glow::ElemKind::FloatTy, destShape);
  bb.createExtractTensorInst("extract", destAlloc, src, offsets);
  bb.createCopyInst("save", dest, destAlloc);
  bb.createDeallocActivationInst("deallocDest", destAlloc);

  optimize(M, MockBackend().shouldShareBuffers());

  auto &instrs = M.getInstrs();
  if (destShape == srcShape) {
    // If the slice was fully extracted then we should be left with a Splat
    // and a Copy. The Alloc, Extract and Dealloc should be gone.
    EXPECT_EQ(instrs.size(), 2);
    EXPECT_TRUE(std::all_of(instrs.begin(), instrs.end(),
                            [](const Instruction &I) -> bool {
                              return isa<SplatInst>(&I) || isa<CopyInst>(&I);
                            }));
  } else if (isSliceContiguous(destShape, srcShape)) {
    // If the extracted slice is contiguous then we should be left with a
    // Splat, a TensorView and a Copy. The Extract, Alloc and Dealloc should be
    // gone.
    EXPECT_EQ(instrs.size(), 3);
    EXPECT_TRUE(std::all_of(
        instrs.begin(), instrs.end(), [](const Instruction &I) -> bool {
          return isa<SplatInst>(&I) || isa<TensorViewInst>(&I) ||
                 isa<CopyInst>(&I);
        }));
  } else {
    // If the slice is not contiguous, we should be left with a Splat and an
    // Extract. The Alloc, Copy and Dealloc should be gone.
    EXPECT_EQ(instrs.size(), 2);
    EXPECT_TRUE(std::all_of(
        instrs.begin(), instrs.end(), [](const Instruction &I) -> bool {
          return isa<SplatInst>(&I) || isa<ExtractTensorInst>(&I);
        }));
  }
}

/// Simple test where a single Extract is replaced by a TensorView with offsets.
TEST(Optimizer, extractOptimizer) {
  testExtractOptimizer({1, 1, 1}, {3, 3, 3}, {0, 0, 0});
  testExtractOptimizer({1, 1, 2}, {3, 3, 3}, {1, 1, 1});
  testExtractOptimizer({1, 1, 3}, {3, 3, 3}, {2, 2, 0});
  testExtractOptimizer({1, 2, 1}, {3, 3, 3}, {0, 0, 1});
  testExtractOptimizer({1, 2, 2}, {3, 3, 3}, {1, 1, 0});
  testExtractOptimizer({1, 2, 3}, {3, 3, 3}, {2, 0, 0});
  testExtractOptimizer({1, 3, 1}, {3, 3, 3}, {0, 0, 0});
  testExtractOptimizer({1, 3, 2}, {3, 3, 3}, {1, 0, 1});
  testExtractOptimizer({1, 3, 3}, {3, 3, 3}, {2, 0, 0});
  testExtractOptimizer({2, 1, 1}, {3, 3, 3}, {0, 0, 1});
  testExtractOptimizer({2, 1, 2}, {3, 3, 3}, {1, 1, 0});
  testExtractOptimizer({2, 1, 3}, {3, 3, 3}, {0, 2, 0});
  testExtractOptimizer({2, 2, 1}, {3, 3, 3}, {1, 0, 0});
  testExtractOptimizer({2, 2, 2}, {3, 3, 3}, {0, 1, 1});
  testExtractOptimizer({2, 2, 3}, {3, 3, 3}, {1, 0, 0});
  testExtractOptimizer({2, 3, 1}, {3, 3, 3}, {0, 0, 1});
  testExtractOptimizer({2, 3, 2}, {3, 3, 3}, {1, 0, 0});
  testExtractOptimizer({2, 3, 3}, {3, 3, 3}, {0, 0, 0});
  testExtractOptimizer({3, 1, 1}, {3, 3, 3}, {0, 0, 0});
  testExtractOptimizer({3, 1, 2}, {3, 3, 3}, {0, 1, 1});
  testExtractOptimizer({3, 1, 3}, {3, 3, 3}, {0, 2, 0});
  testExtractOptimizer({3, 2, 1}, {3, 3, 3}, {0, 0, 1});
  testExtractOptimizer({3, 2, 2}, {3, 3, 3}, {0, 1, 0});
  testExtractOptimizer({3, 2, 3}, {3, 3, 3}, {0, 0, 0});
  testExtractOptimizer({3, 3, 1}, {3, 3, 3}, {0, 0, 0});
  testExtractOptimizer({3, 3, 2}, {3, 3, 3}, {0, 0, 1});
  testExtractOptimizer({3, 3, 3}, {3, 3, 3}, {0, 0, 0});
}

/// This is representative of what a ConcatNode is IRGen'd into: src1 and src2
/// represent the two tensors that are being concatenated, and dest represents
/// the resulting concatenated tensor.
TEST(Optimizer, twoInsertsWithBuffersOptimizer) {
  Module mod;
  Function *F = mod.createFunction("InsertWithBufferOptimizer");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *output =
      bb.createWeightVar(glow::ElemKind::FloatTy, {4, 4, 5}, "output",
                         WeightVar::MutabilityKind::Mutable);

  auto *allocSrc1 = bb.createAllocActivationInst(
      "allocSrc1", glow::ElemKind::FloatTy, {2, 4, 5});
  auto *allocSrc2 = bb.createAllocActivationInst(
      "allocSrc2", glow::ElemKind::FloatTy, {2, 4, 5});
  auto *allocDest = bb.createAllocActivationInst(
      "allocDest", glow::ElemKind::FloatTy, {4, 4, 5});

  bb.createSplatInst("splatSrc1", allocSrc1, 1.0);
  bb.createSplatInst("splatSrc2", allocSrc2, 2.0);
  bb.createSplatInst("splatDest", allocDest, 3.0);

  bb.createInsertTensorInst("insert1", allocDest, allocSrc1, {0, 0, 0}, 1, 0);
  bb.createInsertTensorInst("insert2", allocDest, allocSrc2, {2, 0, 0}, 1, 0);

  bb.createCopyInst("copy", output, allocDest);

  bb.createDeallocActivationInst("deallocDest", allocDest);
  bb.createDeallocActivationInst("deallocSrc2", allocSrc2);
  bb.createDeallocActivationInst("deallocSrc1", allocSrc1);

  optimize(M, MockBackend().shouldShareBuffers());

  // After optimization, should be left with three splats and two tensorviews;
  // the inserts, allocs, and deallocs should be gone.
  auto &instrs = M.getInstrs();
  EXPECT_EQ(instrs.size(), 5);
  EXPECT_TRUE(std::all_of(
      instrs.begin(), instrs.end(), [](const Instruction &I) -> bool {
        return isa<SplatInst>(&I) || isa<TensorViewInst>(&I);
      }));
}
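
// A hedged sketch of the concat-like pattern above (schematic, not the exact
// IR printer output). Before optimization:
//   splat allocSrc1, 1.0 ; splat allocSrc2, 2.0 ; splat allocDest, 3.0
//   inserttensor allocDest, allocSrc1 @ offsets {0, 0, 0}
//   inserttensor allocDest, allocSrc2 @ offsets {2, 0, 0}
//   copy output, allocDest
// Both inserted slices are contiguous, so the inserts can be rewritten as
// tensorviews into the destination buffer and the splats can write through
// them directly; only splats and tensorviews survive, as asserted above.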

/// This is representative of what a SliceNode is IRGen'd into: src is the
/// original source tensor, and then two slices are created into dest1 and
/// dest2.
TEST(Optimizer, twoExtractsWithBuffersOptimizer) {
  Module mod;
  Function *F = mod.createFunction("ExtractWithBufferOptimizer");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *output1 =
      bb.createWeightVar(glow::ElemKind::FloatTy, {2, 4, 5}, "output1",
                         WeightVar::MutabilityKind::Mutable);
  auto *output2 =
      bb.createWeightVar(glow::ElemKind::FloatTy, {2, 4, 5}, "output2",
                         WeightVar::MutabilityKind::Mutable);

  auto *allocSrc = bb.createAllocActivationInst(
      "allocSrc", glow::ElemKind::FloatTy, {4, 4, 5});
  auto *allocDest1 = bb.createAllocActivationInst(
      "allocDest1", glow::ElemKind::FloatTy, {2, 4, 5});
  auto *allocDest2 = bb.createAllocActivationInst(
      "allocDest2", glow::ElemKind::FloatTy, {2, 4, 5});

  bb.createSplatInst("splatSrc", allocSrc, 3.0);

  bb.createExtractTensorInst("extract1", allocDest1, allocSrc, {0, 0, 0});
  bb.createExtractTensorInst("extract2", allocDest2, allocSrc, {2, 0, 0});

  bb.createCopyInst("copy", output1, allocDest1);
  bb.createCopyInst("copy", output2, allocDest2);

  bb.createDeallocActivationInst("deallocSrc", allocSrc);
  bb.createDeallocActivationInst("deallocDest2", allocDest2);
  bb.createDeallocActivationInst("deallocDest1", allocDest1);

  optimize(M, MockBackend().shouldShareBuffers());

  // After optimization, the extracts should be gone, as well as both allocDests
  // and their deallocs. Should be left with splatSrc, allocSrc, deallocSrc, two
  // tensorviews, and two copies from the tensorviews into the outputs.
  auto &instrs = M.getInstrs();
  EXPECT_EQ(instrs.size(), 7);
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<ExtractTensorInst>(&I); }));
}

/// Check that we are able to coalesce a copy forward from the input.
/// This test consists of a copy from the input variable.
/// Its main characteristic is that this copy cannot be coalesced with
/// the output (otherwise it would be a backward chain of
/// copies from the output).
/// The shareBuffers optimization works backward, so as long as
/// it manages to coalesce things with the output one by one, we
/// won't see whether the forward copies are properly handled.
TEST(Optimizer, forwardCopy) {
  Module mod;
  Function *F = mod.createFunction("forwardCopy");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {64}, "input",
                                   WeightVar::MutabilityKind::Mutable);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output",
                                    WeightVar::MutabilityKind::Mutable);
  auto *tmp1 =
      bb.createAllocActivationInst("tmp1", glow::ElemKind::FloatTy, {64});
  bb.createCopyInst("copy1", tmp1, input);

  auto *view = bb.createTensorViewInst(
      "view", tmp1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 64})),
      {0});
  bb.createInsertTensorInst("copyOutput", output, view, {0, 0}, 1, 0);

  bb.createDeallocActivationInst("dealloc1", tmp1);

  auto &instrs = M.getInstrs();
  auto nbInstrsBeforeOpt = instrs.size();
  optimize(M, MockBackend().shouldShareBuffers());

  // After optimization, the copy should have been coalesced with input.
  // Expected: nbInstrsBeforeOpt - 1 copy - 1 alloc - 1 dealloc.
  EXPECT_EQ(instrs.size(),
            nbInstrsBeforeOpt - 1 /*copy*/ - 1 /*alloc*/ - 1 /*dealloc*/);
  EXPECT_TRUE(std::none_of(instrs.begin(), instrs.end(),
                           [](const Instruction &I) -> bool {
                             return isa<AllocActivationInst>(&I);
                           }));
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<CopyInst>(&I); }));
}
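
// A hedged sketch of the forward-copy pattern above (schematic, not the exact
// IR printer output):
//   copy tmp1, input
//   view = tensorview tmp1 as {1, 64}
//   inserttensor output, view @ offsets {0, 0}
// Coalescing the copy forward lets the tensorview (and hence the insert) read
// input directly, so the alloc, the copy and the dealloc all disappear, as the
// EXPECTs above verify.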

/// Check that we are able to coalesce a chain of copies
/// forward from the input.
/// This test is similar to forwardCopy, except it uses a chain of copies (more
/// than one) instead of just one copy from the input.
TEST(Optimizer, chainOfTwoForwardCopies) {
  Module mod;
  Function *F = mod.createFunction("chainOfTwoForwardCopies");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {64}, "input",
                                   WeightVar::MutabilityKind::Mutable);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output",
                                    WeightVar::MutabilityKind::Mutable);
  auto *tmp1 =
      bb.createAllocActivationInst("tmp1", glow::ElemKind::FloatTy, {64});
  bb.createCopyInst("copy1", tmp1, input);

  auto *tmp2 =
      bb.createAllocActivationInst("tmp2", glow::ElemKind::FloatTy, {64});
  bb.createCopyInst("copy2", tmp2, tmp1);
  auto *view = bb.createTensorViewInst(
      "view", tmp2, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 64})),
      {0});
  bb.createInsertTensorInst("copyOutput", output, view, {0, 0}, 1, 0);

  bb.createDeallocActivationInst("dealloc1", tmp1);
  bb.createDeallocActivationInst("dealloc2", tmp2);

  auto &instrs = M.getInstrs();
  auto nbInstrsBeforeOpt = instrs.size();
  optimize(M, MockBackend().shouldShareBuffers());

  // After optimization, the copies should have been coalesced with
  // input.
  // Ideally, we should get rid of the 2 copies, the related 2 alloc
  // activations and the 2 deallocations.
  // Expected: nbInstrsBeforeOpt - 2 copies - 2 allocs - 2 deallocs.
  EXPECT_EQ(instrs.size(),
            nbInstrsBeforeOpt - 2 /*copies*/ - 2 /*allocs*/ - 2 /*deallocs*/);
  EXPECT_TRUE(std::none_of(instrs.begin(), instrs.end(),
                           [](const Instruction &I) -> bool {
                             return isa<AllocActivationInst>(&I);
                           }));
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<CopyInst>(&I); }));
}

/// The idea of this test is to have live intervals looking like this:
///        A            B
///        |  <-copy    |
///      inout          |
///        |            |
/// Because of the inout on A, A and B interfere.
/// Make sure we don't coalesce such buffers.
TEST(Optimizer, inoutCopy) {
  Module mod;
  Function *F = mod.createFunction("inoutCopy");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "input",
                                   WeightVar::MutabilityKind::Mutable);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {3, 64}, "output",
                                    WeightVar::MutabilityKind::Mutable);
  auto *output2 =
      bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output2",
                         WeightVar::MutabilityKind::Mutable);
  // This copy cannot be eliminated because input must not be changed.
  // Indeed, this is an observable variable plus it is used as a source
  // for a copy to output2.
  auto *tmp1 =
      bb.createAllocActivationInst("tmp1", glow::ElemKind::FloatTy, {2, 64});
  bb.createCopyInst("copy1", tmp1, input);

  auto *tmp2 =
      bb.createAllocActivationInst("tmp2", glow::ElemKind::FloatTy, {64});
  bb.createSplatInst("splat", tmp2, 3.0);
  auto *view =
      bb.createTensorView(ElemKind::FloatTy, {1, 64}, tmp2, "view", {0});
  bb.createInsertTensorInst("insertTmp1", tmp1, view, {0, 0}, 1, 0);
  bb.createInsertTensorInst("insertOutput", output, tmp1, {1, 0}, 1, 0);
  bb.createCopyInst("copyOutput2", output2, input);

  bb.createDeallocActivationInst("dealloc1", tmp1);
  bb.createDeallocActivationInst("dealloc2", tmp2);

  optimize(M, MockBackend().shouldShareBuffers());

  // After optimization, the copies shouldn't have been touched.
  // tmp1 = copy input cannot be coalesced because tmp1 is inout.
  // output2 = copy input cannot be coalesced because they are both
  // externally visible.
  EXPECT_EQ(input->getNumUsers(), 2);
  EXPECT_TRUE(
      std::all_of(input->getUsers().begin(), input->getUsers().end(),
                  [](const Use &I) -> bool { return isa<CopyInst>(I.get()); }));
  const Value *expectedDest[] = {tmp1, output2};
  unsigned idx = 0;
  for (const Use &use : input->getUsers()) {
    if (idx == sizeof(expectedDest) / sizeof(expectedDest[0])) {
      // If we end up here that means that input has too many users.
      EXPECT_FALSE(true);
      break;
    }
    EXPECT_EQ(use.get()->getOperand(0).first, expectedDest[idx++]);
  }
}

/// Check that we properly define a buffer when we extend its live-range
/// on a segment of the source that does not have any definition.
/// A source live-range without any definition can happen when this
/// is the first use of a WeightVar.
/// At the high level, this test looks like this:
/// WeightVar        Buffer
///     |   useA
///     |   useB        |  def
///     |   redef       |  save to output
/// - UseA is the first use of WeightVar and we want it to be replaced by
///   a use of Buffer. I.e., Buffer's live-range is extended toward the top.
/// - UseB involves both WeightVar and Buffer. It exposes the buffer sharing
///   opportunity between these two variables. It must happen after useA
///   to expose the case of extending the live-range of a buffer toward
///   the top where no definition exists.
/// - Redef redefines WeightVar. It is necessary because otherwise useA and
///   useB could both share the same buffer and thus, we would extend the
///   live-range of the buffer in useA downward (or the use of Buffer up to
///   the definition of the buffer in useA), which is not what we want to
///   test.
/// - "Save to output" is required to keep the def of Buffer alive. Moreover,
///   the save must be done in such a way that the output buffer and Buffer
///   must not be able to share the same buffer. Otherwise, the live-range
///   of the output buffer would be extended upward to useB and, given that
///   output and WeightVar are both externally observable, the output buffer
///   could not be merged with WeightVar.
///   Therefore, we would not expose an extension of output up to useA
///   and would not test the case where the replaced buffer doesn't have any
///   definition.
///
/// The expected result at a high level looks like this:
/// WeightVar        Buffer
///     |   copy        |  <- Buffer gets WeightVar
///   useA              |  <- Buffer is used instead of WeightVar
///   useB              |  def <- ditto
///     |   redef       |  save to output
TEST(Optimizer, bufferReuseWithoutDefs) {
  Module mod;
  Function *F = mod.createFunction("bufferReuseWithoutDefs");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {64}, "input",
                                   WeightVar::MutabilityKind::Mutable);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output",
                                    WeightVar::MutabilityKind::Mutable);
  auto *tmp1 =
      bb.createAllocActivationInst("tmp1", glow::ElemKind::FloatTy, {64});

  auto *tmp2 =
      bb.createAllocActivationInst("tmp2", glow::ElemKind::FloatTy, {64});
  auto *tmp3 =
      bb.createAllocActivationInst("tmp3", glow::ElemKind::FloatTy, {64});

  bb.createSplatInst("tmp2init", tmp2, 1.0);
  // Use input for some stuff.
  auto *useA = bb.createElementAddInst("useA", tmp3, tmp2, input);
  // Make the first user of input a dependency of the definition
  // of tmp1 so that the scheduler cannot mess with the layout
  // we want for the instructions ordering.
  bb.createElementAddInst("useB", tmp1, input, tmp3);
  bb.createCopyInst("redef", input, tmp3);
  auto *view = bb.createTensorViewInst(
      "view", tmp1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 64})),
      {0});
  bb.createInsertTensorInst("save", output, view, {0, 0}, 1, 0);

  bb.createDeallocActivationInst("dealloc1", tmp1);
  bb.createDeallocActivationInst("dealloc2", tmp2);
  bb.createDeallocActivationInst("dealloc3", tmp3);

  optimize(M, MockBackend().shouldShareBuffers());

  // Check that we manage to expose the problematic case we wanted:
  // tmp1 is extended upward and replaces the use of input.
  EXPECT_EQ(useA->getRHS(), tmp1);
  // Check that tmp1 is properly defined before useA.
  Instruction *instBeforeUseA = &*std::prev(useA->getIterator());

  EXPECT_TRUE(isa<CopyInst>(instBeforeUseA));
  // The somewhat complicated check is to make sure we don't crash the test
  // when instBeforeUseA is not a copy.
  // I.e., this test was failing (instead of crashing) when the
  // bug was present.
  EXPECT_EQ(instBeforeUseA->getNumOperands() > 0
                ? instBeforeUseA->getOperand(0).first
                : nullptr,
            tmp1);
  EXPECT_EQ(instBeforeUseA->getNumOperands() > 1
                ? instBeforeUseA->getOperand(1).first
                : nullptr,
            input);
}

/// Same as bufferReuseWithoutDefs but with casts in the middle.
/// This makes sure that we properly set the types for whatever fixup
/// code we will insert.
/// The high level view of the test is:
/// WeightVar        Buffer
///     |   useA
///     |   useB(cast)  |  def
///     |   redef       |  save to output
///
/// The expected result at a high level looks like this:
/// WeightVar        Buffer
///     |   copy(cast)  |  <- Buffer gets WeightVar
///   useA(cast)        |  <- Buffer is used instead of WeightVar
///   useB              |  def <- ditto
///     |   redef       |  save to output
TEST(Optimizer, bufferReuseWithoutDefsPlusCasts) {
  Module mod;
  Function *F = mod.createFunction("bufferReuseWithoutDefsPlusCasts");
  IRFunction M(F);
  IRBuilder bb(&M);

  auto *input = bb.createWeightVar(glow::ElemKind::FloatTy, {1, 64}, "input",
                                   WeightVar::MutabilityKind::Mutable);
  auto *output = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "output",
                                    WeightVar::MutabilityKind::Mutable);
  auto *tmp1 =
      bb.createAllocActivationInst("tmp1", glow::ElemKind::FloatTy, {64});

  auto *tmp2 =
      bb.createAllocActivationInst("tmp2", glow::ElemKind::FloatTy, {1, 64});
  auto *tmp3 =
      bb.createAllocActivationInst("tmp3", glow::ElemKind::FloatTy, {1, 64});

  bb.createSplatInst("tmp2init", tmp2, 1.0);
  auto *useA = bb.createElementAddInst("useA", tmp3, tmp2, input);
  auto *inputView = bb.createTensorViewInst(
      "inputView", input, mod.uniqueType(Type(glow::ElemKind::FloatTy, {64})),
      {0, 0});
  auto *tmp3View = bb.createTensorViewInst(
      "tmp3View", tmp3, mod.uniqueType(Type(glow::ElemKind::FloatTy, {64})),
      {0, 0});

  bb.createElementAddInst("useB", tmp1, inputView, tmp3View);
  bb.createCopyInst("redef", input, tmp3);
  auto *view = bb.createTensorViewInst(
      "view", tmp1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 64})),
      {0});
  bb.createInsertTensorInst("save", output, view, {0, 0}, 1, 0);

  bb.createDeallocActivationInst("dealloc1", tmp1);
  bb.createDeallocActivationInst("dealloc2", tmp2);
  bb.createDeallocActivationInst("dealloc3", tmp3);

  optimize(M, MockBackend().shouldShareBuffers());

  // Check that we manage to expose the problematic case we wanted:
  // tmp1 is extended upward and replaces the use of input.
  Value *useARHS = useA->getRHS();
  EXPECT_EQ(getOrigin(useARHS), tmp1);
  Instruction *tmp1TensorView = dyn_cast<TensorViewInst>(useARHS);
  EXPECT_TRUE(tmp1TensorView && tmp1TensorView->getOperand(0).first == tmp1);
  // Check that tmp1 is properly defined before useA.
  Instruction *tmp1Fixup =
      tmp1TensorView ? &*std::prev(tmp1TensorView->getIterator()) : nullptr;
  EXPECT_TRUE(tmp1Fixup && isa<CopyInst>(tmp1Fixup));
  // The somewhat complicated check is to make sure we don't crash the test
  // when tmp1Fixup is not a copy.
  EXPECT_EQ((tmp1Fixup && tmp1Fixup->getNumOperands() > 0)
                ? getOrigin(tmp1Fixup->getOperand(0).first)
                : nullptr,
            tmp1);
  // Now check that input feeds tmp1Fixup and was properly casted.
  Instruction *inputCast =
      (tmp1Fixup && tmp1Fixup->getNumOperands() > 1)
          ? dyn_cast<TensorViewInst>(tmp1Fixup->getOperand(1).first)
          : nullptr;
  EXPECT_EQ(inputCast ? getOrigin(inputCast) : nullptr, input);
  EXPECT_EQ(inputCast ? inputCast->getOperand(0).first : nullptr, input);
}

/// Check that a copy from a buffer to itself is
/// detected when both src and dest are hidden under TensorView
/// instructions and eliminated if the linearized offsets of the src and dest
/// are equal.
TEST(Optimizer, copyEliminationTensorViewToTensorView) {
  Module mod;
  Function *F = mod.createFunction("copyEliminationTensorViewToTensorView");
  IRFunction M(F);
  IRBuilder bb(&M);

  // Test that a copy between tensorviews with identical offsets which have
  // different src operands with different offsets into the same underlying
  // buffer is not optimized away.

  // Create a WeightVar for TensorViews to use as their source operand.
  auto *A = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "A",
                               WeightVar::MutabilityKind::Mutable);

  // Create a view into A.
  auto *view1 = bb.createTensorViewInst(
      "view1", A, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 2, 1})),
      {0, 0});

  // Create another view into A with the same shape as view1 but different
  // offsets.
  auto *view2 = bb.createTensorViewInst(
      "view2", A, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 2, 1})),
      {1, 1});

  // Create views into view1 and view2 with identical offsets.
  auto *view3 = bb.createTensorViewInst(
      "view3", view1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {2, 1})),
      {0, 0, 0});

  auto *view4 = bb.createTensorViewInst(
      "view4", view2, mod.uniqueType(Type(glow::ElemKind::FloatTy, {2, 1})),
      {0, 0, 0});

  // Create a copy from view3 to view4. These views each cover 2 elements of A,
  // but at different linearized offsets ({0, 0} vs. {1, 1} in A), so the copy
  // must not be optimized out.
  bb.createCopyInst("copyViewToView", view3, view4);

  auto &instrs = M.getInstrs();
  optimize(M, MockBackend().shouldShareBuffers());

  // All instructions should remain because the linearized offsets of the two
  // views being copied are not the same.
  EXPECT_EQ(instrs.size(), 5);
  EXPECT_FALSE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<CopyInst>(&I); }));
  EXPECT_FALSE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<TensorViewInst>(&I); }));

  // Reset state for next test.
  M.clear();
  M.setGraph(F);

  // Test that a copy between tensorviews with different offsets which have
  // different src operands with different offsets but have the same linearized
  // offset into the same underlying buffer is optimized away.

  // Create a WeightVar for TensorViews to use as their source operand.
  auto *D = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "B",
                               WeightVar::MutabilityKind::Mutable);

  // Create another WeightVar. D will be inserted into this to avoid
  // optimizing all instructions away.
  auto *E = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "C",
                               WeightVar::MutabilityKind::Mutable);

  // Create a view into D. The linearized offset of this TensorView is 0 and
  // the size is 8.
  auto *view7 = bb.createTensorViewInst(
      "view7", D, mod.uniqueType(Type(glow::ElemKind::FloatTy, {4, 2, 1})),
      {0, 0});

  // Create a view into view7. The linearized offset of this TensorView is
  // 4 and the size is 2.
  auto *view8 = bb.createTensorViewInst(
      "view8", view7, mod.uniqueType(Type(glow::ElemKind::FloatTy, {2})),
      {2, 0, 0});

  // Create a view into D. The linearized offset of this TensorView is 4 and
  // the size is 4.
  auto *view9 = bb.createTensorViewInst(
      "view9", D, mod.uniqueType(Type(glow::ElemKind::FloatTy, {4})), {2, 0});

  // Create a view into view9. The linearized offset of this TensorView is 4
  // and the size is 2.
  auto *view10 = bb.createTensorViewInst(
      "view10", view9, mod.uniqueType(Type(glow::ElemKind::FloatTy, {2})), {0});

  // Create a copy from view8 to view10. Since the linearized offsets and types
  // of the two views are identical, this copy should be optimized out.
  bb.createCopyInst("copyViewToView", view8, view10);

  // Insert D into E just to make sure the IR isn't empty after optimization.
  bb.createInsertTensorInst("copyOutput", E, D, /*Offsets=*/{0, 0},
                            /*Count=*/1, /*Axis=*/0);

  optimize(M, MockBackend().shouldShareBuffers());

  // Only one instruction (the InsertTensor) should remain.
  EXPECT_EQ(instrs.size(), 1);
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<CopyInst>(&I); }));
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<TensorViewInst>(&I); }));
}
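
// For reference, the linearized offsets mentioned in the comments above follow
// the usual row-major formula: in a tensor of shape {4, 2}, an offset {r, c}
// linearizes to r * 2 + c elements, so {2, 0} -> 4 and {0, 0} -> 0. That is
// why view8 and view10 alias the same two elements of D and the copy between
// them can be removed.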

/// Check that a copy from a buffer to itself is
/// detected when the src is hidden under a layer of TensorView instructions
/// and eliminated if the linearized offsets of the src and dest are equal.
TEST(Optimizer, copyEliminationTensorViewBuffer) {
  Module mod;
  Function *F = mod.createFunction("copyEliminationTensorViewToBuffer");
  IRFunction M(F);
  IRBuilder bb(&M);

  // Create a WeightVar for TensorViews to use as their source operand.
  auto *B = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "B",
                               WeightVar::MutabilityKind::Mutable);

  // Create another WeightVar. B will be copied into this to avoid
  // optimizing all instructions away.
  auto *C = bb.createWeightVar(glow::ElemKind::FloatTy, {4, 2}, "C",
                               WeightVar::MutabilityKind::Mutable);

  // Create two stacked views into B. Two are required because a tensorview
  // that has the same type as its src is eliminated before copy elimination is
  // applied.
  auto *view1 = bb.createTensorViewInst(
      "view1", B, mod.uniqueType(Type(glow::ElemKind::FloatTy, {1, 4, 2})),
      {0, 0});

  auto *view2 = bb.createTensorViewInst(
      "view2", view1, mod.uniqueType(Type(glow::ElemKind::FloatTy, {4, 2})),
      {0, 0, 0});

  // Create a copy from view2 to B. This view points to the start of B and has
  // the same type, so this should be optimized out.
  bb.createCopyInst("copyViewToBuf", view2, B);

  // Create a copy from B to view2. This should also be optimized out for the
  // same reason.
  bb.createCopyInst("copyBufToView", B, view2);

  // Insert B into C. This exists just to make sure the optimized IR isn't
  // empty.
  bb.createInsertTensorInst("copyOutput", C, B, /*Offsets=*/{0, 0},
                            /*Count=*/1, /*Axis=*/0);

  auto &instrs = M.getInstrs();
  optimize(M, MockBackend().shouldShareBuffers());

  // Only one instruction (the InsertTensor) should remain.
  EXPECT_EQ(instrs.size(), 1);
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<CopyInst>(&I); }));
  EXPECT_TRUE(std::none_of(
      instrs.begin(), instrs.end(),
      [](const Instruction &I) -> bool { return isa<TensorViewInst>(&I); }));
}

/// Check if dump functions work for Value and IRFunction.
TEST(Optimizer, dumpDataStructure) {
  Module mod;
  Function *F = mod.createFunction("inoutCopy");
  IRFunction M(F);
  IRBuilder bb(&M);

  Value *input = bb.createWeightVar(glow::ElemKind::FloatTy, {2, 64}, "input",
                                    WeightVar::MutabilityKind::Mutable);
  // Dump Value.
  std::string storageV1;
  llvm::raw_string_ostream osV1(storageV1);
  input->dump(osV1);
  std::string mesV = input->toString();
  std::string expectMesV = R"(%input = WeightVar float<2 x 64> mutable)";
  EXPECT_EQ(mesV, expectMesV);
  EXPECT_EQ(mesV, osV1.str());
  std::string storageV2;
  llvm::raw_string_ostream osV2(storageV2);
  osV2 << input;
  EXPECT_EQ(mesV, osV2.str());
  // Dump IRFunction.
  std::string storageIRF1;
  llvm::raw_string_ostream osIRF1(storageIRF1);
  M.dump(osIRF1);
  std::string mesI = M.toString();
  std::string expectMesI = R"(function inoutCopy
declare {
  %input = WeightVar float<2 x 64> mutable // size: 512

  ; size = 512 bytes
}

code {
}
)";
  EXPECT_EQ(mesI, expectMesI);
  EXPECT_EQ(mesI, osIRF1.str());
  std::string storageIRF2;
  llvm::raw_string_ostream osIRF2(storageIRF2);
  osIRF2 << M;
  EXPECT_EQ(mesI, osIRF2.str());
}
