test_memdependency.cpp source code [pytorch/test/cpp/tensorexpr/test_memdependency.cpp]

1	#include <gtest/gtest.h>
2	#include <test/cpp/tensorexpr/test_base.h>
3
4	#include <torch/csrc/jit/tensorexpr/bounds_overlap.h>
5	#include <torch/csrc/jit/tensorexpr/ir.h>
6	#include <torch/csrc/jit/tensorexpr/ir_printer.h>
7	#include <torch/csrc/jit/tensorexpr/ir_simplifier.h>
8	#include <torch/csrc/jit/tensorexpr/loopnest.h>
9	#include <torch/csrc/jit/tensorexpr/mem_dependency_checker.h>
10	#include <torch/csrc/jit/tensorexpr/tensor.h>
11
12	namespace torch {
13	namespace jit {
14
15	using namespace torch::jit::tensorexpr;
16
17	// Test helper function used to determine if two regions of a buffer have an
18	// overlap. No Overlap & partial overlap is obvious. Contains means A is
19	// larger and fully encloses B, while ContainedOrEqual is the reverse. Equal
20	// ranges are ContainedOrEqual.
21	TEST(MemDependency, BoundOverlap) {
22	using namespace analysis;
23
24	auto CB = [](int s, int e) {
25	return Bound (alloc<IntImm>(s), alloc<IntImm>(e));
26	};
27
28	// Sanity check 3 overlap cases.
29	ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(`0`, `0`), CB(`0`, `0`)));
30	ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(`0`, `3`), CB(`2`, `5`)));
31	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`0`, `0`), CB(`1`, `1`)));
32
33	// Partial overlap works in either order.
34	ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(`0`, `10`), CB(`7`, `14`)));
35	ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(`7`, `14`), CB(`0`, `10`)));
36
37	// Total Overlap works when one bound encloses the other, and returns which.
38	ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(`2`, `15`), CB(`7`, `9`)));
39	ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(`2`, `15`), CB(`0`, `16`)));
40
41	// Total overlap works when the bounds are an identical range, returns
42	// ContainedOrEqual.
43	ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(`2`, `15`), CB(`2`, `15`)));
44
45	// Total overlap when only one end of the bound matches.
46	ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(`2`, `15`), CB(`2`, `10`)));
47	ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(`2`, `15`), CB(`3`, `15`)));
48	ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(`0`, `10`), CB(`0`, `9`)));
49	ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(`2`, `10`), CB(`2`, `15`)));
50	ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(`3`, `15`), CB(`2`, `15`)));
51
52	// No overlap when a < b.
53	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`0`, `2`), CB(`5`, `10`)));
54	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`2`, `2`), CB(`3`, `3`)));
55	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`100`, `120`), CB(`130`, `130`)));
56
57	// No overlap when a > b.
58	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`5`, `10`), CB(`0`, `2`)));
59	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`3`, `3`), CB(`2`, `2`)));
60	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`130`, `130`), CB(`100`, `120`)));
61
62	// No overlap when adjacent.
63	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`0`, `100`), CB(`101`, `120`)));
64	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(`2`, `3`), CB(`0`, `1`)));
65
66	// Partial overlap when middle bounds match.
67	ASSERT_EQ(
68	OverlapKind::PartialOverlap, boundOverlap(CB(`0`, `100`), CB(`100`, `120`)));
69	ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(`0`, `2`), CB(`2`, `4`)));
70	ASSERT_EQ(
71	OverlapKind::PartialOverlap, boundOverlap(CB(`100`, `120`), CB(`0`, `100`)));
72	ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(`2`, `3`), CB(`1`, `2`)));
73
74	// Total overlap when one bound is single length over one end of the other.
75	ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(`2`, `15`), CB(`15`, `15`)));
76	ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(`2`, `15`), CB(`2`, `2`)));
77	ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(`2`, `2`), CB(`2`, `15`)));
78	ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(`15`, `15`), CB(`2`, `15`)));
79	}
80
81	TEST(MemDependency, BoundComparison) {
82	using namespace analysis;
83
84	auto CB = [](int s, int e) {
85	return Bound (alloc<IntImm>(s), alloc<IntImm>(e));
86	};
87
88	ASSERT_EQ(
89	CmpEvalResult::NotDetermined,
90	compareBound(CB(`10`, `100`), CB(`10`, `100`), CompareSelectOperation::kEQ));
91	ASSERT_EQ(
92	CmpEvalResult::True,
93	compareBound(CB(`10`, `10`), CB(`10`, `10`), CompareSelectOperation::kEQ));
94	ASSERT_EQ(
95	CmpEvalResult::False,
96	compareBound(CB(`10`, `20`), CB(`30`, `40`), CompareSelectOperation::kEQ));
97	ASSERT_EQ(
98	CmpEvalResult::False,
99	compareBound(CB(`30`, `40`), CB(`10`, `20`), CompareSelectOperation::kEQ));
100	ASSERT_EQ(
101	CmpEvalResult::NotDetermined,
102	compareBound(CB(`30`, `40`), CB(`40`, `50`), CompareSelectOperation::kEQ));
103	ASSERT_EQ(
104	CmpEvalResult::NotDetermined,
105	compareBound(CB(`30`, `40`), CB(`20`, `30`), CompareSelectOperation::kEQ));
106	ASSERT_EQ(
107	CmpEvalResult::NotDetermined,
108	compareBound(CB(`30`, `45`), CB(`40`, `50`), CompareSelectOperation::kEQ));
109
110	ASSERT_EQ(
111	CmpEvalResult::NotDetermined,
112	compareBound(CB(`10`, `100`), CB(`10`, `100`), CompareSelectOperation::kNE));
113	ASSERT_EQ(
114	CmpEvalResult::False,
115	compareBound(CB(`10`, `10`), CB(`10`, `10`), CompareSelectOperation::kNE));
116	ASSERT_EQ(
117	CmpEvalResult::True,
118	compareBound(CB(`10`, `20`), CB(`30`, `40`), CompareSelectOperation::kNE));
119	ASSERT_EQ(
120	CmpEvalResult::True,
121	compareBound(CB(`30`, `40`), CB(`10`, `20`), CompareSelectOperation::kNE));
122	ASSERT_EQ(
123	CmpEvalResult::NotDetermined,
124	compareBound(CB(`30`, `40`), CB(`40`, `50`), CompareSelectOperation::kNE));
125	ASSERT_EQ(
126	CmpEvalResult::NotDetermined,
127	compareBound(CB(`30`, `40`), CB(`20`, `30`), CompareSelectOperation::kEQ));
128	ASSERT_EQ(
129	CmpEvalResult::NotDetermined,
130	compareBound(CB(`30`, `45`), CB(`40`, `50`), CompareSelectOperation::kNE));
131
132	ASSERT_EQ(
133	CmpEvalResult::True,
134	compareBound(CB(`10`, `20`), CB(`30`, `40`), CompareSelectOperation::kLT));
135	ASSERT_EQ(
136	CmpEvalResult::False,
137	compareBound(CB(`30`, `40`), CB(`10`, `20`), CompareSelectOperation::kLT));
138	ASSERT_EQ(
139	CmpEvalResult::False,
140	compareBound(CB(`30`, `40`), CB(`10`, `30`), CompareSelectOperation::kLT));
141	ASSERT_EQ(
142	CmpEvalResult::NotDetermined,
143	compareBound(CB(`10`, `100`), CB(`10`, `100`), CompareSelectOperation::kLT));
144	ASSERT_EQ(
145	CmpEvalResult::NotDetermined,
146	compareBound(CB(`30`, `40`), CB(`40`, `50`), CompareSelectOperation::kLT));
147	ASSERT_EQ(
148	CmpEvalResult::NotDetermined,
149	compareBound(CB(`30`, `45`), CB(`40`, `50`), CompareSelectOperation::kLT));
150
151	ASSERT_EQ(
152	CmpEvalResult::False,
153	compareBound(CB(`10`, `20`), CB(`30`, `40`), CompareSelectOperation::kGE));
154	ASSERT_EQ(
155	CmpEvalResult::True,
156	compareBound(CB(`30`, `40`), CB(`10`, `20`), CompareSelectOperation::kGE));
157	ASSERT_EQ(
158	CmpEvalResult::True,
159	compareBound(CB(`30`, `40`), CB(`10`, `30`), CompareSelectOperation::kGE));
160	ASSERT_EQ(
161	CmpEvalResult::NotDetermined,
162	compareBound(CB(`10`, `100`), CB(`10`, `100`), CompareSelectOperation::kGE));
163	ASSERT_EQ(
164	CmpEvalResult::NotDetermined,
165	compareBound(CB(`30`, `40`), CB(`40`, `50`), CompareSelectOperation::kGE));
166	ASSERT_EQ(
167	CmpEvalResult::NotDetermined,
168	compareBound(CB(`30`, `45`), CB(`40`, `50`), CompareSelectOperation::kGE));
169
170	ASSERT_EQ(
171	CmpEvalResult::False,
172	compareBound(CB(`10`, `20`), CB(`30`, `40`), CompareSelectOperation::kGT));
173	ASSERT_EQ(
174	CmpEvalResult::False,
175	compareBound(CB(`30`, `40`), CB(`40`, `50`), CompareSelectOperation::kGT));
176	ASSERT_EQ(
177	CmpEvalResult::NotDetermined,
178	compareBound(CB(`10`, `100`), CB(`10`, `100`), CompareSelectOperation::kGT));
179	ASSERT_EQ(
180	CmpEvalResult::True,
181	compareBound(CB(`30`, `40`), CB(`10`, `20`), CompareSelectOperation::kGT));
182	ASSERT_EQ(
183	CmpEvalResult::NotDetermined,
184	compareBound(CB(`30`, `40`), CB(`10`, `30`), CompareSelectOperation::kGT));
185	ASSERT_EQ(
186	CmpEvalResult::NotDetermined,
187	compareBound(CB(`30`, `45`), CB(`40`, `50`), CompareSelectOperation::kGT));
188
189	ASSERT_EQ(
190	CmpEvalResult::True,
191	compareBound(CB(`10`, `20`), CB(`30`, `40`), CompareSelectOperation::kLE));
192	ASSERT_EQ(
193	CmpEvalResult::True,
194	compareBound(CB(`30`, `40`), CB(`40`, `50`), CompareSelectOperation::kLE));
195	ASSERT_EQ(
196	CmpEvalResult::NotDetermined,
197	compareBound(CB(`10`, `100`), CB(`10`, `100`), CompareSelectOperation::kLE));
198	ASSERT_EQ(
199	CmpEvalResult::False,
200	compareBound(CB(`30`, `40`), CB(`10`, `20`), CompareSelectOperation::kLE));
201	ASSERT_EQ(
202	CmpEvalResult::NotDetermined,
203	compareBound(CB(`30`, `40`), CB(`10`, `30`), CompareSelectOperation::kLE));
204	ASSERT_EQ(
205	CmpEvalResult::NotDetermined,
206	compareBound(CB(`30`, `45`), CB(`40`, `50`), CompareSelectOperation::kLE));
207	}
208
209	TEST(MemDependency, BoundOverlapSymbolic) {
210	VarHandle x("x", kInt);
211	VarHandle y("y", kInt);
212	VarHandle z("z", kInt);
213	VarHandle w("w", kInt);
214
215	using namespace analysis;
216
217	auto CB = [](ExprHandle s, ExprHandle e) {
218	return Bound (s.node(), e.node());
219	};
220
221	// Sanity check cases where the start and end is symbolic but the diff is
222	// constant.
223	// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
224	ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(x, x), CB(x, x)));
225	ASSERT_EQ(
226	OverlapKind::PartialOverlap,
227	boundOverlap(CB(x, x + `3`), CB(x + `2`, x + `5`)));
228	ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(x, x), CB(x + `1`, x + `1`)));
229
230	// We can't infer the sign of y, so cannot tell whether adding y is larger or
231	// smaller than y/2.
232	ASSERT_EQ(
233	OverlapKind::PartialOverlap,
234	boundOverlap(CB(x, x + y), CB(x, x + y / `2`)));
235
236	// No information about this bound, have to take the most conservative option:
237	// there may be an overlap.
238	ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(x, y), CB(z, w)));
239
240	// Math on opaque terms works.
241	ASSERT_EQ(
242	OverlapKind::ContainedOrEqual,
243	boundOverlap(CB(x + w, y - z), CB(x + w, y - z)));
244	// Even requiring simplification.
245	ASSERT_EQ(
246	OverlapKind::ContainedOrEqual,
247	boundOverlap(CB(x - w - w, y), CB(x - w * `2`, y)));
248	}
249
250	// Tests the helper function for overlap of multi dimensional indices bounds.
251	// This uses boundOverlap on each dimension and return the "lowest" kind of
252	// overlap.
253	TEST(MemDependency, BoundOverlapMultiDim) {
254	using namespace analysis;
255
256	auto CB = [](int s, int e) {
257	return Bound (alloc<IntImm>(s), alloc<IntImm>(e));
258	};
259
260	// Sanity check one dimensional cases.
261	ASSERT_EQ(OverlapKind::ContainedOrEqual, overlaps({CB(`0`, `0`)}, {CB(`0`, `0`)}));
262	ASSERT_EQ(OverlapKind::NoOverlap, overlaps({CB(`0`, `2`)}, {CB(`5`, `10`)}));
263	ASSERT_EQ(
264	OverlapKind::PartialOverlap, overlaps({CB(`0`, `100`)}, {CB(`100`, `120`)}));
265
266	// Total overlap in 3 dims.
267	ASSERT_EQ(
268	OverlapKind::ContainedOrEqual,
269	overlaps({CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `4`)}, {CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `4`)}));
270	ASSERT_EQ(
271	OverlapKind::ContainedOrEqual,
272	overlaps(
273	{CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `4`)}, {CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `10`)}));
274
275	// Total overlap in 2 dims, no overlap in another.
276	ASSERT_EQ(
277	OverlapKind::NoOverlap,
278	overlaps(
279	{CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `4`)}, {CB(`0`, `2`), CB(`0`, `5`), CB(`5`, `10`)}));
280
281	// Total overlap in 2 dims, partial overlap in another.
282	ASSERT_EQ(
283	OverlapKind::PartialOverlap,
284	overlaps(
285	{CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `5`)}, {CB(`0`, `2`), CB(`0`, `5`), CB(`5`, `10`)}));
286	// This case is most important, so verify the overlap in any dim. (dim 2)
287	ASSERT_EQ(
288	OverlapKind::PartialOverlap,
289	overlaps({CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `5`)}, {CB(`0`, `2`), CB(`2`, `6`), CB(`0`, `5`)}));
290	// Dim 1.
291	ASSERT_EQ(
292	OverlapKind::PartialOverlap,
293	overlaps({CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `5`)}, {CB(`1`, `3`), CB(`0`, `5`), CB(`0`, `5`)}));
294	// Total overlap in 1 dim, partial in 2.
295	ASSERT_EQ(
296	OverlapKind::PartialOverlap,
297	overlaps(
298	{CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `5`)}, {CB(`2`, `6`), CB(`0`, `5`), CB(`5`, `10`)}));
299	// Total overlap, partial overlap, no overlap.
300	ASSERT_EQ(
301	OverlapKind::NoOverlap,
302	overlaps(
303	{CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `5`)}, {CB(`2`, `6`), CB(`11`, `15`), CB(`0`, `5`)}));
304
305	// Total overlap (B) in 2 dims, total overlap (A) in another.
306	ASSERT_EQ(
307	OverlapKind::Contains,
308	overlaps({CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `4`)}, {CB(`0`, `2`), CB(`0`, `3`), CB(`0`, `4`)}));
309
310	// Total overlap (A) in 2 dims, total overlap (B) in another.
311	ASSERT_EQ(
312	OverlapKind::Contains,
313	overlaps(
314	{CB(`0`, `12`), CB(`0`, `15`), CB(`0`, `4`)}, {CB(`0`, `2`), CB(`0`, `3`), CB(`0`, `14`)}));
315
316	// Total (B), No Overlap, Total (A).
317	ASSERT_EQ(
318	OverlapKind::NoOverlap,
319	overlaps(
320	{CB(`0`, `2`), CB(`0`, `5`), CB(`0`, `5`)}, {CB(`0`, `6`), CB(`11`, `15`), CB(`1`, `2`)}));
321	}
322
323	// Test the helper we use to subtract bounds: returns the region(s) of A which
324	// remain after removing the region of B.
325	TEST(MemDependency, BoundSubtract) {
326	using namespace analysis;
327
328	auto CB = [](int s, int e) {
329	return Bound (alloc<IntImm>(s), alloc<IntImm>(e));
330	};
331	auto EQ = [](const IndexBounds& x, const IndexBounds& y) {
332	return indexBoundsEquals(x, y);
333	};
334
335	// One element subtract.
336	ASSERT_EQ(subtractBound(CB(`0`, `0`), CB(`0`, `0`)).size(), `0`);
337	ASSERT_EQ(subtractBound(CB(`5`, `5`), CB(`5`, `5`)).size(), `0`);
338
339	// No Overlap.
340	ASSERT_TRUE(EQ(subtractBound(CB(`5`, `5`), CB(`2`, `2`)), {CB(`5`, `5`)}));
341	ASSERT_TRUE(EQ(subtractBound(CB(`5`, `5`), CB(`0`, `4`)), {CB(`5`, `5`)}));
342
343	// one side overlap.
344	ASSERT_TRUE(EQ(subtractBound(CB(`1`, `5`), CB(`4`, `7`)), {CB(`1`, `3`)}));
345	ASSERT_TRUE(EQ(subtractBound(CB(`0`, `5`), CB(`5`, `7`)), {CB(`0`, `4`)}));
346	ASSERT_TRUE(EQ(subtractBound(CB(`4`, `5`), CB(`1`, `4`)), {CB(`5`, `5`)}));
347	ASSERT_TRUE(EQ(subtractBound(CB(`1`, `5`), CB(`0`, `4`)), {CB(`5`, `5`)}));
348
349	// both sides overlap.
350	ASSERT_TRUE(EQ(subtractBound(CB(`1`, `5`), CB(`0`, `7`)), {}));
351	ASSERT_TRUE(EQ(subtractBound(CB(`5`, `5`), CB(`5`, `7`)), {}));
352
353	// internal overlap.
354	ASSERT_TRUE(EQ(subtractBound(CB(`1`, `5`), CB(`2`, `3`)), {CB(`1`, `1`), CB(`4`, `5`)}));
355	ASSERT_TRUE(EQ(subtractBound(CB(`0`, `5`), CB(`2`, `4`)), {CB(`0`, `1`), CB(`5`, `5`)}));
356	}
357
358	TEST(MemDependency, BoundSubtractSymbolic) {
359	VarHandle x("x", kInt);
360	VarHandle y("y", kInt);
361	VarHandle z("z", kInt);
362	VarHandle w("w", kInt);
363
364	using namespace analysis;
365
366	auto CB = [](ExprHandle s, ExprHandle e) {
367	return Bound (s.node(), e.node());
368	};
369	auto EQ = [](const IndexBounds& x, const IndexBounds& y) {
370	return indexBoundsEquals(x, y);
371	};
372
373	// One element subtract.
374	// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
375	ASSERT_TRUE(EQ(subtractBound(CB(x, x), CB(x, x)), {}));
376	ASSERT_TRUE(EQ(subtractBound(CB(x + `1`, x + `1`), CB(x + `1`, x + `1`)), {}));
377	ASSERT_TRUE(EQ(subtractBound(CB(x * `2`, x * `2`), CB(x * `2`, x * `2`)), {}));
378
379	// Subtract constant range low.
380	ASSERT_TRUE(
381	EQ(subtractBound(CB(x, x + `10`), CB(x, x + `4`)), {CB(x + `5`, x + `10`)}));
382	// Subtract constant range high.
383	ASSERT_TRUE(
384	EQ(subtractBound(CB(x, x + `10`), CB(x + `6`, x + `12`)), {CB(x, x + `5`)}));
385	// Subtract constant range total overlap.
386	ASSERT_TRUE(EQ(subtractBound(CB(x, x + `10`), CB(x, x + `10`)), {}));
387	ASSERT_TRUE(EQ(subtractBound(CB(x + `2`, x + `10`), CB(x, x + `12`)), {}));
388	// Subtract constant range internal.
389	ASSERT_TRUE(
390	EQ(subtractBound(CB(x, x + `10`), CB(x + `3`, x + `7`)),
391	{CB(x, x + `2`), CB(x + `8`, x + `10`)}));
392
393	// Size is inferable but not constant, only works with a single var.
394	ASSERT_TRUE(EQ(subtractBound(CB(`0`, x), CB(`0`, x * `2`)), {}));
395	ASSERT_TRUE(EQ(subtractBound(CB(`0`, x * `2`), CB(`0`, x - `1`)), {CB(x, x * `2`)}));
396
397	// Size is not inferable.
398	ASSERT_TRUE(EQ(subtractBound(CB(x, y), CB(z, w)), {CB(x, y)}));
399	ASSERT_TRUE(EQ(subtractBound(CB(x, y), CB(x, z)), {CB(x, y)}));
400	ASSERT_TRUE(EQ(subtractBound(CB(x, y), CB(`0`, x)), {CB(x, y)}));
401	ASSERT_TRUE(EQ(subtractBound(CB(x, x), CB(`0`, `0`)), {CB(x, x)}));
402	}
403
404	// Tests the helper function that does subtraction, but for multi dimensional
405	// indices bounds.
406	TEST(MemDependency, BoundSubtractMultiDim) {
407	using namespace analysis;
408
409	auto CB = [](int s, int e) {
410	return Bound (alloc<IntImm>(s), alloc<IntImm>(e));
411	};
412	auto EQ = [](std::vector<IndexBounds> x, std::vector<IndexBounds> y) {
413	if (x.size() != y.size()) {
414	return false;
415	}
416	for (auto i = `0U`; i < x.size(); ++i) {
417	if (!indexBoundsEquals(x [i], y [i])) {
418	return false;
419	}
420	}
421	return true;
422	};
423
424	// sanity check one dimension.
425	ASSERT_TRUE(EQ(subtractIndicesBounds({CB(`0`, `9`)}, {CB(`0`, `9`)}), {}));
426	ASSERT_TRUE(EQ(subtractIndicesBounds({CB(`3`, `9`)}, {CB(`0`, `12`)}), {}));
427	ASSERT_TRUE(
428	EQ(subtractIndicesBounds({CB(`0`, `12`)}, {CB(`0`, `9`)}), {{CB(`10`, `12`)}}));
429	ASSERT_TRUE(
430	EQ(subtractIndicesBounds({CB(`0`, `12`)}, {CB(`3`, `12`)}), {{CB(`0`, `2`)}}));
431	ASSERT_TRUE(EQ(
432	subtractIndicesBounds({CB(`0`, `9`)}, {CB(`1`, `8`)}), {{CB(`0`, `0`)}, {CB(`9`, `9`)}}));
433
434	// Multi dim total overlap.
435	ASSERT_TRUE(EQ(
436	subtractIndicesBounds({CB(`0`, `9`), CB(`0`, `2`)}, {CB(`0`, `9`), CB(`0`, `2`)}), {}));
437	ASSERT_TRUE(EQ(
438	subtractIndicesBounds({CB(`0`, `9`), CB(`0`, `2`)}, {CB(`0`, `10`), CB(`0`, `20`)}), {}));
439
440	// Mutli dim one way partial in dim 1.
441	ASSERT_TRUE(
442	EQ(subtractIndicesBounds({CB(`0`, `9`), CB(`0`, `2`)}, {CB(`0`, `3`), CB(`0`, `2`)}),
443	{{CB(`4`, `9`), CB(`0`, `2`)}}));
444
445	// Mutli dim one way partial in dim 2.
446	ASSERT_TRUE(
447	EQ(subtractIndicesBounds({CB(`0`, `9`), CB(`0`, `20`)}, {CB(`0`, `9`), CB(`0`, `10`)}),
448	{{CB(`0`, `9`), CB(`11`, `20`)}}));
449
450	// Partial overlap in 2 dims.
451	ASSERT_TRUE(
452	EQ(subtractIndicesBounds({CB(`0`, `5`), CB(`0`, `5`)}, {CB(`2`, `8`), CB(`2`, `8`)}),
453	{{CB(`0`, `1`), CB(`0`, `5`)}, {CB(`2`, `5`), CB(`0`, `1`)}}));
454
455	// Partial overlap in 3 dims.
456	ASSERT_TRUE(
457	EQ(subtractIndicesBounds(
458	{CB(`0`, `5`), CB(`0`, `5`), CB(`0`, `5`)}, {CB(`2`, `8`), CB(`2`, `8`), CB(`2`, `8`)}),
459	{{CB(`0`, `1`), CB(`0`, `5`), CB(`0`, `5`)},
460	{CB(`2`, `5`), CB(`0`, `1`), CB(`0`, `5`)},
461	{CB(`2`, `5`), CB(`2`, `5`), CB(`0`, `1`)}}));
462	}
463
464	// Tests the multi dimensional subtraction code for bounds that cannot be fully
465	// materialized.
466	TEST(MemDependency, BoundSubtractMultiDimSymbolic) {
467	VarHandle x("x", kInt);
468	VarHandle y("y", kInt);
469
470	using namespace analysis;
471
472	auto CB = [](ExprHandle s, ExprHandle e) {
473	return Bound (s.node(), e.node());
474	};
475
476	auto EQ = [](std::vector<IndexBounds> x, std::vector<IndexBounds> y) {
477	if (x.size() != y.size()) {
478	return false;
479	}
480	for (auto i = `0U`; i < x.size(); ++i) {
481	if (!indexBoundsEquals(x [i], y [i])) {
482	return false;
483	}
484	}
485	return true;
486	};
487
488	// Cannot determine overlaps.
489	// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
490	ASSERT_TRUE(EQ(subtractIndicesBounds({CB(x, x)}, {CB(`0`, `0`)}), {{CB(x, x)}}));
491
492	// Various total Overlaps.
493	ASSERT_TRUE(EQ(
494	subtractIndicesBounds({CB(x, x), CB(x, x)}, {CB(x, x), CB(x, x)}), {}));
495	ASSERT_TRUE(EQ(
496	subtractIndicesBounds({CB(x, y), CB(x, y)}, {CB(x, y), CB(x, y)}), {}));
497	ASSERT_TRUE(EQ(
498	subtractIndicesBounds({CB(x, x), CB(y, y)}, {CB(x, x), CB(y, y)}), {}));
499	ASSERT_TRUE(EQ(
500	subtractIndicesBounds({CB(`0`, x), CB(`0`, y)}, {CB(`0`, x), CB(`0`, y)}), {}));
501
502	// one-way overlap in first dim.
503	ASSERT_TRUE(
504	EQ(subtractIndicesBounds({CB(`0`, x), CB(`0`, y)}, {CB(`0`, x - `5`), CB(`0`, y)}),
505	{{CB(x - `4`, x), CB(`0`, y)}}));
506	// second dim.
507	ASSERT_TRUE(
508	EQ(subtractIndicesBounds({CB(`0`, x), CB(`0`, y)}, {CB(`0`, x), CB(`5`, y)}),
509	{{CB(`0`, x), CB(`0`, `4`)}}));
510
511	// Internal overlap in first dim.
512	ASSERT_TRUE(
513	EQ(subtractIndicesBounds({CB(`0`, x), CB(`0`, y)}, {CB(`2`, x - `5`), CB(`0`, y)}),
514	{{CB(`0`, `1`), CB(`0`, y)}, {CB(x - `4`, x), CB(`0`, y)}}));
515	// second dim.
516	ASSERT_TRUE(EQ(
517	subtractIndicesBounds({CB(`0`, x), CB(`0`, y)}, {CB(`0`, x), CB(`10`, y - `10`)}),
518	{{CB(`0`, x), CB(`0`, `9`)}, {CB(`0`, x), CB(y - `9`, y)}}));
519
520	// Overlap in both dimensions.
521	ASSERT_TRUE(
522	EQ(subtractIndicesBounds(
523	{CB(`0`, x), CB(`0`, y)}, {CB(`5`, x - `5`), CB(`10`, y - `10`)}),
524	{
525	{CB(`0`, `4`), CB(`0`, y)},
526	{CB(x - `4`, x), CB(`0`, y)},
527	{CB(`0`, x), CB(`0`, `9`)},
528	{CB(`0`, x), CB(y - `9`, y)},
529	}));
530	}
531
532	// Simple check that the analyzer does anything at all...
533	TEST(MemDependency, MemDependencyCheckerSimple) {
534	BufHandle a("A", {`1`}, kInt);
535	BufHandle b("B", {`1`}, kInt);
536
537	analysis::MemDependencyChecker analyzer;
538
539	/*
540	* A[0] = 3;
541	* B[0] = A[0] + 1;
542	*/
543
544	StorePtr aStore = Store::make(a, {`0`}, `3`);
545	StorePtr bStore = Store::make(b, {`0`}, Add::make(Load::make(a, {`0`}), `1`));
546
547	StmtPtr stmt = Block::make({aStore, bStore});
548
549	stmt ->accept(&analyzer);
550
551	ASSERT_TRUE(analyzer.dependsDirectly(bStore, aStore));
552	ASSERT_FALSE(analyzer.dependsIndirectly(aStore, bStore));
553	// sanity check, but anything that depends directly must depend indirectly.
554	ASSERT_TRUE(analyzer.dependsIndirectly(bStore, aStore));
555	}
556
557	// Check that there is a difference between direct and indirect dependence.
558	TEST(MemDependency, MemDependencyCheckerMultiStmt) {
559	BufHandle a("A", {`1`}, kInt);
560	BufHandle b("B", {`1`}, kInt);
561	BufHandle c("C", {`1`}, kInt);
562
563	analysis::MemDependencyChecker analyzer;
564
565	/*
566	* A[0] = 3;
567	* B[0] = A[0];
568	* C[0] = B[0] + 1;
569	*/
570
571	StorePtr aStore = Store::make(a, {`0`}, `3`);
572	StorePtr bStore = Store::make(b, {`0`}, Load::make(a, {`0`}));
573	StorePtr cStore = Store::make(c, {`0`}, Add::make(Load::make(b, {`0`}), `1`));
574
575	StmtPtr stmt = Block::make({aStore, bStore, cStore});
576
577	stmt ->accept(&analyzer);
578
579	// C depends on A indirectly.
580	ASSERT_FALSE(analyzer.dependsDirectly(cStore, aStore));
581	ASSERT_TRUE(analyzer.dependsIndirectly(cStore, aStore));
582
583	// C depends on B directly, which depends on A directly.
584	ASSERT_TRUE(analyzer.dependsDirectly(cStore, bStore));
585	ASSERT_TRUE(analyzer.dependsDirectly(bStore, aStore));
586
587	// Dependency goes top to bottom only.
588	ASSERT_FALSE(analyzer.dependsIndirectly(bStore, cStore));
589	ASSERT_FALSE(analyzer.dependsIndirectly(aStore, bStore));
590	ASSERT_FALSE(analyzer.dependsIndirectly(aStore, cStore));
591	}
592
593	// Verify that we do filter writes that are totally overlapped by later writes.
594	TEST(MemDependency, MemDependencyCheckerOverlap) {
595	BufHandle a("A", {`1`}, kInt);
596	BufHandle b("B", {`1`}, kInt);
597
598	analysis::MemDependencyChecker analyzer;
599
600	/*
601	* A[0] = 3;
602	* A[0] = 6;
603	* B[0] = A[0] + 1;
604	*/
605
606	StorePtr aStore = Store::make(a, {`0`}, `3`);
607	StorePtr a2Store = Store::make(a, {`0`}, `6`);
608	StorePtr bStore = Store::make(b, {`0`}, Add::make(Load::make(a, {`0`}), `1`));
609
610	StmtPtr stmt = Block::make({aStore, a2Store, bStore});
611
612	stmt ->accept(&analyzer);
613
614	// B store depends on second A store but not first since it is completely
615	// overlapped.
616	ASSERT_TRUE(analyzer.dependsIndirectly(bStore, a2Store));
617	ASSERT_FALSE(analyzer.dependsIndirectly(bStore, aStore));
618
619	// No dependency between either A store.
620	ASSERT_FALSE(analyzer.dependsIndirectly(aStore, a2Store));
621	ASSERT_FALSE(analyzer.dependsIndirectly(a2Store, aStore));
622	}
623
624	// Verify that bounds match loop iterations, and that dependencies progress
625	// across loop scopes.
626	TEST(MemDependency, MemDependencyCheckerLoop) {
627	BufHandle a("A", {`1`}, kInt);
628	BufHandle b("B", {`1`}, kInt);
629	VarHandle x("x", kInt);
630
631	using namespace analysis;
632
633	MemDependencyChecker analyzer;
634
635	/*
636	* for (int x = 0; x < 10; ++x) {
637	* A[x] = x;
638	* }
639	* B[0] = A[0] + 1;
640	*/
641
642	StorePtr aStore = Store::make(a, {x}, x);
643	StmtPtr loop = For::make(x, `0`, `10`, aStore);
644	StorePtr bStore = Store::make(b, {`0`}, Add::make(Load::make(a, {`4`}), `1`));
645
646	StmtPtr stmt = Block::make({loop, bStore});
647
648	stmt ->accept(&analyzer);
649
650	// Same A->B dependency.
651	ASSERT_TRUE(analyzer.dependsDirectly(bStore, aStore));
652
653	// B depends on the loop.
654	ASSERT_TRUE(analyzer.dependsDirectly(bStore, loop));
655	// A is in the loop but does not depend on any loop iteration.
656	ASSERT_FALSE(analyzer.dependsIndirectly(aStore, loop));
657
658	auto aStoreAccess = analyzer.accessFor(aStore);
659	ASSERT_NE(aStoreAccess, nullptr);
660
661	// It should have bounds covering the range of x: 0 <= x < 10.
662	ASSERT_TRUE(indexBoundsEquals(
663	aStoreAccess ->bounds(), {Bound (alloc<IntImm>(`0`), alloc<IntImm>(`9`))}));
664	}
665
666	// Reductions should promote dependencies as well.
667	TEST(MemDependency, MemDependencyCheckerLoopReduce) {
668	BufHandle a("A", {`10`}, kInt);
669	BufHandle b("B", {`10`}, kInt);
670	VarHandle x("x", kInt);
671
672	using namespace analysis;
673
674	MemDependencyChecker analyzer;
675
676	/*
677	* A[0] = 0;
678	* for (int x = 0; x < 10; ++x) {
679	* A[0] = A[x] + 1;
680	* }
681	* B[0] = A[0];
682	*/
683
684	StorePtr aInit = Store::make(a, {`0`}, `0`);
685	ExprHandle reduce = Sum ()(a, `1`, {x}, {x});
686	StorePtr aReduce = Store::make(a, {`0`}, reduce);
687	StmtPtr loop = For::make(x, `0`, `10`, aReduce);
688	StorePtr bStore = Store::make(b, {`0`}, Load::make(a, {`0`}));
689
690	StmtPtr stmt = Block::make({aInit, loop, bStore});
691
692	stmt ->accept(&analyzer);
693
694	// B -> A.
695	ASSERT_TRUE(analyzer.dependsDirectly(bStore, aReduce));
696
697	// B depends indirectly on the intializer of A, since the reduction depends
698	// on it.
699	ASSERT_FALSE(analyzer.dependsDirectly(bStore, aInit));
700	ASSERT_TRUE(analyzer.dependsIndirectly(bStore, aInit));
701
702	ASSERT_TRUE(analyzer.dependsDirectly(aReduce, aInit));
703
704	// B depends on the loop.
705	ASSERT_TRUE(analyzer.dependsDirectly(bStore, loop));
706	// A is in the loop and depends on other iterations.
707	ASSERT_TRUE(analyzer.dependsDirectly(aReduce, loop));
708
709	// The loop contents depend on the initializer too.
710	ASSERT_TRUE(analyzer.dependsDirectly(loop, aInit));
711
712	// Find loads within the reduction:
713	auto reduceLoads = NodeFinder<Load>::find(reduce.node());
714	// Pull out the access for the load inside the loop.
715	for (auto load : reduceLoads) {
716	auto loopLoad = analyzer.accessFor(load);
717	// It should have 10 element long bounds.
718	ASSERT_TRUE(indexBoundsEquals(
719	loopLoad ->bounds(), {Bound (alloc<IntImm>(`0`), alloc<IntImm>(`9`))}));
720	}
721	}
722
723	// Lowering a reduction doesn't affect dependency analysis.
724	TEST(MemDependency, MemDependencyCheckerLoopReduceExpanded) {
725	BufHandle a("A", {`10`}, kInt);
726	BufHandle b("B", {`10`}, kInt);
727	VarHandle x("x", kInt);
728
729	using namespace analysis;
730
731	MemDependencyChecker analyzer;
732
733	/*
734	* A[0] = 0;
735	* for (int x = 0; x < 10; ++x) {
736	* A[0] = A[x] + 1;
737	* }
738	* B[0] = A[0];
739	*/
740
741	StorePtr aInit = Store::make(a, {`0`}, `0`);
742	ExprHandle aLoad = Load::make(a, {x});
743	StorePtr aReduce = Store::make(a, {`0`}, Add::make(aLoad, `1`));
744	StmtPtr loop = For::make(x, `0`, `10`, aReduce);
745	StorePtr bStore = Store::make(b, {`0`}, Load::make(a, {`0`}));
746
747	StmtPtr stmt = Block::make({aInit, loop, bStore});
748
749	stmt ->accept(&analyzer);
750
751	// B -> A.
752	ASSERT_TRUE(analyzer.dependsDirectly(bStore, aReduce));
753
754	// B depends indirectly on the intializer of A, since the reduction depends
755	// on it.
756	ASSERT_FALSE(analyzer.dependsDirectly(bStore, aInit));
757	ASSERT_TRUE(analyzer.dependsIndirectly(bStore, aInit));
758
759	ASSERT_TRUE(analyzer.dependsDirectly(aReduce, aInit));
760
761	// B depends on the loop.
762	ASSERT_TRUE(analyzer.dependsDirectly(bStore, loop));
763	// A is in the loop and depends on other iterations.
764	ASSERT_TRUE(analyzer.dependsDirectly(aReduce, loop));
765
766	// The loop contents depend on the initializer too.
767	ASSERT_TRUE(analyzer.dependsDirectly(loop, aInit));
768
769	// Pull out the access for the store inside the loop.
770	auto loopLoad = analyzer.accessFor(aLoad.node());
771	// It should have 10 element long bounds.
772	ASSERT_TRUE(indexBoundsEquals(
773	loopLoad ->bounds(), {Bound (alloc<IntImm>(`0`), alloc<IntImm>(`9`))}));
774	}
775
776	// Can determine dependencies of outputs, through to inputs.
777	TEST(MemDependency, MemDependencyCheckerInputsOutputs) {
778	BufHandle a("A", {`10`}, kInt);
779	BufHandle b("B", {`10`}, kInt);
780	VarHandle x("x", kInt);
781
782	// initialize analyzer with inputs and outputs.
783	analysis::MemDependencyChecker analyzer({a}, {b});
784
785	// Here's a Relu.
786	/*
787	* for (int x = 0; x < 10; ++x) {
788	* B[x] = Max(A[x], 0);
789	* }
790	*/
791
792	ExprHandle aLoad = Load::make(a, {x});
793	StorePtr bStore = Store::make(b, {x}, Max::make(aLoad, `0`, true));
794	StmtPtr loop = For::make(x, `0`, `10`, bStore);
795
796	StmtPtr stmt = Block::make({loop});
797
798	stmt ->accept(&analyzer);
799
800	// Output depends indirectly on input.
801	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
802	// aLoad depends directly on the input A.
803	ASSERT_TRUE(analyzer.dependsDirectly(aLoad.node(), a.node()));
804	// bStore therefore depends directly on the input A.
805	ASSERT_TRUE(analyzer.dependsDirectly(bStore, a.node()));
806	// The output depends directly on the store.
807	ASSERT_TRUE(analyzer.dependsDirectly(b.node(), bStore));
808
809	// Check AccessInfo based overloads.
810	auto input = analyzer.input(a.node());
811	auto output = analyzer.output(b.node());
812
813	// Output depends indirectly on input.
814	ASSERT_TRUE(analyzer.dependsIndirectly(output, input));
815	// Not directly.
816	ASSERT_FALSE(analyzer.dependsDirectly(output, input));
817	// Not in reverse order.
818	ASSERT_FALSE(analyzer.dependsIndirectly(input, output));
819
820	// output -> bStore -> bLoad -> input.
821	auto storeAccess = analyzer.accessFor(bStore);
822	auto loadAccess = analyzer.accessFor(aLoad.node());
823
824	ASSERT_TRUE(analyzer.dependsDirectly(output, storeAccess));
825	ASSERT_TRUE(analyzer.dependsDirectly(loadAccess, input));
826	}
827
828	// Can tell if an output does not depend on an input.
829	TEST(MemDependency, MemDependencyCheckerOutputDoesntDepend) {
830	BufHandle a("A", {`10`}, kInt);
831	BufHandle b("B", {`10`}, kInt);
832	VarHandle x("x", kInt);
833
834	// initialize analyzer with inputs and outputs.
835	analysis::MemDependencyChecker analyzer({a}, {b});
836
837	// Here's a dumb Relu.
838	/*
839	* for (int x = 0; x < 10; ++x) {
840	* B[x] = Max(x, 0);
841	* }
842	*/
843
844	StorePtr bStore = Store::make(b, {x}, Max::make(x, `0`, true));
845	StmtPtr loop = For::make(x, `0`, `10`, bStore);
846
847	StmtPtr stmt = Block::make({loop});
848
849	stmt ->accept(&analyzer);
850
851	// Output does not depend indirectly on input.
852	ASSERT_FALSE(analyzer.dependsIndirectly(b.node(), a.node()));
853
854	// The output still depends directly on the store.
855	ASSERT_TRUE(analyzer.dependsDirectly(b.node(), bStore));
856
857	// Check AccessInfo based overloads.
858	auto input = analyzer.input(a.node());
859	auto output = analyzer.output(b.node());
860
861	// Output does not depend indirectly on input.
862	ASSERT_FALSE(analyzer.dependsIndirectly(output, input));
863	}
864
865	// Verify different loop extents produce accesses with different bounds, and
866	// that later accesses find dependencies that overlap their entire bound range.
867	TEST(MemDependency, MemDependencyCheckerLoopBounds) {
868	BufHandle a("A", {`10`}, kInt);
869	BufHandle b("B", {`10`}, kInt);
870	BufHandle c("C", {`10`}, kInt);
871	VarHandle x("x", kInt);
872	using namespace analysis;
873
874	MemDependencyChecker analyzer({a}, {c});
875
876	// This enables using the execution order of the loops to determine if some
877	// loops are self dependent or not.
878	analyzer.allowLoopExecutionOrderAnalysis();
879
880	/*
881	* for (int x = 1; x < 10; ++x) {
882	* B[x] = A[x];
883	* }
884	* for (int x = 1; x < 9; ++x) {
885	* B[x] = B[x] * 2;
886	* }
887	* for (int x = 3; x < 4; ++x) {
888	* C[x] = A[x];
889	* }
890	* for (int x = 0; x < 10; ++x) {
891	* C[x] = B[x];
892	* }
893	*/
894
895	std::vector<StmtPtr> stmts(
896	{For::make(x, `1`, `10`, Store::make(b, {x}, Load::make(a, {x}))),
897	For::make(
898	x, `1`, `9`, Store::make(b, {x}, Mul::make(Load::make(b, {x}), `2`))),
899	For::make(x, `3`, `4`, Store::make(c, {x}, Load::make(a, {x}))),
900	For::make(x, `0`, `10`, Store::make(c, {x}, Load::make(b, {x})))});
901
902	StmtPtr stmt = Block::make(stmts);
903
904	stmt ->accept(&analyzer);
905
906	auto input = analyzer.input(a.node());
907	auto output = analyzer.output(c.node());
908
909	// sanity check Output -> Input.
910	ASSERT_TRUE(analyzer.dependsIndirectly(output, input));
911
912	// Check the For loop dependencies:
913
914	// Last write to C depends on both writes to B since they contain the last
915	// write to at least one element.
916	ASSERT_TRUE(analyzer.dependsIndirectly(stmts[`3`], stmts[`1`]));
917	ASSERT_TRUE(analyzer.dependsIndirectly(stmts[`3`], stmts[`0`]));
918
919	// The last write to C does not depend on the other write to C.
920	ASSERT_FALSE(analyzer.dependsIndirectly(stmts[`3`], stmts[`2`]));
921
922	auto CB = [](int s, int e) {
923	return Bound (alloc<IntImm>(s), alloc<IntImm>(e));
924	};
925	auto EQ = [](const IndexBounds& x, const IndexBounds& y) {
926	return indexBoundsEquals(x, y);
927	};
928
929	/ 0. Input: A[(0, 9)] - dependents: 1 5*
930	* 1. Load: A[(1, 9)] - depends on: 0 - dependents: 2
931	* 2. Store: B[(1, 9)] - depends on: 1 - dependents: 3 7
932	* 3. Load: B[(1, 8)] - depends on: 2 - dependents: 4
933	* 4. Store: B[(1, 8)] - depends on: 3 - dependents: 7
934	* 5. Load: A[(3, 3)] - depends on: 0 - dependents: 6
935	* 6. Store: C[(3, 3)] - depends on: 5
936	* 7. Load: B[(0, 9)] - depends on: 2 4 - dependents: 8
937	* 8. Store: C[(0, 9)] - depends on: 7 - dependents: 9
938	* 9. Output: C[(0, 9)] - depends on: 8
939	*/
940
941	// Now let's look at the bounds of each access.
942	// There are 9 accesses in this Stmt, so this is exhaustive, we wont do this
943	// much.
944	auto history = analyzer.getHistory();
945	ASSERT_EQ(history.size(), `10`);
946	VarPtr aVar = a.node()->base_handle();
947	VarPtr bVar = b.node()->base_handle();
948	VarPtr cVar = c.node()->base_handle();
949
950	// The first access is the input A.
951	ASSERT_EQ(history[`0`]->type(), AccessType::Input);
952	ASSERT_EQ(history[`0`]->var(), aVar);
953	// It has the bounds of the producing Input.
954	ASSERT_TRUE(EQ(history[`0`]->bounds(), {CB(`0`, `9`)}));
955	// sanity check the input we retrieved earlier matches.
956	ASSERT_EQ(history[`0`], input);
957
958	// The second access is the load of A in the first loop.
959	ASSERT_EQ(history[`1`]->type(), AccessType::Load);
960	ASSERT_EQ(history[`1`]->var(), aVar);
961	// It has the bounds of the loop, i.e. start == 1.
962	ASSERT_TRUE(EQ(history[`1`]->bounds(), {CB(`1`, `9`)}));
963	// It reads from A, so it should have a dependency on the last write to this
964	// range - with is the input.
965	ASSERT_EQ(history[`1`]->dependencies().size(), `1`);
966	ASSERT_TRUE(history[`1`]->hasDependency(history[`0`]));
967
968	// The third access is the store into B in the first loop.
969	ASSERT_EQ(history[`2`]->type(), AccessType::Store);
970	ASSERT_EQ(history[`2`]->var(), bVar);
971	// It also has the bounds of the loop, i.e. start == 1.
972	ASSERT_TRUE(EQ(history[`2`]->bounds(), {CB(`1`, `9`)}));
973	// The previous load is in its RHS, so it depends on it.
974	ASSERT_EQ(history[`2`]->dependencies().size(), `1`);
975	ASSERT_TRUE(history[`2`]->hasDependency(history[`1`]));
976
977	// The third access is the load from B in the second loop.
978	ASSERT_EQ(history[`3`]->type(), AccessType::Load);
979	ASSERT_EQ(history[`3`]->var(), bVar);
980	// It has the bounds of the second loop, i.e. >= 1 < 9.
981	ASSERT_TRUE(EQ(history[`3`]->bounds(), {CB(`1`, `8`)}));
982	// It reads from B in a smaller range, so should depend on the previous
983	// store.
984	ASSERT_EQ(history[`3`]->dependencies().size(), `1`);
985	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
986
987	// The fourth: the store to B in the second loop.
988	ASSERT_EQ(history[`4`]->type(), AccessType::Store);
989	ASSERT_EQ(history[`4`]->var(), bVar);
990	// It also has the bounds of the second loop.
991	ASSERT_TRUE(EQ(history[`4`]->bounds(), {CB(`1`, `8`)}));
992	// The previous load is in its RHS, so it depends on it as before.
993	ASSERT_EQ(history[`4`]->dependencies().size(), `1`);
994	ASSERT_TRUE(history[`4`]->hasDependency(history[`3`]));
995
996	// The fifth access is the load is from the 3rd loop, and skips previous B
997	// accesses.
998	ASSERT_EQ(history[`5`]->type(), AccessType::Load);
999	ASSERT_EQ(history[`5`]->var(), aVar);
1000	// It has the bounds of the third loop: >= 3 < 4.
1001	ASSERT_TRUE(EQ(history[`5`]->bounds(), {CB(`3`, `3`)}));
1002	// It depends on the last thing to write to A, which is the A input.
1003	ASSERT_EQ(history[`5`]->dependencies().size(), `1`);
1004	ASSERT_TRUE(history[`5`]->hasDependency(history[`0`]));
1005
1006	// Sixth: the store into the output C.
1007	ASSERT_EQ(history[`6`]->type(), AccessType::Store);
1008	ASSERT_EQ(history[`6`]->var(), cVar);
1009	// It also has the bounds of the third loop.
1010	ASSERT_TRUE(EQ(history[`6`]->bounds(), {CB(`3`, `3`)}));
1011	// The previous load is in its RHS, so it depends on it as always.
1012	ASSERT_EQ(history[`6`]->dependencies().size(), `1`);
1013	ASSERT_TRUE(history[`6`]->hasDependency(history[`5`]));
1014
1015	// The seventh access is the load of B in the fourth loop.
1016	ASSERT_EQ(history[`7`]->type(), AccessType::Load);
1017	ASSERT_EQ(history[`7`]->var(), bVar);
1018	// It has the bounds of the final loop, >= 0 < 10
1019	ASSERT_TRUE(EQ(history[`7`]->bounds(), {CB(`0`, `9`)}));
1020	// The bounds of this read are larger than the bounds of the previous write,
1021	// so it depends on both previous Stores to B.
1022	ASSERT_EQ(history[`7`]->dependencies().size(), `2`);
1023	ASSERT_TRUE(history[`7`]->hasDependency(history[`2`]));
1024	ASSERT_TRUE(history[`7`]->hasDependency(history[`4`]));
1025
1026	// Eight: the final store into the output C.
1027	ASSERT_EQ(history[`8`]->type(), AccessType::Store);
1028	ASSERT_EQ(history[`8`]->var(), cVar);
1029	// It also has the bounds of the final loop.
1030	ASSERT_TRUE(EQ(history[`8`]->bounds(), {CB(`0`, `9`)}));
1031	// The previous load is in its RHS, so it depends on it as always.
1032	ASSERT_EQ(history[`8`]->dependencies().size(), `1`);
1033	ASSERT_TRUE(history[`8`]->hasDependency(history[`7`]));
1034
1035	// The last access represents the output Buf.
1036	ASSERT_EQ(history[`9`]->type(), AccessType::Output);
1037	ASSERT_EQ(history[`9`]->var(), cVar);
1038	// It has the bounds of the output Buf.
1039	ASSERT_TRUE(EQ(history[`9`]->bounds(), {CB(`0`, `9`)}));
1040	// sanity check the input we retrieved earlier matches.
1041	ASSERT_EQ(history[`9`], output);
1042	// It depends on the last write to C only.
1043	ASSERT_EQ(history[`9`]->dependencies().size(), `1`);
1044	ASSERT_TRUE(history[`9`]->hasDependency(history[`8`]));
1045	}
1046
1047	// Verify that we can still infer bounds when the loop var is offset.
1048	TEST(MemDependency, MemDependencyCheckerLoopBoundsIndexShift) {
1049	BufHandle a("A", {`10`}, kInt);
1050	BufHandle b("B", {`10`}, kInt);
1051	VarHandle x("x", kInt);
1052
1053	using namespace analysis;
1054
1055	MemDependencyChecker analyzer({a}, {b});
1056
1057	// This enables using the execution order of the loops to determine if some
1058	// loops are self dependent or not.
1059	analyzer.allowLoopExecutionOrderAnalysis();
1060
1061	/*
1062	* for (int x = 1; x < 10; x++) {
1063	* A[x] = A[x - 1];
1064	* }
1065	* for (int x = 0; x < 9; x++) {
1066	* A[x] = A[x + 1];
1067	* }
1068	* for (int x = 0; x < 9; x++) {
1069	* A[9 - x] = A[8 - x];
1070	* }
1071	* for (int x = 0; x < 10; x++) {
1072	* A[x] = A[9 - x];
1073	* }
1074	* for (int x = 0; x < 10; x++) {
1075	* B[x] = A[x];
1076	* }
1077	*/
1078
1079	StmtPtr stmt = Block::make(
1080	{For::make(x, `1`, `10`, Store::make(a, {x}, Load::make(a, {x - `1`}))),
1081	For::make(x, `0`, `9`, Store::make(a, {x}, Load::make(a, {x + `1`}))),
1082	For::make(
1083	x,
1084	`0`,
1085	`9`,
1086	Store::make(
1087	a, {ExprHandle (`9`) - x}, Load::make(a, {ExprHandle (`8`) - x}))),
1088	For::make(
1089	x, `0`, `10`, Store::make(a, {x}, Load::make(a, {ExprHandle (`9`) - x}))),
1090	For::make(x, `0`, `10`, Store::make(b, {x}, Load::make(a, {x})))});
1091
1092	stmt ->accept(&analyzer);
1093
1094	// Sanity check output depends on Input.
1095	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
1096
1097	auto CB = [](int s, int e) {
1098	return Bound (alloc<IntImm>(s), alloc<IntImm>(e));
1099	};
1100	auto EQ = [](const IndexBounds& x, const IndexBounds& y) {
1101	return indexBoundsEquals(x, y);
1102	};
1103
1104	/ 0. Input: A[(0, 9)] - dependents: 1*
1105	* 1. Load: A[(0, 8)] - depends on: 0 2 - dependents: 2
1106	* 2. Store: A[(1, 9)] - depends on: 1 - dependents: 1 3
1107	* 3. Load: A[(1, 9)] - depends on: 2 - dependents: 4
1108	* 4. Store: A[(0, 8)] - depends on: 3 - dependents: 5 7
1109	* 5. Load: A[(0, 8)] - depends on: 4 - dependents: 6
1110	* 6. Store: A[(1, 9)] - depends on: 5 - dependents: 7
1111	* 7. Load: A[(0, 9)] - depends on: 4 6 8 - dependents: 8
1112	* 8. Store: A[(0, 9)] - depends on: 7 - dependents: 7 9
1113	* 9. Load: A[(0, 9)] - depends on: 8 - dependents: 10
1114	* 10. Store: B[(0, 9)] - depends on: 9 - dependents: 11
1115	* 11. Output: B[(0, 9)] - depends on: 10
1116	*/
1117
1118	// Now let's look at the bounds of each access.
1119	auto history = analyzer.getHistory();
1120	ASSERT_EQ(history.size(), `12`);
1121	VarPtr aVar = a.node()->base_handle();
1122	VarPtr bVar = b.node()->base_handle();
1123
1124	// The first access is the input A.
1125	ASSERT_EQ(history[`0`]->type(), AccessType::Input);
1126	ASSERT_EQ(history[`0`]->var(), aVar);
1127	// It has the bounds of the producing Input.
1128	ASSERT_TRUE(EQ(history[`0`]->bounds(), {CB(`0`, `9`)}));
1129
1130	// The second access is the load A[x-1].
1131	ASSERT_EQ(history[`1`]->type(), AccessType::Load);
1132	ASSERT_EQ(history[`1`]->var(), aVar);
1133	// It has the bounds of the loop modified by the offset of each index, in
1134	// this case -1.
1135	ASSERT_TRUE(EQ(history[`1`]->bounds(), {CB(`0`, `8`)}));
1136	// It depends on the input, but also the store in the same loop, since
1137	// different interations of the loop depend on each other.
1138	ASSERT_EQ(history[`1`]->dependencies().size(), `2`);
1139	ASSERT_TRUE(history[`1`]->hasDependency(history[`0`]));
1140	ASSERT_TRUE(history[`1`]->hasDependency(history[`2`]));
1141
1142	// The third access is the Store to A[x] in the first loop.
1143	ASSERT_EQ(history[`2`]->type(), AccessType::Store);
1144	ASSERT_EQ(history[`2`]->var(), aVar);
1145	// It has no offset on x, so should have the same bounds as the loop.
1146	ASSERT_TRUE(EQ(history[`2`]->bounds(), {CB(`1`, `9`)}));
1147
1148	// The fourth access is the load A[x+1] in the second loop.
1149	ASSERT_EQ(history[`3`]->type(), AccessType::Load);
1150	ASSERT_EQ(history[`3`]->var(), aVar);
1151	// It has the bounds of the loop (0 <= x < 9) modified by the offset of each
1152	// index, in this case 1.
1153	ASSERT_TRUE(EQ(history[`3`]->bounds(), {CB(`1`, `9`)}));
1154	// This load totally overlaps the previous write to A, so it depends only on
1155	// it and not the input.
1156	ASSERT_EQ(history[`3`]->dependencies().size(), `1`);
1157	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
1158
1159	// The fifth access is the store to A[x] in the second loop.
1160	ASSERT_EQ(history[`4`]->type(), AccessType::Store);
1161	ASSERT_EQ(history[`4`]->var(), aVar);
1162	// It has no offset on x, so should have the same bounds as the loop.
1163	ASSERT_TRUE(EQ(history[`4`]->bounds(), {CB(`0`, `8`)}));
1164
1165	// The sixth access is the load to A[8 - x] in the third loop.
1166	ASSERT_EQ(history[`5`]->type(), AccessType::Load);
1167	ASSERT_EQ(history[`5`]->var(), aVar);
1168	// It has the bounds of the loop (0 <= x < 9) modified by the offset of each
1169	// index, in this case 8 - x.
1170	// This access has a negative stride, which will be normalized.
1171	ASSERT_TRUE(EQ(history[`5`]->bounds(), {CB(`0`, `8`)}));
1172	// This load totally overlaps the most recent write to A, so it depends only
1173	// on it and not the input or the first write to A.
1174	ASSERT_EQ(history[`5`]->dependencies().size(), `1`);
1175	ASSERT_TRUE(history[`5`]->hasDependency(history[`4`]));
1176
1177	// The seventh access is the store to A[9 - x] in the third loop.
1178	ASSERT_EQ(history[`6`]->type(), AccessType::Store);
1179	ASSERT_EQ(history[`6`]->var(), aVar);
1180	// This store has a negative stride on it's indices, but is notmalized
1181	// internally.
1182	ASSERT_TRUE(EQ(history[`6`]->bounds(), {CB(`1`, `9`)}));
1183
1184	// The eighth access is the load A[9-x] in the second loop.
1185	ASSERT_EQ(history[`7`]->type(), AccessType::Load);
1186	ASSERT_EQ(history[`7`]->var(), aVar);
1187	// It has the bounds of the loop (0 <= x < 9), modified by the offset 9 - x,
1188	// which esstentially traverses the loop backwards.
1189	ASSERT_TRUE(EQ(history[`7`]->bounds(), {CB(`0`, `9`)}));
1190	// This Load has three write dependencies:
1191	ASSERT_EQ(history[`7`]->dependencies().size(), `3`);
1192	// The previous store (#6) for elements 1-9*
1193	ASSERT_TRUE(history[`7`]->hasDependency(history[`6`]));
1194	// An earlier store (#4) covering element 0*
1195	ASSERT_TRUE(history[`7`]->hasDependency(history[`4`]));
1196	// A future store inside this loop, since this loop modifies the buffer*
1197	// in a non distinct way (due to the load and store having different access
1198	// strides).
1199	ASSERT_TRUE(history[`7`]->hasDependency(history[`8`]));
1200
1201	// The ninth access is the store to A[x] in the fourth loop.
1202	ASSERT_EQ(history[`8`]->type(), AccessType::Store);
1203	ASSERT_EQ(history[`8`]->var(), aVar);
1204	// This store has a negative stride on it's indices, but is notmalized
1205	// internally.
1206	ASSERT_TRUE(EQ(history[`8`]->bounds(), {CB(`0`, `9`)}));
1207
1208	// The tenth and 11th acceses are the copy from A[x] to B[x].
1209	ASSERT_EQ(history[`9`]->type(), AccessType::Load);
1210	ASSERT_EQ(history[`9`]->var(), aVar);
1211	ASSERT_EQ(history[`10`]->type(), AccessType::Store);
1212	ASSERT_EQ(history[`10`]->var(), bVar);
1213
1214	// The last access represents the output Buf.
1215	ASSERT_EQ(history[`11`]->type(), AccessType::Output);
1216	ASSERT_EQ(history[`11`]->var(), bVar);
1217	// It has the bounds of the output Buf.
1218	ASSERT_TRUE(EQ(history[`11`]->bounds(), {CB(`0`, `9`)}));
1219	// It depends on the last write to B only.
1220	ASSERT_EQ(history[`11`]->dependencies().size(), `1`);
1221	ASSERT_TRUE(history[`11`]->hasDependency(history[`10`]));
1222
1223	// ok that's enough of that.
1224	}
1225
1226	// Check many different cases of loop self dependency - when a load within a
1227	// loop is dependent on a Store later in the same loop but in different
1228	// iteration. This is affected by whether or not we can trust the execution
1229	// order of the loop.
1230	TEST(MemDependency, MemDependencyCheckerLoopSelfDependency) {
1231	BufHandle a("A", {`5`}, kInt);
1232	BufHandle b("B", {`5`}, kInt);
1233	VarHandle x("x", kInt);
1234	VarHandle y("y", kInt);
1235	VarHandle z("z", kInt);
1236
1237	using namespace analysis;
1238
1239	// This check assumes that the Stmt has a single Store with a single Load on
1240	// the RHS.
1241	auto isSelfDependent =
1242	[](const std::vector<std::shared_ptr<AccessInfo>>& history) -> bool {
1243	return history.front()->hasDependency(history.back());
1244	};
1245
1246	{
1247	/ for (int y = 0; y < 10; y++) {*
1248	* A[y] = (A[y]) + 1;
1249	* } */
1250
1251	// Not self dependent since all loop iterations use a different y.
1252
1253	MemDependencyChecker analyzer;
1254	StmtPtr stmt = For::make(
1255	y,
1256	`0`,
1257	`10`,
1258	Block::make({Store::make(a, {y}, Add::make(Load::make(a, {y}), `1`))}));
1259
1260	stmt ->accept(&analyzer);
1261
1262	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1263	}
1264
1265	{
1266	/ for (int y = 0; y < 10; y++) {*
1267	* A[y + 1] = (A[y + 1]) + 1;
1268	* }
1269	*/
1270
1271	// Not self dependent due to different y (with offset).
1272
1273	MemDependencyChecker analyzer;
1274	StmtPtr stmt = For::make(
1275	y,
1276	`0`,
1277	`10`,
1278	Block::make(
1279	{Store::make(a, {y + `1`}, Add::make(Load::make(a, {y + `1`}), `1`))}));
1280
1281	stmt ->accept(&analyzer);
1282
1283	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1284	}
1285
1286	{
1287	/ for (int x = 0; x < 10; x++) {*
1288	* A[0] = (A[0]) + x;
1289	* }
1290	*/
1291
1292	// Is self dependent since all loops use a common constant element of A.
1293
1294	MemDependencyChecker analyzer;
1295	StmtPtr stmt = For::make(
1296	x,
1297	`0`,
1298	`10`,
1299	Block::make({Store::make(a, {`0`}, Add::make(Load::make(a, {`0`}), x))}));
1300	stmt ->accept(&analyzer);
1301
1302	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1303	}
1304
1305	{
1306	/ for (int x = 0; x < 10; x++) {*
1307	* A[0] = (B[0]) + x;
1308	* }
1309	*/
1310
1311	// Is not self dependent beacause there is no store to the buffer that is
1312	// read.
1313
1314	MemDependencyChecker analyzer;
1315	StmtPtr stmt = For::make(
1316	x,
1317	`0`,
1318	`10`,
1319	Block::make({Store::make(a, {`0`}, Add::make(Load::make(b, {`0`}), x))}));
1320	stmt ->accept(&analyzer);
1321
1322	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1323	}
1324
1325	{
1326	/ for (int x = 0; x < 10; x++) {*
1327	* A[y] = (A[y]) + x;
1328	* }
1329	*/
1330
1331	// Is self dependent since all loops use a common symbolic element of A.
1332
1333	MemDependencyChecker analyzer;
1334	StmtPtr stmt = For::make(
1335	x,
1336	`0`,
1337	`10`,
1338	Block::make({Store::make(a, {y}, Add::make(Load::make(a, {y}), x))}));
1339	stmt ->accept(&analyzer);
1340
1341	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1342	}
1343
1344	{
1345	/ for (int x = 0; x < 10; x++) {*
1346	* A[x] = A[x + 1];
1347	* }
1348	*/
1349
1350	// In this case it depends if we are considering execution order.
1351
1352	MemDependencyChecker analyzer;
1353
1354	StmtPtr stmt =
1355	For::make(x, `0`, `10`, Store::make(a, {x}, Load::make(a, {x + `1`})));
1356	stmt ->accept(&analyzer);
1357
1358	// With analysis of order disabled, this is self dependent since the read
1359	// from X+1 and the write to X+1 could be in reverse order.
1360	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1361	}
1362
1363	{
1364	/ for (int x = 0; x < 10; x++) {*
1365	* A[x] = A[x + 1];
1366	* }
1367	*/
1368
1369	MemDependencyChecker analyzer;
1370	analyzer.allowLoopExecutionOrderAnalysis();
1371
1372	StmtPtr stmt =
1373	For::make(x, `0`, `10`, Store::make(a, {x}, Load::make(a, {x + `1`})));
1374	stmt ->accept(&analyzer);
1375
1376	// If order analysis is enabled, this is not dependent since the read for
1377	// each element occurs before the write to that element.
1378	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1379	}
1380
1381	{
1382	/ for (int x = 1; x < 10; x++) {*
1383	* A[x] = A[x - 1];
1384	* }
1385	*/
1386
1387	MemDependencyChecker analyzer;
1388
1389	StmtPtr stmt =
1390	For::make(x, `1`, `10`, Store::make(a, {x}, Load::make(a, {x - `1`})));
1391	stmt ->accept(&analyzer);
1392
1393	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1394	}
1395
1396	{
1397	/ for (int x = 1; x < 10; x++) {*
1398	* A[x] = A[x - 1];
1399	* }
1400	*/
1401
1402	MemDependencyChecker analyzer;
1403	analyzer.allowLoopExecutionOrderAnalysis();
1404
1405	StmtPtr stmt =
1406	For::make(x, `1`, `10`, Store::make(a, {x}, Load::make(a, {x - `1`})));
1407	stmt ->accept(&analyzer);
1408
1409	// In this case, even with order analysis the Load is dependent on the
1410	// Store, since the write to X occurs before the read from X.
1411	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1412	}
1413
1414	{
1415	/ for (int x = 0; x < 9; x++) {*
1416	* A[9 - x] = A[8 - x];
1417	* }
1418	*/
1419
1420	// Still works if the execution order is reversed, so long as the read
1421	// comes before the write.
1422
1423	MemDependencyChecker analyzer;
1424	analyzer.allowLoopExecutionOrderAnalysis();
1425
1426	StmtPtr stmt = For::make(
1427	x,
1428	`3`,
1429	`10`,
1430	Store::make(
1431	a, {ExprHandle (`9`) - x}, Load::make(a, {ExprHandle (`8`) - x})));
1432	stmt ->accept(&analyzer);
1433
1434	// However here was can determine the A store is earlier in the order than
1435	// the load.
1436	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1437	}
1438
1439	{
1440	/ for (int x = 0; x < 9; x++) {*
1441	* A[8 - x] = A[9 - x];
1442	* }
1443	*/
1444
1445	// But not if it doesn't.
1446
1447	MemDependencyChecker analyzer;
1448	analyzer.allowLoopExecutionOrderAnalysis();
1449
1450	StmtPtr stmt = For::make(
1451	x,
1452	`3`,
1453	`10`,
1454	Store::make(
1455	a, {ExprHandle (`8`) - x}, Load::make(a, {ExprHandle (`9`) - x})));
1456	stmt ->accept(&analyzer);
1457
1458	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1459	}
1460
1461	{
1462	/ for (int x = 0; x < 9; x++) {*
1463	* A[9 - x] = A[8 - x];
1464	* }
1465	*/
1466
1467	// And not if we're not relying on execution order.
1468
1469	MemDependencyChecker analyzer;
1470
1471	StmtPtr stmt = For::make(
1472	x,
1473	`3`,
1474	`10`,
1475	Store::make(
1476	a, {ExprHandle (`9`) - x}, Load::make(a, {ExprHandle (`8`) - x})));
1477	stmt ->accept(&analyzer);
1478
1479	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1480	}
1481
1482	{
1483	/ for (int x = 3; x < 10; x++) {*
1484	* A[x - 2] = A[x - 1];
1485	* }
1486	*/
1487
1488	// Forward order but negative indices.
1489
1490	MemDependencyChecker analyzer;
1491	analyzer.allowLoopExecutionOrderAnalysis();
1492
1493	StmtPtr stmt =
1494	For::make(x, `3`, `10`, Store::make(a, {x - `2`}, Load::make(a, {x - `1`})));
1495	stmt ->accept(&analyzer);
1496
1497	// However here was can determine the A store is earlier in the order than
1498	// the load.
1499	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1500	}
1501
1502	{
1503	/ for (int x = 0; x < 10; x++) {*
1504	* A[x * 2] = A[x * 2];
1505	* }
1506	*/
1507
1508	// With an access stride.
1509
1510	MemDependencyChecker analyzer;
1511	// Execution order doesn't matter since the read and the write are totally
1512	// distinct.
1513
1514	StmtPtr stmt =
1515	For::make(x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `2`})));
1516	stmt ->accept(&analyzer);
1517
1518	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1519	}
1520
1521	{
1522	/ for (int x = 0; x < 10; x++) {*
1523	* A[x * 2] = A[x * 2 + 1];
1524	* }
1525	*/
1526
1527	// Here we can use the common stride of the accesses to determine they are
1528	// distinct.
1529	// Note, this is the only place (loop self depedency) we use this stride
1530	// to avoid unnecessary depedence.
1531
1532	MemDependencyChecker analyzer;
1533	// Execution order doesn't matter since the read and the write are totally
1534	// distinct.
1535
1536	StmtPtr stmt = For::make(
1537	x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `2` + `1`})));
1538	stmt ->accept(&analyzer);
1539
1540	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1541	}
1542
1543	{
1544	/ for (int x = 0; x < 10; x++) {*
1545	* A[x * 2] = A[x * 2 - 1];
1546	* }
1547	*/
1548
1549	// same if the read is behind the write so long as they are distinct.
1550
1551	MemDependencyChecker analyzer;
1552	StmtPtr stmt = For::make(
1553	x, `1`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `2` - `1`})));
1554	stmt ->accept(&analyzer);
1555
1556	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1557	}
1558
1559	{
1560	/ for (int x = 0; x < 10; x++) {*
1561	* A[x * 2] = A[x * 2 + 2];
1562	* }
1563	*/
1564
1565	// But not if the offset is in the stride.
1566
1567	MemDependencyChecker analyzer;
1568	StmtPtr stmt = For::make(
1569	x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `2` + `2`})));
1570	stmt ->accept(&analyzer);
1571
1572	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1573	}
1574
1575	{
1576	/ for (int x = 0; x < 10; x++) {*
1577	* A[x * 2] = A[x * 2 - 2];
1578	* }
1579	*/
1580
1581	// Works with negative offsets too.
1582
1583	MemDependencyChecker analyzer;
1584	StmtPtr stmt = For::make(
1585	x, `1`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `2` - `2`})));
1586	stmt ->accept(&analyzer);
1587
1588	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1589	}
1590
1591	{
1592	/ for (int x = 0; x < 10; x++) {*
1593	* A[x * 2] = A[x * 2 + 7];
1594	* }
1595	*/
1596
1597	// Detects accesses are distinct when offset is large but not a multiple
1598	// of stride.
1599	MemDependencyChecker analyzer;
1600	StmtPtr stmt = For::make(
1601	x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `2` + `7`})));
1602	stmt ->accept(&analyzer);
1603
1604	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1605	}
1606
1607	{
1608	/ for (int x = 0; x < 10; x++) {*
1609	* A[x * 2] = A[x * 2 + 4];
1610	* }
1611	*/
1612
1613	// Works with offsets which are multiples of the stride.
1614	MemDependencyChecker analyzer;
1615	StmtPtr stmt = For::make(
1616	x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `2` + `4`})));
1617	stmt ->accept(&analyzer);
1618
1619	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1620	}
1621
1622	{
1623	/ for (int x = 0; x < 10; x++) {*
1624	* A[x * 6] = A[x * 6 + 5];
1625	* }
1626	*/
1627
1628	// detects accesses are distinct with large strides when the offset is
1629	// within.
1630
1631	MemDependencyChecker analyzer;
1632	StmtPtr stmt = For::make(
1633	x, `0`, `10`, Store::make(a, {x * `6`}, Load::make(a, {x * `6` + `5`})));
1634	stmt ->accept(&analyzer);
1635
1636	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1637	}
1638
1639	{
1640	/ for (int x = 0; x < 10; x++) {*
1641	* A[x * 2] = A[x * 6];
1642	* }
1643	*/
1644
1645	// detects accesses are overlapping when stride is different but a
1646	// multiple.
1647
1648	MemDependencyChecker analyzer;
1649	StmtPtr stmt =
1650	For::make(x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `6`})));
1651	stmt ->accept(&analyzer);
1652
1653	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1654	}
1655
1656	{
1657	/ for (int x = 0; x < 10; x++) {*
1658	* A[x * 4] = A[x * 2];
1659	* }
1660	*/
1661
1662	// still works when the read axis is the smaller stride.
1663
1664	MemDependencyChecker analyzer;
1665	StmtPtr stmt =
1666	For::make(x, `0`, `10`, Store::make(a, {x * `4`}, Load::make(a, {x * `2`})));
1667	stmt ->accept(&analyzer);
1668
1669	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1670	}
1671
1672	{
1673	/ for (int x = 0; x < 10; x++) {*
1674	* A[x * 2] = A[x * 6 + 1];
1675	* }
1676	*/
1677
1678	// detects accesses are distinct when stride is different but a multiple
1679	// and there is an offset.
1680
1681	MemDependencyChecker analyzer;
1682	StmtPtr stmt = For::make(
1683	x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `6` + `1`})));
1684	stmt ->accept(&analyzer);
1685
1686	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1687	}
1688
1689	{
1690	/ for (int x = 0; x < 10; x++) {*
1691	* A[x * 2] = A[x * 6 + 4];
1692	* }
1693	*/
1694
1695	// The smaller stride determines whether there is overlap.
1696
1697	MemDependencyChecker analyzer;
1698	StmtPtr stmt = For::make(
1699	x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `6` + `4`})));
1700	stmt ->accept(&analyzer);
1701
1702	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1703	}
1704
1705	{
1706	/ for (int x = 0; x < 10; x++) {*
1707	* A[x * 2 + 3] = A[x * 6];
1708	* }
1709	*/
1710
1711	// The smaller stride determines whether there is overlap, not the larger.
1712
1713	MemDependencyChecker analyzer;
1714	StmtPtr stmt = For::make(
1715	x, `0`, `10`, Store::make(a, {x * `2` + `3`}, Load::make(a, {x * `6`})));
1716	stmt ->accept(&analyzer);
1717
1718	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1719	}
1720
1721	{
1722	/ for (int x = 0; x < 10; x++) {*
1723	* A[x * 2] = A[x * 3 + 1];
1724	* }
1725	*/
1726
1727	// If they have strides with no common muliple > 1, they overlap.
1728	MemDependencyChecker analyzer;
1729	StmtPtr stmt = For::make(
1730	x, `0`, `10`, Store::make(a, {x * `2`}, Load::make(a, {x * `3` + `1`})));
1731	stmt ->accept(&analyzer);
1732
1733	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1734	}
1735
1736	{
1737	/ for (int x = 0; x < 10; x++) {*
1738	* A[x] = A[x + 10];
1739	* }
1740	*/
1741
1742	// If the offset is greater than the size of the loop, they can't overlap.
1743
1744	MemDependencyChecker analyzer;
1745	StmtPtr stmt =
1746	For::make(x, `0`, `10`, Store::make(a, {x}, Load::make(a, {x + `10`})));
1747	stmt ->accept(&analyzer);
1748
1749	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1750	}
1751
1752	{
1753	/ for (int x = 0; x < 10; x++) {*
1754	* A[x] = A[9 - x];
1755	* }
1756	*/
1757
1758	// If they have different execution orders they may overlap.
1759	MemDependencyChecker analyzer;
1760	StmtPtr stmt = For::make(
1761	x, `0`, `10`, Store::make(a, {x}, Load::make(a, {ExprHandle (`9`) - x})));
1762	stmt ->accept(&analyzer);
1763
1764	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1765	}
1766
1767	{
1768	/ for (int x = 0; x < 10; x++) {*
1769	* A[x * 2] = A[19 - x * 2];
1770	* }
1771	*/
1772
1773	// Or they may not, depending on their start offset and strides.
1774	MemDependencyChecker analyzer;
1775	StmtPtr stmt = For::make(
1776	x,
1777	`0`,
1778	`10`,
1779	Store::make(a, {x * `2`}, Load::make(a, {ExprHandle (`19`) - x * `2`})));
1780	stmt ->accept(&analyzer);
1781
1782	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1783	}
1784
1785	{
1786	/ for (int x = 0; x < 10; x++) {*
1787	* A[x / 2] = A[x / 2];
1788	* }
1789	*/
1790
1791	// If the stride is not monotonic, they overlap.
1792
1793	MemDependencyChecker analyzer;
1794	StmtPtr stmt =
1795	For::make(x, `0`, `10`, Store::make(a, {x / `2`}, Load::make(a, {x / `2`})));
1796	stmt ->accept(&analyzer);
1797
1798	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1799	}
1800
1801	{
1802	/ for (int x = 0; x < 10; x++) {*
1803	* A[x / 2] = A[x / 2] + 1;
1804	* }
1805	*/
1806
1807	// If the stride is not monotonic, they overlap - even with an offset.
1808	MemDependencyChecker analyzer;
1809	StmtPtr stmt = For::make(
1810	x, `0`, `10`, Store::make(a, {x / `2`}, Load::make(a, {x / `2` + `1`})));
1811	stmt ->accept(&analyzer);
1812
1813	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1814	}
1815
1816	{
1817	/ for (int x = 0; x < 10; x++) {*
1818	* A[x % 2] = A[x % 2];
1819	* }
1820	*/
1821
1822	// Mod too...
1823
1824	analysis::MemDependencyChecker analyzer;
1825	StmtPtr stmt = For::make(
1826	x,
1827	`0`,
1828	`10`,
1829	Store::make(a, {Mod::make(x, `2`)}, Load::make(a, {Mod::make(x, `2`)})));
1830	stmt ->accept(&analyzer);
1831
1832	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1833	}
1834
1835	{
1836	/ for (int x = y; x < z; x++) {*
1837	* A[x] = A[x + 1];
1838	* }
1839	*/
1840
1841	// Still works with symbolic loop extents.
1842
1843	{
1844	MemDependencyChecker analyzer;
1845	StmtPtr stmt =
1846	For::make(x, y, z, Store::make(a, {x}, Load::make(a, {x + `1`})));
1847	stmt ->accept(&analyzer);
1848
1849	ASSERT_TRUE(isSelfDependent(analyzer.getHistory()));
1850	}
1851
1852	{
1853	MemDependencyChecker analyzer;
1854	analyzer.allowLoopExecutionOrderAnalysis();
1855	StmtPtr stmt =
1856	For::make(x, y, z, Store::make(a, {x}, Load::make(a, {x + `1`})));
1857	stmt ->accept(&analyzer);
1858
1859	ASSERT_FALSE(isSelfDependent(analyzer.getHistory()));
1860	}
1861	}
1862	}
1863
1864	// Verify that a strided access still works.
1865	// TODO: actually this only works because of the size of the ranges, revist this
1866	// test after strided overlap is implemented.
1867	TEST(MemDependency, MemDependencyCheckerLoopDistinctStrides) {
1868	BufHandle a("A", {`20`}, kInt);
1869	BufHandle b("B", {`20`}, kInt);
1870	VarHandle x("x", kInt);
1871	VarHandle y("y", kInt);
1872
1873	using namespace analysis;
1874	MemDependencyChecker analyzer({a.node()}, {b.node()});
1875	StmtPtr stmt = Block::make(
1876	{For::make(
1877	x, `0`, `10`, Store::make(b, {x * `2` + `1`}, Load::make(a, {x * `2` + `1`}))),
1878	For::make(x, `0`, `10`, Store::make(b, {x * `2`}, Load::make(a, {x * `2`})))
1879
1880	});
1881	stmt ->accept(&analyzer);
1882
1883	// Sanity check output depends on input.
1884	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
1885
1886	// Output has 2 dependencies... the store in each loop.
1887	auto outputAccess = analyzer.output(b.node());
1888	ASSERT_EQ(outputAccess ->dependencies().size(), `2`);
1889	}
1890
/* TODO(nickg) - this test will fail due to the lack of stride math in Bound
TEST(MemDependency, MemDependencyCheckerLoopDistinctStrides) {
  BufHandle a("A", {20}, kInt);
  BufHandle b("B", {20}, kInt);
  BufHandle c("C", {10}, kInt);
  VarHandle x("x", kInt);
  VarHandle y("y", kInt);

  {
    analysis::MemDependencyChecker analyzer({a.node()}, {c.node()});
    StmtPtr stmt = Block::make(
        {For::make(
             x,
             0,
             10,
             Store::make(b, {x * 2 + 1}, Load::make(a, {x * 2 + 1}))),
         For::make(
             x, 0, 10, Store::make(b, {x * 2}, Load::make(a, {x * 2}))),
         For::make(x, 0, 10, Store::make(c, {x}, Load::make(b, {x})))

        });
    stmt->accept(&analyzer);

    std::cout << *stmt << "\n";
    for (auto& wi : analyzer.getHistory()) {
      wi->print();
    }
  }
}*/
1920
1921	// analysis on Stmts using Cond.
1922	TEST(MemDependency, MemDependencyCheckerLoopBoundsCond) {
1923	BufHandle a("A", {`10`}, kInt);
1924	BufHandle b("B", {`10`}, kInt);
1925	BufHandle c("C", {`10`}, kInt);
1926	VarHandle x("x", kInt);
1927	VarHandle y("y", kInt);
1928
1929	using namespace analysis;
1930
1931	{
1932	/ for (int x = 0; x < 10; x++) {*
1933	* C[x] = A[x];
1934	* }
1935	* if (y<5 ? 1 : 0) {
1936	* C[0] = (B[0]) + 1;
1937	* } else {
1938	* C[0] = (B[1]) + 1;
1939	* }
1940	*/
1941
1942	// Future usages may depend on accesses in both branches of a condition.
1943
1944	MemDependencyChecker analyzer({a, b}, {c});
1945	StmtPtr stmt = Block::make(
1946	{For::make(x, `0`, `10`, Store::make(c, {x}, Load::make(a, {x}))),
1947	Cond::make(
1948	CompareSelect::make(y, `5`, CompareSelectOperation::kLT),
1949	Store::make(c, {`0`}, Add::make(Load::make(b, {`0`}), `1`)),
1950	Store::make(c, {`0`}, Add::make(Load::make(b, {`1`}), `1`)))});
1951
1952	stmt ->accept(&analyzer);
1953
1954	// Output C should have 3 dependencies, each of the three stores.
1955	auto outputAccess = analyzer.output(c.node());
1956	ASSERT_NE(outputAccess, nullptr);
1957	ASSERT_EQ(outputAccess ->dependencies().size(), `3`);
1958
1959	// C depends indirectly on A and B.
1960	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
1961	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
1962	}
1963
1964	{
1965	/ for (int x = 0; x < 10; x++) {*
1966	* C[x] = A[x];
1967	* }
1968	* if (y<5 ? 1 : 0) {
1969	* for (int x = 0; x < 10; x++) {
1970	* C[x] = B[x];
1971	* }
1972	* } else {
1973	* for (int x = 0; x < 10; x++) {
1974	* C[x] = (B[x]) + 1;
1975	* }
1976	* }
1977	*/
1978
1979	// Future usages may depend on accesses in both branches of a condition.
1980
1981	MemDependencyChecker analyzer({a, b}, {c});
1982	StmtPtr stmt = Block::make(
1983	{For::make(x, `0`, `10`, Store::make(c, {x}, Load::make(a, {x}))),
1984	Cond::make(
1985	CompareSelect::make(y, `5`, CompareSelectOperation::kLT),
1986	For::make(x, `0`, `10`, Store::make(c, {x}, Load::make(b, {x}))),
1987	For::make(
1988	x,
1989	`0`,
1990	`10`,
1991	Store::make(c, {x}, Add::make(Load::make(b, {x}), `1`))))});
1992
1993	stmt ->accept(&analyzer);
1994
1995	// Output C should have 3 dependencies, each of the three stores.
1996	auto outputAccess = analyzer.output(c.node());
1997	ASSERT_NE(outputAccess, nullptr);
1998	ASSERT_EQ(outputAccess ->dependencies().size(), `3`);
1999
2000	// TODO(nickg): actually since the true and false branch cover the total
2001	// range of the first store this should have 2 dependencies, but we don't
2002	// do that yet.
2003
2004	// C depends indirectly on A and B.
2005	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2006	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2007	}
2008
2009	{
2010	/ for (int x = 0; x < 10; x++) {*
2011	* C[x] = A[x];
2012	* }
2013	* if (y<5 ? 1 : 0) {
2014	* for (int x = 0; x < 10; x++) {
2015	* C[x] = (B[x]) + 1;
2016	* }
2017	* }
2018	*/
2019
2020	// Only has true branch.
2021
2022	MemDependencyChecker analyzer({a, b}, {c});
2023	StmtPtr stmt = Block::make(
2024	{For::make(x, `0`, `10`, Store::make(c, {x}, Load::make(a, {x}))),
2025	Cond::make(
2026	CompareSelect::make(y, `5`, CompareSelectOperation::kLT),
2027	For::make(
2028	x,
2029	`0`,
2030	`10`,
2031	Store::make(c, {x}, Add::make(Load::make(b, {x}), `1`))),
2032	nullptr)});
2033
2034	stmt ->accept(&analyzer);
2035
2036	// Output C should have 3 dependencies, each of the three stores.
2037	auto outputAccess = analyzer.output(c.node());
2038	ASSERT_NE(outputAccess, nullptr);
2039	ASSERT_EQ(outputAccess ->dependencies().size(), `2`);
2040
2041	// C depends indirectly on A and B.
2042	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2043	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2044	}
2045
2046	{
2047	/ for (int x = 0; x < 10; x++) {*
2048	* C[x] = A[x];
2049	* }
2050	* if (y<5 ? 1 : 0) {
2051	* } else {
2052	* for (int x = 0; x < 10; x++) {
2053	* C[x] = (B[x]) + 1;
2054	* }
2055	* }
2056	*/
2057
2058	// Only has false branch.
2059
2060	MemDependencyChecker analyzer({a, b}, {c});
2061	StmtPtr stmt = Block::make(
2062	{For::make(x, `0`, `10`, Store::make(c, {x}, Load::make(a, {x}))),
2063	Cond::make(
2064	CompareSelect::make(y, `5`, CompareSelectOperation::kLT),
2065	nullptr,
2066	For::make(
2067	x,
2068	`0`,
2069	`10`,
2070	Store::make(c, {x}, Add::make(Load::make(b, {x}), `1`))))});
2071
2072	stmt ->accept(&analyzer);
2073
2074	// Output C should have 3 dependencies, each of the three stores.
2075	auto outputAccess = analyzer.output(c.node());
2076	ASSERT_NE(outputAccess, nullptr);
2077	ASSERT_EQ(outputAccess ->dependencies().size(), `2`);
2078
2079	// C depends indirectly on A and B.
2080	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2081	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2082	}
2083
2084	{
2085	/ for (int x = 0; x < 10; x++) {*
2086	* C[x] = A[x];
2087	* }
2088	* if (C[0]<5 ? 1 : 0) {
2089	* C[0] = 5;
2090	* }
2091	*/
2092
2093	// Cond's Condition depends on a previous access.
2094
2095	MemDependencyChecker analyzer({a}, {c});
2096	StorePtr initStore = Store::make(c, {x}, Load::make(a, {x}));
2097	ExprHandle conditionalLoad = Load::make(c, {`0`});
2098	StmtPtr stmt = Block::make(
2099	{For::make(x, `0`, `10`, initStore),
2100	Cond::make(
2101	CompareSelect::make(
2102	conditionalLoad, `5`, CompareSelectOperation::kLT),
2103	Store::make(c, {`0`}, `5`),
2104	nullptr)});
2105
2106	stmt ->accept(&analyzer);
2107
2108	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2109
2110	ASSERT_TRUE(analyzer.dependsDirectly(conditionalLoad.node(), initStore));
2111	ASSERT_FALSE(analyzer.dependsDirectly(conditionalLoad.node(), a.node()));
2112	ASSERT_TRUE(analyzer.dependsIndirectly(conditionalLoad.node(), a.node()));
2113	}
2114	}
2115
2116	// Stmts using IfThenElse.
2117	TEST(MemDependency, MemDependencyCheckerIfThenElse) {
2118	BufHandle a("A", {`10`}, kInt);
2119	BufHandle b("B", {`10`}, kInt);
2120	BufHandle c("C", {`10`}, kInt);
2121	VarHandle x("x", kInt);
2122	VarHandle y("y", kInt);
2123
2124	using namespace analysis;
2125
2126	{
2127	/ for (int x = 0; x < 10; x++) {*
2128	* C[x] = A[x];
2129	* }
2130	* C[0] = (y < 5 ? (B[0]) + 1 : (B[1]) + 1;
2131	*/
2132
2133	// Future usages may depend on accesses in both branches of a condition.
2134
2135	MemDependencyChecker analyzer({a, b}, {c});
2136	StorePtr ifStore = Store::make(
2137	c,
2138	{`0`},
2139	IfThenElse::make(
2140	CompareSelect::make(y, `5`, CompareSelectOperation::kLT),
2141	Add::make(Load::make(b, {`0`}), `1`),
2142	Add::make(Load::make(b, {`1`}), `1`)));
2143	StmtPtr stmt = Block::make(
2144	{For::make(x, `0`, `10`, Store::make(c, {x}, Load::make(a, {x}))),
2145	ifStore});
2146
2147	stmt ->accept(&analyzer);
2148
2149	// Output C should have 2 dependencies, each of the two stores.
2150	auto outputAccess = analyzer.output(c.node());
2151	ASSERT_NE(outputAccess, nullptr);
2152	ASSERT_EQ(outputAccess ->dependencies().size(), `2`);
2153
2154	// Now we need to check the Store containing the IfThenElse.
2155	auto ifStoreAccess = analyzer.accessFor(ifStore);
2156
2157	// It should have 2 dependencies.
2158	ASSERT_EQ(ifStoreAccess ->dependencies().size(), `2`);
2159
2160	// C depends indirectly on A and B.
2161	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2162	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2163	}
2164
2165	{
2166	/ for (int x = 0; x < 10; x++) {*
2167	* C[x] = A[x];
2168	* }
2169	* C[0] = (y < 5 ? (B[0]) + 1 : 42;
2170	*/
2171
2172	// If the load appears in only one side of an IfThenElse the output may be
2173	// dependent on it.
2174
2175	MemDependencyChecker analyzer({a, b}, {c});
2176	StorePtr ifStore = Store::make(
2177	c,
2178	{`0`},
2179	IfThenElse::make(
2180	CompareSelect::make(y, `5`, CompareSelectOperation::kLT),
2181	Add::make(Load::make(b, {`0`}), `1`),
2182	`42`));
2183	StmtPtr stmt = Block::make(
2184	{For::make(x, `0`, `10`, Store::make(c, {x}, Load::make(a, {x}))),
2185	ifStore});
2186
2187	stmt ->accept(&analyzer);
2188
2189	// C depends indirectly on A and B.
2190	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2191	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2192	}
2193
2194	{
2195	/ for (int x = 0; x < 10; x++) {*
2196	* C[x] = (x < 5 ? B[x] : A[x];
2197	* }
2198	*/
2199
2200	// In this case C is dependent on both A and B.
2201
2202	// TODO: in cases like this it would be possible to split the range of B
2203	// into two bounds, one dependent on A and one depenent on B. We'd need to
2204	// examine conditions relative to previously encountered loop variables. I'm
2205	// uncertain if this would be helpful.
2206
2207	MemDependencyChecker analyzer({a, b}, {c});
2208	StorePtr ifStore = Store::make(
2209	c,
2210	{`0`},
2211	IfThenElse::make(
2212	CompareSelect::make(y, `5`, CompareSelectOperation::kLT),
2213	Load::make(b, {x}),
2214	Load::make(a, {x})));
2215	StmtPtr stmt = Block::make({For::make(x, `0`, `10`, ifStore)});
2216
2217	stmt ->accept(&analyzer);
2218
2219	// C depends indirectly on A and B.
2220	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2221	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2222	}
2223	}
2224
2225	// Cutting a loop with single elem writes
2226	TEST(MemDependency, MemDependencyCheckerCutLoop) {
2227	BufHandle a("A", {`10`}, kInt);
2228	BufHandle b("B", {`10`}, kInt);
2229	VarHandle x("x", kInt);
2230
2231	using namespace analysis;
2232
2233	{
2234	/ for (int x = 0; x < 10; x++) {*
2235	* B[x] = A[x];
2236	* }
2237	* B[5] = 100;
2238	*/
2239
2240	// Cutting a loop with single element writes.
2241
2242	MemDependencyChecker analyzer({a}, {b});
2243	StmtPtr stmt = Block::make(
2244	{For::make(x, `0`, `10`, Store::make(b, {x}, Load::make(a, {x}))),
2245	Store::make(b, {`5`}, `100`)});
2246
2247	stmt ->accept(&analyzer);
2248
2249	// Output depends on input.
2250	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
2251
2252	// Output has 2 depdenencies.
2253	auto outputAccess = analyzer.output(b.node());
2254	ASSERT_NE(outputAccess, nullptr);
2255	ASSERT_EQ(outputAccess ->dependencies().size(), `2`);
2256	}
2257
2258	{
2259	/ for (int x = 0; x < 10; x++) {*
2260	* B[x] = A[x];
2261	* }
2262	* for (int x = 4; x < 7; x++) {
2263	* B[x] = B[x] + 3;
2264	* }
2265	* B[5] = 100;
2266	* B[6] = 101;
2267	* B[7] = 102;
2268	*/
2269
2270	// Cutting a loop with a smaller loop but then totally overlap that second
2271	// loop with one element writes.
2272
2273	MemDependencyChecker analyzer({a}, {b});
2274	ForPtr firstLoop =
2275	For::make(x, `0`, `10`, Store::make(b, {x}, Load::make(a, {x})));
2276	StorePtr secondStore =
2277	Store::make(b, {x}, Add::make(Load::make(b, {x}), `1`));
2278	ForPtr secondLoop = For::make(x, `4`, `7`, secondStore);
2279
2280	StmtPtr stmt = Block::make(
2281	{firstLoop,
2282	secondLoop,
2283	Store::make(b, {`4`}, `100`),
2284	Store::make(b, {`5`}, `101`),
2285	Store::make(b, {`6`}, `102`)});
2286
2287	stmt ->accept(&analyzer);
2288
2289	// Output depends on input.
2290	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
2291
2292	// Output has 4 depdenencies.
2293	auto outputAccess = analyzer.output(b.node());
2294	ASSERT_NE(outputAccess, nullptr);
2295	ASSERT_EQ(outputAccess ->dependencies().size(), `4`);
2296
2297	// Second loop depends on first loop.
2298	ASSERT_TRUE(analyzer.dependsDirectly(secondLoop, firstLoop));
2299
2300	// Output does not depend on second loop or store.
2301	ASSERT_FALSE(analyzer.dependsIndirectly(b.node(), secondLoop));
2302	ASSERT_FALSE(analyzer.dependsIndirectly(b.node(), secondStore));
2303	}
2304	}
2305
2306	// Dynamic shapes (load in indices).
2307	TEST(MemDependency, MemDependencyCheckerDynamicShapes) {
2308	BufHandle a("A", {`100`}, kInt);
2309	BufHandle b("B", {`100`}, kInt);
2310	BufHandle c("C", {`100`}, kInt);
2311	VarHandle x("x", kInt);
2312
2313	using namespace analysis;
2314
2315	auto CB = [](ExprHandle s, ExprHandle e) {
2316	return Bound (s.node(), e.node());
2317	};
2318
2319	auto EQ = [](const IndexBounds& x, const IndexBounds& y) {
2320	return indexBoundsEquals(x, y);
2321	};
2322
2323	{
2324	/ for (int x = 0; x < B[0]; x++) {*
2325	* C[x] = A[x];
2326	* }
2327	*/
2328	MemDependencyChecker analyzer({a, b}, {c});
2329	StmtPtr stmt = Block::make({For::make(
2330	x, `0`, Load::make(b, {`0`}), Store::make(c, {x}, Load::make(a, {x})))});
2331
2332	stmt ->accept(&analyzer);
2333
2334	/ 0. Input: B[(0, 99)] - dependents: 2*
2335	* 1. Input: A[(0, 99)] - dependents: 3
2336	* 2. Load: B[(0, 0)] - depends on: 0 - dependents: 3 4
2337	* 3. Load: A[(0, (B[0]) - 1)] - depends on: 1 2 - dependents: 4
2338	* 4. Store: C[(0, (B[0]) - 1)] - depends on: 2 3 - dependents: 5
2339	* 5. Output: C[(0, 99)] - depends on: 4
2340	*/
2341
2342	// Output dependent on A input.
2343	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2344	// Also dependent on B input to determine the size of the region written.
2345	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2346
2347	auto history = analyzer.getHistory();
2348	ASSERT_EQ(history.size(), `6`);
2349
2350	// The accesses in the loop depend on the load in the stop condition.
2351	ASSERT_TRUE(history[`4`]->hasDependency(history[`2`]));
2352	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
2353
2354	// Make a load from B to compare against.
2355	ExprHandle loadFromB = Load::make(b, {`0`});
2356
2357	ASSERT_TRUE(EQ(history[`3`]->bounds(), {CB(`0`, loadFromB - `1`)}));
2358	ASSERT_TRUE(EQ(history[`4`]->bounds(), {CB(`0`, loadFromB - `1`)}));
2359	}
2360
2361	{
2362	/ for (int x = B[0]; x < B[1]; x++) {*
2363	* C[x] = A[x];
2364	* }
2365	*/
2366	MemDependencyChecker analyzer({a, b}, {c});
2367	StmtPtr stmt = Block::make({For::make(
2368	x,
2369	Load::make(b, {`0`}),
2370	Load::make(b, {`1`}),
2371	Store::make(c, {x}, Load::make(a, {x})))});
2372
2373	stmt ->accept(&analyzer);
2374
2375	/ 0. Input: B[(0, 99)] - dependents: 2 3*
2376	* 1. Input: A[(0, 99)] - dependents: 4
2377	* 2. Load: B[(0, 0)] - depends on: 0 - dependents: 4 5
2378	* 3. Load: B[(1, 1)] - depends on: 0 - dependents: 4 5
2379	* 4. Load: A[(B[0], (B[1]) - 1)] - depends on: 1 2 3 - dependents: 5
2380	* 5. Store: C[(B[0], (B[1]) - 1)] - depends on: 2 3 4 - dependents: 6
2381	* 6. Output: C[(0, 99)] - depends on: 5
2382	*/
2383
2384	// Sanity check output depends on input.
2385	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2386	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2387
2388	auto history = analyzer.getHistory();
2389	ASSERT_EQ(history.size(), `7`);
2390
2391	// The accesses in the loop depend on the load in the start condition.
2392	ASSERT_TRUE(history[`5`]->hasDependency(history[`2`]));
2393	ASSERT_TRUE(history[`4`]->hasDependency(history[`2`]));
2394
2395	// also the stop condition.
2396	ASSERT_TRUE(history[`5`]->hasDependency(history[`3`]));
2397	ASSERT_TRUE(history[`4`]->hasDependency(history[`3`]));
2398
2399	// Make loads from B to compare against.
2400	ExprHandle loadFromB0 = Load::make(b, {`0`});
2401	ExprHandle loadFromB1 = Load::make(b, {`1`});
2402	ASSERT_TRUE(EQ(history[`4`]->bounds(), {CB(loadFromB0, loadFromB1 - `1`)}));
2403	ASSERT_TRUE(EQ(history[`5`]->bounds(), {CB(loadFromB0, loadFromB1 - `1`)}));
2404	}
2405
2406	{
2407	/ for (int x = 0; x < 10; x++) {*
2408	* C[x] = A[B[x]];
2409	* }
2410	*/
2411	MemDependencyChecker analyzer({a, b}, {c});
2412	StmtPtr stmt = Block::make({For::make(
2413	x, `0`, `10`, Store::make(c, {x}, Load::make(a, {Load::make(b, {x})})))});
2414
2415	stmt ->accept(&analyzer);
2416
2417	/ 0. Input: B[(0, 99)] - dependents: 2*
2418	* 1. Input: A[(0, 99)] - dependents: 3
2419	* 2. Load: B[(0, 9)] - depends on: 0 - dependents: 3 4
2420	* 3. Load: A[(B[0], B[9])] - depends on: 1 2 - dependents: 4
2421	* 4. Store: C[(0, 9)] - depends on: 2 3 - dependents: 5
2422	* 5. Output: C[(0, 99)] - depends on: 4
2423	*/
2424
2425	// Sanity check output depends on input.
2426	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2427	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2428
2429	auto history = analyzer.getHistory();
2430	ASSERT_EQ(history.size(), `6`);
2431
2432	// The store depends on both loads, the load of A depends on the load of B.
2433	ASSERT_TRUE(history[`4`]->hasDependency(history[`2`]));
2434	ASSERT_TRUE(history[`4`]->hasDependency(history[`3`]));
2435
2436	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
2437
2438	// The loads in the indices depend on the relevant input buffer.
2439	ASSERT_TRUE(history[`3`]->hasDependency(history[`1`]));
2440	ASSERT_TRUE(history[`2`]->hasDependency(history[`0`]));
2441
2442	// The load from B has the loop bounds.
2443	ASSERT_TRUE(EQ(history[`2`]->bounds(), {CB(`0`, `9`)}));
2444
2445	// The load from A has bounds B[0] to B[9].
2446	ExprHandle loadFromB0 = Load::make(b, {`0`});
2447	ExprHandle loadFromB9 = Load::make(b, {`9`});
2448	ASSERT_TRUE(EQ(history[`3`]->bounds(), {CB(loadFromB0, loadFromB9)}));
2449	}
2450
2451	{
2452	/ for (int x = 0; x < 10; x++) {*
2453	* C[B[x]] = A[x];
2454	* }
2455	*/
2456	MemDependencyChecker analyzer({a, b}, {c});
2457	StmtPtr stmt = Block::make({For::make(
2458	x, `0`, `10`, Store::make(c, {Load::make(b, {x})}, Load::make(a, {x})))});
2459
2460	stmt ->accept(&analyzer);
2461
2462	/ 0. Input: B[(0, 99)] - dependents: 3*
2463	* 1. Input: A[(0, 99)] - dependents: 2
2464	* 2. Load: A[(0, 9)] - depends on: 1 - dependents: 4
2465	* 3. Load: B[(0, 9)] - depends on: 0 - dependents: 4
2466	* 4. Store: C[(B[0], B[9])] - depends on: 2 3 - dependents: 5
2467	* 5. Output: C[(0, 99)] - depends on: 4
2468	*/
2469	// Sanity check output depends on input.
2470	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2471	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2472
2473	auto history = analyzer.getHistory();
2474	ASSERT_EQ(history.size(), `6`);
2475
2476	// The store depends on both loads, neither load is dependent.
2477	ASSERT_TRUE(history[`4`]->hasDependency(history[`2`]));
2478	ASSERT_TRUE(history[`4`]->hasDependency(history[`3`]));
2479
2480	ASSERT_FALSE(history[`3`]->hasDependency(history[`2`]));
2481	ASSERT_FALSE(history[`2`]->hasDependency(history[`3`]));
2482
2483	// The loads each depend on their relevant input. (but accesses are in a
2484	// different order than the last case).
2485	ASSERT_TRUE(history[`3`]->hasDependency(history[`0`]));
2486	ASSERT_TRUE(history[`2`]->hasDependency(history[`1`]));
2487
2488	// The load from B has the loop bounds.
2489	ASSERT_TRUE(EQ(history[`3`]->bounds(), {CB(`0`, `9`)}));
2490
2491	// And so does the load from A.
2492	ASSERT_TRUE(EQ(history[`2`]->bounds(), {CB(`0`, `9`)}));
2493	}
2494
2495	{
2496	/ for (int x = 0; x < 10; x++) {*
2497	* C[B[A[x]]] = x;
2498	* }
2499	*/
2500	MemDependencyChecker analyzer({a, b}, {c});
2501	StmtPtr stmt = Block::make({For::make(
2502	x, `0`, `10`, Store::make(c, {Load::make(b, {Load::make(a, {x})})}, x))});
2503
2504	stmt ->accept(&analyzer);
2505
2506	/ 0. Input: B[(0, 99)] - dependents: 3*
2507	* 1. Input: A[(0, 99)] - dependents: 2
2508	* 2. Load: A[(0, 9)] - depends on: 1 - dependents: 3 4
2509	* 3. Load: B[(A[0], A[9])] - depends on: 0 2 - dependents: 4
2510	* 4. Store: C[(B[A[0]], B[A[9]])] - depends on: 2 3 - dependents: 5
2511	* 5. Output: C[(0, 99)] - depends on: 4
2512	*/
2513
2514	// Sanity check output depends on input.
2515	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node()));
2516	ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node()));
2517
2518	auto history = analyzer.getHistory();
2519	ASSERT_EQ(history.size(), `6`);
2520
2521	// The store depends on both loads.
2522	ASSERT_TRUE(history[`4`]->hasDependency(history[`2`]));
2523	ASSERT_TRUE(history[`4`]->hasDependency(history[`3`]));
2524
2525	// The outer load depends on the inner.
2526	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
2527
2528	// The loads each depend on their relevant input. (but accesses are in a
2529	// different order than the last case).
2530	ASSERT_TRUE(history[`3`]->hasDependency(history[`0`]));
2531	ASSERT_TRUE(history[`2`]->hasDependency(history[`1`]));
2532
2533	// The load from A has the loop bounds.
2534	ASSERT_TRUE(EQ(history[`2`]->bounds(), {CB(`0`, `9`)}));
2535	// The load from B as bounds A[0] to A[9].
2536	ExprHandle loadFromA0 = Load::make(a, {`0`});
2537	ExprHandle loadFromA9 = Load::make(a, {`9`});
2538	ASSERT_TRUE(EQ(history[`3`]->bounds(), {CB(loadFromA0, loadFromA9)}));
2539
2540	// The store has bounds of B[A[0]] to B[A[9]].
2541	ExprHandle loadFromBA0 = Load::make(b, {loadFromA0});
2542	ExprHandle loadFromBA9 = Load::make(b, {loadFromA9});
2543	ASSERT_TRUE(EQ(history[`4`]->bounds(), {CB(loadFromBA0, loadFromBA9)}));
2544	}
2545	}
2546
2547	// Verify multi dimensional bounds work.
2548	TEST(MemDependency, MemDependencyCheckerMultiDim) {
2549	int M = `10`, N = `9`, K = `12`;
2550	BufHandle a("A", {M, N, K}, kInt);
2551	BufHandle b("B", {M, N, K}, kInt);
2552	BufHandle c("C", {M, K}, kInt);
2553	VarHandle x("x", kInt);
2554	VarHandle y("y", kInt);
2555	VarHandle z("z", kInt);
2556
2557	using namespace analysis;
2558
2559	auto CB = [](ExprHandle s, ExprHandle e) {
2560	return Bound (s.node(), e.node());
2561	};
2562
2563	auto EQ = [](const IndexBounds& x, const IndexBounds& y) {
2564	return indexBoundsEquals(x, y);
2565	};
2566
2567	{
2568	/ for (int x = 0; x < 10; x++) {*
2569	* for (int y = 0; y < 9; y++) {
2570	* for (int z = 0; z < 12; z++) {
2571	* B[x, y, z] = A[x, y, z];
2572	* }
2573	* }
2574	* }
2575	*/
2576	// Full range.
2577
2578	MemDependencyChecker analyzer({a}, {b});
2579	StmtPtr stmt = Block::make({For::make(
2580	x,
2581	`0`,
2582	M,
2583	For::make(
2584	y,
2585	`0`,
2586	N,
2587	For::make(
2588	z,
2589	`0`,
2590	K,
2591	Store::make(b, {x, y, z}, Load::make(a, {x, y, z})))))});
2592
2593	stmt ->accept(&analyzer);
2594
2595	// Sanity test: Output depends on input.
2596	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
2597
2598	// 4 accesses: input, load, store, output.
2599	auto history = analyzer.getHistory();
2600	ASSERT_EQ(history.size(), `4`);
2601
2602	// Simple chain from input to output.
2603	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
2604	ASSERT_TRUE(history[`2`]->hasDependency(history[`1`]));
2605	ASSERT_TRUE(history[`1`]->hasDependency(history[`0`]));
2606
2607	ASSERT_TRUE(
2608	EQ(history[`1`]->bounds(), {CB(`0`, M - `1`), CB(`0`, N - `1`), CB(`0`, K - `1`)}));
2609	ASSERT_TRUE(
2610	EQ(history[`2`]->bounds(), {CB(`0`, M - `1`), CB(`0`, N - `1`), CB(`0`, K - `1`)}));
2611	}
2612
2613	{
2614	/ for (int x = 0; x < 5; x++) {*
2615	* for (int y = 0; y < 5; y++) {
2616	* for (int z = 0; z < 5; z++) {
2617	* B[x, y, z] = A[x, y, z];
2618	* }
2619	* }
2620	* }
2621	*/
2622	// Partial range.
2623
2624	MemDependencyChecker analyzer({a}, {b});
2625	StmtPtr stmt = Block::make({For::make(
2626	x,
2627	`0`,
2628	`5`,
2629	For::make(
2630	y,
2631	`0`,
2632	`5`,
2633	For::make(
2634	z,
2635	`0`,
2636	`5`,
2637	Store::make(b, {x, y, z}, Load::make(a, {x, y, z})))))});
2638
2639	stmt ->accept(&analyzer);
2640
2641	// Sanity test: Output depends on input.
2642	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
2643
2644	// 4 accesses: input, load, store, output.
2645	auto history = analyzer.getHistory();
2646	ASSERT_EQ(history.size(), `4`);
2647
2648	// Simple chain from input to output.
2649	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
2650	ASSERT_TRUE(history[`2`]->hasDependency(history[`1`]));
2651	ASSERT_TRUE(history[`1`]->hasDependency(history[`0`]));
2652
2653	ASSERT_TRUE(EQ(history[`1`]->bounds(), {CB(`0`, `4`), CB(`0`, `4`), CB(`0`, `4`)}));
2654	ASSERT_TRUE(EQ(history[`2`]->bounds(), {CB(`0`, `4`), CB(`0`, `4`), CB(`0`, `4`)}));
2655	}
2656
2657	{
2658	/ for (int x = 0; x < 10; x++) {*
2659	* for (int y = 0; y < 12; y++) {
2660	* B[x, 0, y] = A[x, 0, y];
2661	* }
2662	* }
2663	*/
2664
2665	// Partial loops.
2666
2667	MemDependencyChecker analyzer({a}, {b});
2668	StmtPtr stmt = Block::make({For::make(
2669	x,
2670	`0`,
2671	N,
2672	For::make(
2673	y, `0`, K, Store::make(b, {x, `0`, y}, Load::make(a, {x, `0`, y}))))});
2674
2675	stmt ->accept(&analyzer);
2676
2677	// Sanity test: Output depends on input.
2678	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
2679
2680	// 4 accesses: input, load, store, output.
2681	auto history = analyzer.getHistory();
2682	ASSERT_EQ(history.size(), `4`);
2683
2684	// Simple chain from input to output.
2685	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
2686	ASSERT_TRUE(history[`2`]->hasDependency(history[`1`]));
2687	ASSERT_TRUE(history[`1`]->hasDependency(history[`0`]));
2688
2689	ASSERT_TRUE(
2690	EQ(history[`1`]->bounds(), {CB(`0`, N - `1`), CB(`0`, `0`), CB(`0`, K - `1`)}));
2691	ASSERT_TRUE(
2692	EQ(history[`2`]->bounds(), {CB(`0`, N - `1`), CB(`0`, `0`), CB(`0`, K - `1`)}));
2693	}
2694
2695	{
2696	/ for (int x = 0; x < 10; x++) {*
2697	* for (int y = 0; y < 100; y++) {
2698	* for (int z = 0; z < 12; z++) {
2699	* B[x, 0, z] = (A[x, 0, z]) + (C[x, z]);
2700	* }
2701	* }
2702	* }
2703	*/
2704
2705	// Loops that don't correspond to an index, bufs with different
2706	// dimensionality.
2707
2708	MemDependencyChecker analyzer({a, c}, {b});
2709	StmtPtr stmt = Block::make({For::make(
2710	x,
2711	`0`,
2712	M,
2713	For::make(
2714	y,
2715	`0`,
2716	`100`,
2717	For::make(
2718	z,
2719	`0`,
2720	K,
2721	Store::make(
2722	b,
2723	{x, `0`, z},
2724	Add::make(
2725	Load::make(a, {x, `0`, z}), Load::make(c, {x, z}))))))});
2726
2727	stmt ->accept(&analyzer);
2728
2729	// Sanity test: Output depends on both inputs.
2730	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
2731	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), c.node()));
2732
2733	// 6 accesses: 2 inputs, 2 loads, store, output.
2734	auto history = analyzer.getHistory();
2735	ASSERT_EQ(history.size(), `6`);
2736
2737	// Simple chain from input to output over the A buf.
2738	// history[0] is the C input, history[3] is the load from C.
2739	ASSERT_TRUE(history[`5`]->hasDependency(history[`4`]));
2740	ASSERT_TRUE(history[`4`]->hasDependency(history[`2`]));
2741	ASSERT_TRUE(history[`2`]->hasDependency(history[`1`]));
2742	// The store also depends on the load from the C input.
2743	ASSERT_TRUE(history[`4`]->hasDependency(history[`3`]));
2744	ASSERT_TRUE(history[`3`]->hasDependency(history[`0`]));
2745
2746	// A Buf accesses.
2747	ASSERT_TRUE(
2748	EQ(history[`4`]->bounds(), {CB(`0`, M - `1`), CB(`0`, `0`), CB(`0`, K - `1`)}));
2749	ASSERT_TRUE(
2750	EQ(history[`2`]->bounds(), {CB(`0`, M - `1`), CB(`0`, `0`), CB(`0`, K - `1`)}));
2751
2752	// C buf access.
2753	ASSERT_TRUE(EQ(history[`3`]->bounds(), {CB(`0`, M - `1`), CB(`0`, K - `1`)}));
2754	}
2755
2756	{
2757	/ for (int x = 0; x < 9; x++) {*
2758	* for (int y = 0; y < 10; y++) {
2759	* for (int z = 0; z < 12; z++) {
2760	* B[x, 0, 0] = (B[x, y, z]) + (A[x, y, z]);
2761	* }
2762	* }
2763	* }
2764	*/
2765	// Multi-dim reductions.
2766
2767	MemDependencyChecker analyzer({a}, {b});
2768	StmtPtr stmt = Block::make({For::make(
2769	x,
2770	`0`,
2771	M,
2772	For::make(
2773	y,
2774	`0`,
2775	N,
2776	For::make(
2777	z,
2778	`0`,
2779	K,
2780	Store::make(
2781	b,
2782	{x, `0`, `0`},
2783	Add::make(
2784	Load::make(b, {x, y, z}),
2785	Load::make(a, {x, y, z}))))))});
2786
2787	stmt ->accept(&analyzer);
2788
2789	// Sanity test: Output depends on input.
2790	ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node()));
2791
2792	// 4 accesses: input, 2 loads, store, output.
2793	auto history = analyzer.getHistory();
2794	ASSERT_EQ(history.size(), `5`);
2795
2796	// Simple chain from input to output.
2797	ASSERT_TRUE(history[`4`]->hasDependency(history[`3`]));
2798	ASSERT_TRUE(history[`3`]->hasDependency(history[`2`]));
2799	ASSERT_TRUE(history[`3`]->hasDependency(history[`1`]));
2800	ASSERT_TRUE(history[`2`]->hasDependency(history[`0`]));
2801
2802	// The load from B depends on the store to B.
2803	ASSERT_TRUE(history[`1`]->hasDependency(history[`3`]));
2804
2805	ASSERT_TRUE(
2806	EQ(history[`1`]->bounds(), {CB(`0`, M - `1`), CB(`0`, N - `1`), CB(`0`, K - `1`)}));
2807	ASSERT_TRUE(
2808	EQ(history[`2`]->bounds(), {CB(`0`, M - `1`), CB(`0`, N - `1`), CB(`0`, K - `1`)}));
2809	ASSERT_TRUE(EQ(history[`3`]->bounds(), {CB(`0`, M - `1`), CB(`0`, `0`), CB(`0`, `0`)}));
2810	}
2811	}
2812
2813	// Various tests using the external Compute/Reduce API.
2814	TEST(MemDependency, MemDependencyCheckerComputeAPI) {
2815	using namespace analysis;
2816
2817	/ for (int m = 0; m < 4; m++) {*
2818	* for (int n = 0; n < 5; n++) {
2819	* for (int k = 0; k < 6; k++) {
2820	* broadcast_add[m, n, k] = (a[m, n]) + (b[n, k]);
2821	* }
2822	* }
2823	* }
2824	* for (int m_1 = 0; m_1 < 4; m_1++) {
2825	* for (int n_1 = 0; n_1 < 5; n_1++) {
2826	* for (int k_1 = 0; k_1 < 6; k_1++) {
2827	* d[m_1, n_1, k_1] = (broadcast_add(m_1, n_1, k_1)) + float(1);
2828	* }
2829	* }
2830	* }
2831	*/
2832
2833	// Can determine if 2 loops created by Compute are dependent.
2834	BufHandle a_buf("a", {`4`, `5`}, kFloat);
2835	BufHandle b_buf("b", {`5`, `6`}, kFloat);
2836	Tensor c = Compute(
2837	"broadcast_add",
2838	{`4`, `5`, `6`},
2839	[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
2840	return a_buf.load(m, n) + b_buf.load(n, k);
2841	});
2842	Tensor d = Compute(
2843	"d",
2844	{`4`, `5`, `6`},
2845	[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
2846	return c.load(m, n, k) + `1`;
2847	});
2848
2849	LoopNest l({d}, {c, d});
2850
2851	MemDependencyChecker analyzer({a_buf.node(), b_buf.node()}, {d.buf()});
2852
2853	l.root_stmt()->accept(&analyzer);
2854
2855	// Sanity test: Output depends on input.
2856	ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.node()));
2857	ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.node()));
2858
2859	// Second loop depends on first loop.
2860	auto c_loop = l.getLoopStmtsFor(c)[`0`];
2861	auto d_loop = l.getLoopStmtsFor(d)[`0`];
2862	ASSERT_TRUE(analyzer.dependsDirectly(d_loop, c_loop));
2863	}
2864
2865	TEST(MemDependency, MemDependencyCheckerComputeInline) {
2866	using namespace analysis;
2867
2868	/ for (int m = 0; m < 4; m++) {*
2869	* for (int n = 0; n < 5; n++) {
2870	* for (int k = 0; k < 6; k++) {
2871	* d[m, n, k] = ((a[m, n]) + (b[n, k])) + float(1);
2872	* }
2873	* }
2874	* }
2875	*/
2876
2877	// Check inlining affects the number of accesses returned.
2878
2879	BufHandle a_buf("a", {`4`, `5`}, kFloat);
2880	BufHandle b_buf("b", {`5`, `6`}, kFloat);
2881	Tensor c = Compute(
2882	"broadcast_add",
2883	{`4`, `5`, `6`},
2884	[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
2885	return a_buf.load(m, n) + b_buf.load(n, k);
2886	});
2887	Tensor d = Compute(
2888	"d",
2889	{`4`, `5`, `6`},
2890	[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
2891	return c.load(m, n, k) + `1`;
2892	});
2893
2894	LoopNest l({d}, {c, d});
2895	l.computeInline(c.buf());
2896
2897	MemDependencyChecker analyzer({a_buf.node(), b_buf.node()}, {d.buf()});
2898	l.root_stmt()->accept(&analyzer);
2899
2900	// Sanity test: Output depends on input.
2901	ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.node()));
2902	ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.node()));
2903
2904	// broadcast_add tensor should not appear in trace at all.
2905	for (auto& wi : analyzer.getHistory()) {
2906	ASSERT_NE(wi ->var(), c.buf()->base_handle());
2907	}
2908	}
2909
2910	TEST(MemDependency, MemDependencyCheckerComputeSplit) {
2911	using namespace analysis;
2912	// Split an axis, so the number of loops != the number of dimensions.
2913
2914	BufHandle a_buf("a", {`4`, `5`}, kFloat);
2915	BufHandle b_buf("b", {`5`, `6`}, kFloat);
2916	Tensor c = Compute(
2917	"broadcast_add",
2918	{`4`, `5`, `6`},
2919	[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
2920	return a_buf.load(m, n) + b_buf.load(n, k);
2921	});
2922
2923	LoopNest l({c});
2924
2925	MemDependencyChecker analyzer_before({a_buf.node(), b_buf.node()}, {c.buf()});
2926	l.root_stmt()->accept(&analyzer_before);
2927
2928	l.splitWithTail(l.getLoopStmtsFor(c)[`0`], `2`);
2929
2930	MemDependencyChecker analyzer_after({a_buf.node(), b_buf.node()}, {c.buf()});
2931	StmtPtr stmt = IRSimplifier::simplify(l.root_stmt());
2932	stmt ->accept(&analyzer_after);
2933
2934	// Splitting should not change accesses at all.
2935	auto history_before = analyzer_before.getHistory();
2936	auto history_after = analyzer_after.getHistory();
2937
2938	ASSERT_EQ(history_before.size(), history_after.size());
2939
2940	for (size_t i = `0`; i < history_before.size(); ++i) {
2941	ASSERT_EQ(history_before[i]->type(), history_after[i]->type());
2942	ASSERT_EQ(history_before[i]->var(), history_after[i]->var());
2943	ASSERT_EQ(
2944	history_before[i]->bounds().size(), history_after[i]->bounds().size());
2945	ASSERT_TRUE(indexBoundsEquals(
2946	history_before[i]->bounds(), history_after[i]->bounds()));
2947	ASSERT_EQ(
2948	history_before[i]->dependencies().size(),
2949	history_after[i]->dependencies().size());
2950	ASSERT_EQ(
2951	history_before[i]->dependents().size(),
2952	history_after[i]->dependents().size());
2953	}
2954	}
2955
2956	TEST(MemDependency, MemDependencyCheckerComputeReorder) {
2957	using namespace analysis;
2958	// Reorder an axis, so the loop order doesn't match the indexing order.
2959
2960	BufHandle a_buf("a", {`4`, `5`}, kFloat);
2961	BufHandle b_buf("b", {`5`, `6`}, kFloat);
2962	Tensor c = Compute(
2963	"broadcast_add",
2964	{`4`, `5`, `6`},
2965	[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
2966	return a_buf.load(m, n) + b_buf.load(n, k);
2967	});
2968
2969	LoopNest l({c});
2970
2971	MemDependencyChecker analyzer_before({a_buf.node(), b_buf.node()}, {c.buf()});
2972	l.root_stmt()->accept(&analyzer_before);
2973
2974	auto loops = l.getLoopStmtsFor(c);
2975	l.reorderAxis(loops [`0`], loops [`1`]);
2976
2977	MemDependencyChecker analyzer_after({a_buf.node(), b_buf.node()}, {c.buf()});
2978	StmtPtr stmt = IRSimplifier::simplify(l.root_stmt());
2979	stmt ->accept(&analyzer_after);
2980
2981	// Reordering should not change accesses at all.
2982	auto history_before = analyzer_before.getHistory();
2983	auto history_after = analyzer_after.getHistory();
2984
2985	ASSERT_EQ(history_before.size(), history_after.size());
2986
2987	for (size_t i = `0`; i < history_before.size(); ++i) {
2988	ASSERT_EQ(history_before[i]->type(), history_after[i]->type());
2989	ASSERT_EQ(history_before[i]->var(), history_after[i]->var());
2990	ASSERT_EQ(
2991	history_before[i]->bounds().size(), history_after[i]->bounds().size());
2992	ASSERT_TRUE(indexBoundsEquals(
2993	history_before[i]->bounds(), history_after[i]->bounds()));
2994	ASSERT_EQ(
2995	history_before[i]->dependencies().size(),
2996	history_after[i]->dependencies().size());
2997	ASSERT_EQ(
2998	history_before[i]->dependents().size(),
2999	history_after[i]->dependents().size());
3000	}
3001	}
3002
3003	TEST(MemDependency, MemDependencyCheckerComputeReduce) {
3004	using namespace analysis;
3005	/ for (int l2 = 0; l2 < 2; l2++) {*
3006	* for (int n1 = 0; n1 < 3; n1++) {
3007	* for (int m1 = 0; m1 < 6; m1++) {
3008	* scale[l2, n1, m1] = (b[l2, n1, m1]) * (a[l2, n1, m1]);
3009	* }
3010	* }
3011	* }
3012	* for (int l1 = 0; l1 < 2; l1++) {
3013	* sum[l1] = float(0);
3014	* for (int n1_1 = 0; n1_1 < 3; n1_1++) {
3015	* for (int m1_1 = 0; m1_1 < 6; m1_1++) {
3016	* sum[l1] = ReduceOp(sum, (sum[l1]) + (scale(l1, n1_1, m1_1)),
3017	* out_args={l1}, reduce_args={n1, m1});
3018	* }
3019	* }
3020	* }
3021	*/
3022
3023	// Can determine dependencies of a Reduction.
3024
3025	BufHandle a("a", {`2`, `3`, `6`}, kFloat);
3026	BufHandle b("b", {`2`, `3`, `6`}, kFloat);
3027
3028	Tensor c = Compute(
3029	"scale",
3030	{`2`, `3`, `6`},
3031	[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
3032	return b.load(l, n, m) * a.load(l, n, m);
3033	});
3034	Tensor d = Reduce("sum", {`2`}, Sum (), c, {`3`, `6`});
3035	LoopNest l({d}, {c, d});
3036
3037	MemDependencyChecker analyzer({a.node(), b.node()}, {d.buf()});
3038
3039	l.root_stmt()->accept(&analyzer);
3040
3041	// Sanity test: Output depends on input.
3042	ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a.node()));
3043	ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b.node()));
3044
3045	// Second loop depends on first loop.
3046	auto c_loop = l.getLoopStmtsFor(c)[`0`];
3047	auto d_loop = l.getLoopStmtsFor(d)[`0`];
3048	ASSERT_TRUE(analyzer.dependsDirectly(d_loop, c_loop));
3049
3050	// Reduction depends on both inputs.
3051	auto reduces = NodeFinder<ReduceOp>::find(l.root_stmt());
3052	ASSERT_TRUE(analyzer.dependsIndirectly(reduces[`0`], a.node()));
3053	ASSERT_TRUE(analyzer.dependsIndirectly(reduces[`0`], b.node()));
3054	}
3055
3056	TEST(MemDependency, MemDependencyCheckerComputeGEMM) {
3057	int M = `1024`;
3058	int N = `1024`;
3059	int K = `2048`;
3060	using namespace analysis;
3061
3062	BufHandle AP("A", {M, K}, kFloat);
3063	BufHandle BP("B", {K, N}, kFloat);
3064	Tensor CT = Reduce(
3065	"gemm",
3066	{M, N},
3067	Sum (),
3068	[&](const ExprHandle& m, const ExprHandle& n, const ExprHandle& k) {
3069	return AP.load(m, k) * BP.load(k, n);
3070	},
3071	{K});
3072	LoopNest loop({CT});
3073
3074	{
3075	auto const& loops = loop.getLoopStmtsFor(CT);
3076	ForPtr m = loops [`0`];
3077	loop.splitWithMask(m, `4`);
3078	}
3079	{
3080	auto const& loops = loop.getLoopStmtsFor(CT);
3081	ForPtr n = loops [`2`];
3082	loop.splitWithMask(n, `16`);
3083	}
3084	// mo, mi, no, ni, k ->
3085	// mo, no, mi, ni, k
3086	{
3087	auto const& loops = loop.getLoopStmtsFor(CT);
3088	ForPtr mi = loops [`1`];
3089	ForPtr no = loops [`2`];
3090	loop.reorderAxis(mi, no);
3091	}
3092	// mo, no, mi, ni, k ->
3093	// mo, no, mi, k, ni
3094	{
3095	auto const& loops = loop.getLoopStmtsFor(CT);
3096	ForPtr ni = loops [`3`];
3097	ForPtr k = loops [`4`];
3098	loop.reorderAxis(ni, k);
3099	}
3100	// mo, no, mi, k, ni ->
3101	// mo, no, k, mi, ni
3102	{
3103	auto const& loops = loop.getLoopStmtsFor(CT);
3104	ForPtr mi = loops [`2`];
3105	ForPtr k = loops [`3`];
3106	loop.reorderAxis(mi, k);
3107	}
3108	{
3109	auto const& loops = loop.getLoopStmtsFor(CT);
3110	loop.cacheAccesses(CT.buf(), "C_regs", loops [`2`]);
3111	}
3112
3113	MemDependencyChecker analyzer_unlowered(
3114	loop.getInputBufs(), loop.getOutputBufs());
3115
3116	MemDependencyChecker analyzer_lowered(
3117	loop.getInputBufs(), loop.getOutputBufs());
3118
3119	// Test both unlowered and lowered form.
3120	{
3121	StmtPtr stmt = IRSimplifier::simplify(loop.root_stmt());
3122	stmt ->accept(&analyzer_unlowered);
3123
3124	// Outputs depend on inputs.
3125	ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), AP.node()));
3126	ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), BP.node()));
3127
3128	// The last write to gemm should cover the total bound of the output.
3129	std::shared_ptr<AccessInfo> outputAccess =
3130	analyzer_unlowered.output(CT.buf());
3131	// A single dependency.
3132	ASSERT_EQ(outputAccess ->dependencies().size(), `1`);
3133
3134	// dependencies is a set with 1 element, so can just deref begin().
3135	std::shared_ptr<AccessInfo> gemmStore =
3136	outputAccess ->dependencies().begin()->second;
3137	// Check its a store.
3138	ASSERT_EQ(gemmStore ->type(), AccessType::Store);
3139
3140	ASSERT_TRUE(indexBoundsEquals(outputAccess ->bounds(), gemmStore ->bounds()));
3141
3142	// Likewise the first read from each input cover the entire range of the
3143	// input.
3144	auto aInput = analyzer_unlowered.input(AP.node());
3145	auto bInput = analyzer_unlowered.input(BP.node());
3146
3147	// A single dependent each.
3148	ASSERT_EQ(aInput ->dependents().size(), `1`);
3149	ASSERT_EQ(bInput ->dependents().size(), `1`);
3150
3151	// They're both loads.
3152	std::shared_ptr<AccessInfo> aLoad = aInput ->dependents().begin()->second;
3153	std::shared_ptr<AccessInfo> bLoad = bInput ->dependents().begin()->second;
3154	ASSERT_EQ(aLoad ->type(), AccessType::Load);
3155	ASSERT_EQ(bLoad ->type(), AccessType::Load);
3156
3157	ASSERT_TRUE(indexBoundsEquals(aInput ->bounds(), aLoad ->bounds()));
3158	ASSERT_TRUE(indexBoundsEquals(bInput ->bounds(), bLoad ->bounds()));
3159	}
3160
3161	loop.prepareForCodegen();
3162	SimpleIREvaluator cg(loop.root_stmt(), {AP, BP, CT});
3163
3164	// now check lowered dependency graph.
3165	{
3166	StmtPtr stmt = IRSimplifier::simplify(cg.stmt());
3167	stmt ->accept(&analyzer_lowered);
3168
3169	// Lowering will change the dimensionality of all bounds due to index
3170	// flattening and will insert Allocates and Frees.
3171
3172	auto history_before = analyzer_unlowered.getHistory();
3173	auto history_after = analyzer_lowered.getHistory();
3174
3175	ASSERT_EQ(history_before.size() + `2`, history_after.size());
3176
3177	// Filter out the alloc/free;
3178	auto isAllocFree = [](const auto& info) {
3179	return info->type() == AccessType::Alloc \|\|
3180	info->type() == AccessType::Free;
3181	};
3182	history_after.erase(
3183	std::remove_if(history_after.begin(), history_after.end(), isAllocFree),
3184	history_after.end());
3185
3186	ASSERT_EQ(history_before.size(), history_after.size());
3187
3188	for (size_t i = `0`; i < history_before.size(); ++i) {
3189	ASSERT_EQ(history_before[i]->type(), history_after[i]->type());
3190	ASSERT_EQ(history_before[i]->var(), history_after[i]->var());
3191
3192	if (history_before [i]->dependencies().size() !=
3193	history_after [i]->dependencies().size()) {
3194	// Must depend on an Alloc.
3195	ASSERT_TRUE(std::any_of(
3196	history_after[i]->dependencies().begin(),
3197	history_after[i]->dependencies().end(),
3198	[](const auto& pair) {
3199	return pair.second->type() == AccessType::Alloc;
3200	}));
3201
3202	ASSERT_EQ(
3203	history_before[i]->dependencies().size() + `1`,
3204	history_after[i]->dependencies().size());
3205	}
3206
3207	if (history_before [i]->dependents().size() !=
3208	history_after [i]->dependents().size()) {
3209	// Must depend on an Free.
3210	ASSERT_TRUE(std::any_of(
3211	history_after[i]->dependents().begin(),
3212	history_after[i]->dependents().end(),
3213	[](const auto& pair) {
3214	return pair.second->type() == AccessType::Free;
3215	}));
3216
3217	ASSERT_EQ(
3218	history_before[i]->dependents().size() + `1`,
3219	history_after[i]->dependents().size());
3220	}
3221
3222	// Inputs and outputs are not flattened, only accesses.
3223	if (history_before [i]->type() == AccessType::Input \|\|
3224	history_before [i]->type() == AccessType::Output) {
3225	ASSERT_EQ(
3226	history_before[i]->bounds().size(),
3227	history_after[i]->bounds().size());
3228	ASSERT_TRUE(indexBoundsEquals(
3229	history_before[i]->bounds(), history_after[i]->bounds()));
3230	} else {
3231	ASSERT_EQ(history_after[i]->bounds().size(), `1`);
3232	ExprPtr flat_bounds = alloc<IntImm>(`1`);
3233
3234	for (auto& b : history_before [i]->bounds()) {
3235	flat_bounds =
3236	alloc<Mul>(flat_bounds, alloc<Add>(b.end, alloc<IntImm>(`1`)));
3237
3238	// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
3239	ASSERT_TRUE(exprEquals(b.start, history_after[i]->bounds()[`0`].start));
3240	}
3241
3242	flat_bounds = IRSimplifier::simplify(flat_bounds);
3243	ExprPtr after_bounds = IRSimplifier::simplify(
3244	alloc<Add>(history_after [i]->bounds()[`0`].end, alloc<IntImm>(`1`)));
3245	ASSERT_TRUE(exprEquals(flat_bounds, after_bounds));
3246	}
3247	}
3248	}
3249	}
3250
3251	} // namespace jit
3252	} // namespace torch
3253

Browse the source code of pytorch/test/cpp/tensorexpr/test_memdependency.cpp