1 | #include <gtest/gtest.h> |
2 | #include <test/cpp/tensorexpr/test_base.h> |
3 | |
4 | #include <torch/csrc/jit/tensorexpr/bounds_overlap.h> |
5 | #include <torch/csrc/jit/tensorexpr/ir.h> |
6 | #include <torch/csrc/jit/tensorexpr/ir_printer.h> |
7 | #include <torch/csrc/jit/tensorexpr/ir_simplifier.h> |
8 | #include <torch/csrc/jit/tensorexpr/loopnest.h> |
9 | #include <torch/csrc/jit/tensorexpr/mem_dependency_checker.h> |
10 | #include <torch/csrc/jit/tensorexpr/tensor.h> |
11 | |
12 | namespace torch { |
13 | namespace jit { |
14 | |
15 | using namespace torch::jit::tensorexpr; |
16 | |
17 | // Test helper function used to determine if two regions of a buffer have an |
18 | // overlap. No Overlap & partial overlap is obvious. Contains means A is |
19 | // larger and fully encloses B, while ContainedOrEqual is the reverse. Equal |
20 | // ranges are ContainedOrEqual. |
21 | TEST(MemDependency, BoundOverlap) { |
22 | using namespace analysis; |
23 | |
24 | auto CB = [](int s, int e) { |
25 | return Bound(alloc<IntImm>(s), alloc<IntImm>(e)); |
26 | }; |
27 | |
28 | // Sanity check 3 overlap cases. |
29 | ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(0, 0), CB(0, 0))); |
30 | ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(0, 3), CB(2, 5))); |
31 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(0, 0), CB(1, 1))); |
32 | |
33 | // Partial overlap works in either order. |
34 | ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(0, 10), CB(7, 14))); |
35 | ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(7, 14), CB(0, 10))); |
36 | |
37 | // Total Overlap works when one bound encloses the other, and returns which. |
38 | ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(2, 15), CB(7, 9))); |
39 | ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(2, 15), CB(0, 16))); |
40 | |
41 | // Total overlap works when the bounds are an identical range, returns |
42 | // ContainedOrEqual. |
43 | ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(2, 15), CB(2, 15))); |
44 | |
45 | // Total overlap when only one end of the bound matches. |
46 | ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(2, 15), CB(2, 10))); |
47 | ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(2, 15), CB(3, 15))); |
48 | ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(0, 10), CB(0, 9))); |
49 | ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(2, 10), CB(2, 15))); |
50 | ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(3, 15), CB(2, 15))); |
51 | |
52 | // No overlap when a < b. |
53 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(0, 2), CB(5, 10))); |
54 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(2, 2), CB(3, 3))); |
55 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(100, 120), CB(130, 130))); |
56 | |
57 | // No overlap when a > b. |
58 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(5, 10), CB(0, 2))); |
59 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(3, 3), CB(2, 2))); |
60 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(130, 130), CB(100, 120))); |
61 | |
62 | // No overlap when adjacent. |
63 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(0, 100), CB(101, 120))); |
64 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(2, 3), CB(0, 1))); |
65 | |
66 | // Partial overlap when middle bounds match. |
67 | ASSERT_EQ( |
68 | OverlapKind::PartialOverlap, boundOverlap(CB(0, 100), CB(100, 120))); |
69 | ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(0, 2), CB(2, 4))); |
70 | ASSERT_EQ( |
71 | OverlapKind::PartialOverlap, boundOverlap(CB(100, 120), CB(0, 100))); |
72 | ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(2, 3), CB(1, 2))); |
73 | |
74 | // Total overlap when one bound is single length over one end of the other. |
75 | ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(2, 15), CB(15, 15))); |
76 | ASSERT_EQ(OverlapKind::Contains, boundOverlap(CB(2, 15), CB(2, 2))); |
77 | ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(2, 2), CB(2, 15))); |
78 | ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(15, 15), CB(2, 15))); |
79 | } |
80 | |
81 | TEST(MemDependency, BoundComparison) { |
82 | using namespace analysis; |
83 | |
84 | auto CB = [](int s, int e) { |
85 | return Bound(alloc<IntImm>(s), alloc<IntImm>(e)); |
86 | }; |
87 | |
88 | ASSERT_EQ( |
89 | CmpEvalResult::NotDetermined, |
90 | compareBound(CB(10, 100), CB(10, 100), CompareSelectOperation::kEQ)); |
91 | ASSERT_EQ( |
92 | CmpEvalResult::True, |
93 | compareBound(CB(10, 10), CB(10, 10), CompareSelectOperation::kEQ)); |
94 | ASSERT_EQ( |
95 | CmpEvalResult::False, |
96 | compareBound(CB(10, 20), CB(30, 40), CompareSelectOperation::kEQ)); |
97 | ASSERT_EQ( |
98 | CmpEvalResult::False, |
99 | compareBound(CB(30, 40), CB(10, 20), CompareSelectOperation::kEQ)); |
100 | ASSERT_EQ( |
101 | CmpEvalResult::NotDetermined, |
102 | compareBound(CB(30, 40), CB(40, 50), CompareSelectOperation::kEQ)); |
103 | ASSERT_EQ( |
104 | CmpEvalResult::NotDetermined, |
105 | compareBound(CB(30, 40), CB(20, 30), CompareSelectOperation::kEQ)); |
106 | ASSERT_EQ( |
107 | CmpEvalResult::NotDetermined, |
108 | compareBound(CB(30, 45), CB(40, 50), CompareSelectOperation::kEQ)); |
109 | |
110 | ASSERT_EQ( |
111 | CmpEvalResult::NotDetermined, |
112 | compareBound(CB(10, 100), CB(10, 100), CompareSelectOperation::kNE)); |
113 | ASSERT_EQ( |
114 | CmpEvalResult::False, |
115 | compareBound(CB(10, 10), CB(10, 10), CompareSelectOperation::kNE)); |
116 | ASSERT_EQ( |
117 | CmpEvalResult::True, |
118 | compareBound(CB(10, 20), CB(30, 40), CompareSelectOperation::kNE)); |
119 | ASSERT_EQ( |
120 | CmpEvalResult::True, |
121 | compareBound(CB(30, 40), CB(10, 20), CompareSelectOperation::kNE)); |
122 | ASSERT_EQ( |
123 | CmpEvalResult::NotDetermined, |
124 | compareBound(CB(30, 40), CB(40, 50), CompareSelectOperation::kNE)); |
125 | ASSERT_EQ( |
126 | CmpEvalResult::NotDetermined, |
127 | compareBound(CB(30, 40), CB(20, 30), CompareSelectOperation::kEQ)); |
128 | ASSERT_EQ( |
129 | CmpEvalResult::NotDetermined, |
130 | compareBound(CB(30, 45), CB(40, 50), CompareSelectOperation::kNE)); |
131 | |
132 | ASSERT_EQ( |
133 | CmpEvalResult::True, |
134 | compareBound(CB(10, 20), CB(30, 40), CompareSelectOperation::kLT)); |
135 | ASSERT_EQ( |
136 | CmpEvalResult::False, |
137 | compareBound(CB(30, 40), CB(10, 20), CompareSelectOperation::kLT)); |
138 | ASSERT_EQ( |
139 | CmpEvalResult::False, |
140 | compareBound(CB(30, 40), CB(10, 30), CompareSelectOperation::kLT)); |
141 | ASSERT_EQ( |
142 | CmpEvalResult::NotDetermined, |
143 | compareBound(CB(10, 100), CB(10, 100), CompareSelectOperation::kLT)); |
144 | ASSERT_EQ( |
145 | CmpEvalResult::NotDetermined, |
146 | compareBound(CB(30, 40), CB(40, 50), CompareSelectOperation::kLT)); |
147 | ASSERT_EQ( |
148 | CmpEvalResult::NotDetermined, |
149 | compareBound(CB(30, 45), CB(40, 50), CompareSelectOperation::kLT)); |
150 | |
151 | ASSERT_EQ( |
152 | CmpEvalResult::False, |
153 | compareBound(CB(10, 20), CB(30, 40), CompareSelectOperation::kGE)); |
154 | ASSERT_EQ( |
155 | CmpEvalResult::True, |
156 | compareBound(CB(30, 40), CB(10, 20), CompareSelectOperation::kGE)); |
157 | ASSERT_EQ( |
158 | CmpEvalResult::True, |
159 | compareBound(CB(30, 40), CB(10, 30), CompareSelectOperation::kGE)); |
160 | ASSERT_EQ( |
161 | CmpEvalResult::NotDetermined, |
162 | compareBound(CB(10, 100), CB(10, 100), CompareSelectOperation::kGE)); |
163 | ASSERT_EQ( |
164 | CmpEvalResult::NotDetermined, |
165 | compareBound(CB(30, 40), CB(40, 50), CompareSelectOperation::kGE)); |
166 | ASSERT_EQ( |
167 | CmpEvalResult::NotDetermined, |
168 | compareBound(CB(30, 45), CB(40, 50), CompareSelectOperation::kGE)); |
169 | |
170 | ASSERT_EQ( |
171 | CmpEvalResult::False, |
172 | compareBound(CB(10, 20), CB(30, 40), CompareSelectOperation::kGT)); |
173 | ASSERT_EQ( |
174 | CmpEvalResult::False, |
175 | compareBound(CB(30, 40), CB(40, 50), CompareSelectOperation::kGT)); |
176 | ASSERT_EQ( |
177 | CmpEvalResult::NotDetermined, |
178 | compareBound(CB(10, 100), CB(10, 100), CompareSelectOperation::kGT)); |
179 | ASSERT_EQ( |
180 | CmpEvalResult::True, |
181 | compareBound(CB(30, 40), CB(10, 20), CompareSelectOperation::kGT)); |
182 | ASSERT_EQ( |
183 | CmpEvalResult::NotDetermined, |
184 | compareBound(CB(30, 40), CB(10, 30), CompareSelectOperation::kGT)); |
185 | ASSERT_EQ( |
186 | CmpEvalResult::NotDetermined, |
187 | compareBound(CB(30, 45), CB(40, 50), CompareSelectOperation::kGT)); |
188 | |
189 | ASSERT_EQ( |
190 | CmpEvalResult::True, |
191 | compareBound(CB(10, 20), CB(30, 40), CompareSelectOperation::kLE)); |
192 | ASSERT_EQ( |
193 | CmpEvalResult::True, |
194 | compareBound(CB(30, 40), CB(40, 50), CompareSelectOperation::kLE)); |
195 | ASSERT_EQ( |
196 | CmpEvalResult::NotDetermined, |
197 | compareBound(CB(10, 100), CB(10, 100), CompareSelectOperation::kLE)); |
198 | ASSERT_EQ( |
199 | CmpEvalResult::False, |
200 | compareBound(CB(30, 40), CB(10, 20), CompareSelectOperation::kLE)); |
201 | ASSERT_EQ( |
202 | CmpEvalResult::NotDetermined, |
203 | compareBound(CB(30, 40), CB(10, 30), CompareSelectOperation::kLE)); |
204 | ASSERT_EQ( |
205 | CmpEvalResult::NotDetermined, |
206 | compareBound(CB(30, 45), CB(40, 50), CompareSelectOperation::kLE)); |
207 | } |
208 | |
209 | TEST(MemDependency, BoundOverlapSymbolic) { |
210 | VarHandle x("x" , kInt); |
211 | VarHandle y("y" , kInt); |
212 | VarHandle z("z" , kInt); |
213 | VarHandle w("w" , kInt); |
214 | |
215 | using namespace analysis; |
216 | |
217 | auto CB = [](ExprHandle s, ExprHandle e) { |
218 | return Bound(s.node(), e.node()); |
219 | }; |
220 | |
221 | // Sanity check cases where the start and end is symbolic but the diff is |
222 | // constant. |
223 | // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) |
224 | ASSERT_EQ(OverlapKind::ContainedOrEqual, boundOverlap(CB(x, x), CB(x, x))); |
225 | ASSERT_EQ( |
226 | OverlapKind::PartialOverlap, |
227 | boundOverlap(CB(x, x + 3), CB(x + 2, x + 5))); |
228 | ASSERT_EQ(OverlapKind::NoOverlap, boundOverlap(CB(x, x), CB(x + 1, x + 1))); |
229 | |
230 | // We can't infer the sign of y, so cannot tell whether adding y is larger or |
231 | // smaller than y/2. |
232 | ASSERT_EQ( |
233 | OverlapKind::PartialOverlap, |
234 | boundOverlap(CB(x, x + y), CB(x, x + y / 2))); |
235 | |
236 | // No information about this bound, have to take the most conservative option: |
237 | // there may be an overlap. |
238 | ASSERT_EQ(OverlapKind::PartialOverlap, boundOverlap(CB(x, y), CB(z, w))); |
239 | |
240 | // Math on opaque terms works. |
241 | ASSERT_EQ( |
242 | OverlapKind::ContainedOrEqual, |
243 | boundOverlap(CB(x + w, y - z), CB(x + w, y - z))); |
244 | // Even requiring simplification. |
245 | ASSERT_EQ( |
246 | OverlapKind::ContainedOrEqual, |
247 | boundOverlap(CB(x - w - w, y), CB(x - w * 2, y))); |
248 | } |
249 | |
250 | // Tests the helper function for overlap of multi dimensional indices bounds. |
251 | // This uses boundOverlap on each dimension and return the "lowest" kind of |
252 | // overlap. |
253 | TEST(MemDependency, BoundOverlapMultiDim) { |
254 | using namespace analysis; |
255 | |
256 | auto CB = [](int s, int e) { |
257 | return Bound(alloc<IntImm>(s), alloc<IntImm>(e)); |
258 | }; |
259 | |
260 | // Sanity check one dimensional cases. |
261 | ASSERT_EQ(OverlapKind::ContainedOrEqual, overlaps({CB(0, 0)}, {CB(0, 0)})); |
262 | ASSERT_EQ(OverlapKind::NoOverlap, overlaps({CB(0, 2)}, {CB(5, 10)})); |
263 | ASSERT_EQ( |
264 | OverlapKind::PartialOverlap, overlaps({CB(0, 100)}, {CB(100, 120)})); |
265 | |
266 | // Total overlap in 3 dims. |
267 | ASSERT_EQ( |
268 | OverlapKind::ContainedOrEqual, |
269 | overlaps({CB(0, 2), CB(0, 5), CB(0, 4)}, {CB(0, 2), CB(0, 5), CB(0, 4)})); |
270 | ASSERT_EQ( |
271 | OverlapKind::ContainedOrEqual, |
272 | overlaps( |
273 | {CB(0, 2), CB(0, 5), CB(0, 4)}, {CB(0, 2), CB(0, 5), CB(0, 10)})); |
274 | |
275 | // Total overlap in 2 dims, no overlap in another. |
276 | ASSERT_EQ( |
277 | OverlapKind::NoOverlap, |
278 | overlaps( |
279 | {CB(0, 2), CB(0, 5), CB(0, 4)}, {CB(0, 2), CB(0, 5), CB(5, 10)})); |
280 | |
281 | // Total overlap in 2 dims, partial overlap in another. |
282 | ASSERT_EQ( |
283 | OverlapKind::PartialOverlap, |
284 | overlaps( |
285 | {CB(0, 2), CB(0, 5), CB(0, 5)}, {CB(0, 2), CB(0, 5), CB(5, 10)})); |
286 | // This case is most important, so verify the overlap in any dim. (dim 2) |
287 | ASSERT_EQ( |
288 | OverlapKind::PartialOverlap, |
289 | overlaps({CB(0, 2), CB(0, 5), CB(0, 5)}, {CB(0, 2), CB(2, 6), CB(0, 5)})); |
290 | // Dim 1. |
291 | ASSERT_EQ( |
292 | OverlapKind::PartialOverlap, |
293 | overlaps({CB(0, 2), CB(0, 5), CB(0, 5)}, {CB(1, 3), CB(0, 5), CB(0, 5)})); |
294 | // Total overlap in 1 dim, partial in 2. |
295 | ASSERT_EQ( |
296 | OverlapKind::PartialOverlap, |
297 | overlaps( |
298 | {CB(0, 2), CB(0, 5), CB(0, 5)}, {CB(2, 6), CB(0, 5), CB(5, 10)})); |
299 | // Total overlap, partial overlap, no overlap. |
300 | ASSERT_EQ( |
301 | OverlapKind::NoOverlap, |
302 | overlaps( |
303 | {CB(0, 2), CB(0, 5), CB(0, 5)}, {CB(2, 6), CB(11, 15), CB(0, 5)})); |
304 | |
305 | // Total overlap (B) in 2 dims, total overlap (A) in another. |
306 | ASSERT_EQ( |
307 | OverlapKind::Contains, |
308 | overlaps({CB(0, 2), CB(0, 5), CB(0, 4)}, {CB(0, 2), CB(0, 3), CB(0, 4)})); |
309 | |
310 | // Total overlap (A) in 2 dims, total overlap (B) in another. |
311 | ASSERT_EQ( |
312 | OverlapKind::Contains, |
313 | overlaps( |
314 | {CB(0, 12), CB(0, 15), CB(0, 4)}, {CB(0, 2), CB(0, 3), CB(0, 14)})); |
315 | |
316 | // Total (B), No Overlap, Total (A). |
317 | ASSERT_EQ( |
318 | OverlapKind::NoOverlap, |
319 | overlaps( |
320 | {CB(0, 2), CB(0, 5), CB(0, 5)}, {CB(0, 6), CB(11, 15), CB(1, 2)})); |
321 | } |
322 | |
323 | // Test the helper we use to subtract bounds: returns the regions(s) of A which |
324 | // remain after removing the region of B. |
325 | TEST(MemDependency, BoundSubtract) { |
326 | using namespace analysis; |
327 | |
328 | auto CB = [](int s, int e) { |
329 | return Bound(alloc<IntImm>(s), alloc<IntImm>(e)); |
330 | }; |
331 | auto EQ = [](const IndexBounds& x, const IndexBounds& y) { |
332 | return indexBoundsEquals(x, y); |
333 | }; |
334 | |
335 | // One element subtract. |
336 | ASSERT_EQ(subtractBound(CB(0, 0), CB(0, 0)).size(), 0); |
337 | ASSERT_EQ(subtractBound(CB(5, 5), CB(5, 5)).size(), 0); |
338 | |
339 | // No Overlap. |
340 | ASSERT_TRUE(EQ(subtractBound(CB(5, 5), CB(2, 2)), {CB(5, 5)})); |
341 | ASSERT_TRUE(EQ(subtractBound(CB(5, 5), CB(0, 4)), {CB(5, 5)})); |
342 | |
343 | // one side overlap. |
344 | ASSERT_TRUE(EQ(subtractBound(CB(1, 5), CB(4, 7)), {CB(1, 3)})); |
345 | ASSERT_TRUE(EQ(subtractBound(CB(0, 5), CB(5, 7)), {CB(0, 4)})); |
346 | ASSERT_TRUE(EQ(subtractBound(CB(4, 5), CB(1, 4)), {CB(5, 5)})); |
347 | ASSERT_TRUE(EQ(subtractBound(CB(1, 5), CB(0, 4)), {CB(5, 5)})); |
348 | |
349 | // both sides overlap. |
350 | ASSERT_TRUE(EQ(subtractBound(CB(1, 5), CB(0, 7)), {})); |
351 | ASSERT_TRUE(EQ(subtractBound(CB(5, 5), CB(5, 7)), {})); |
352 | |
353 | // internal overlap. |
354 | ASSERT_TRUE(EQ(subtractBound(CB(1, 5), CB(2, 3)), {CB(1, 1), CB(4, 5)})); |
355 | ASSERT_TRUE(EQ(subtractBound(CB(0, 5), CB(2, 4)), {CB(0, 1), CB(5, 5)})); |
356 | } |
357 | |
358 | TEST(MemDependency, BoundSubtractSymbolic) { |
359 | VarHandle x("x" , kInt); |
360 | VarHandle y("y" , kInt); |
361 | VarHandle z("z" , kInt); |
362 | VarHandle w("w" , kInt); |
363 | |
364 | using namespace analysis; |
365 | |
366 | auto CB = [](ExprHandle s, ExprHandle e) { |
367 | return Bound(s.node(), e.node()); |
368 | }; |
369 | auto EQ = [](const IndexBounds& x, const IndexBounds& y) { |
370 | return indexBoundsEquals(x, y); |
371 | }; |
372 | |
373 | // One element subtract. |
374 | // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) |
375 | ASSERT_TRUE(EQ(subtractBound(CB(x, x), CB(x, x)), {})); |
376 | ASSERT_TRUE(EQ(subtractBound(CB(x + 1, x + 1), CB(x + 1, x + 1)), {})); |
377 | ASSERT_TRUE(EQ(subtractBound(CB(x * 2, x * 2), CB(x * 2, x * 2)), {})); |
378 | |
379 | // Subtract constant range low. |
380 | ASSERT_TRUE( |
381 | EQ(subtractBound(CB(x, x + 10), CB(x, x + 4)), {CB(x + 5, x + 10)})); |
382 | // Subtract constant range high. |
383 | ASSERT_TRUE( |
384 | EQ(subtractBound(CB(x, x + 10), CB(x + 6, x + 12)), {CB(x, x + 5)})); |
385 | // Subtract constant range total overlap. |
386 | ASSERT_TRUE(EQ(subtractBound(CB(x, x + 10), CB(x, x + 10)), {})); |
387 | ASSERT_TRUE(EQ(subtractBound(CB(x + 2, x + 10), CB(x, x + 12)), {})); |
388 | // Subtract constant range internal. |
389 | ASSERT_TRUE( |
390 | EQ(subtractBound(CB(x, x + 10), CB(x + 3, x + 7)), |
391 | {CB(x, x + 2), CB(x + 8, x + 10)})); |
392 | |
393 | // Size is inferable but not constant, only works with a single var. |
394 | ASSERT_TRUE(EQ(subtractBound(CB(0, x), CB(0, x * 2)), {})); |
395 | ASSERT_TRUE(EQ(subtractBound(CB(0, x * 2), CB(0, x - 1)), {CB(x, x * 2)})); |
396 | |
397 | // Size is not inferable. |
398 | ASSERT_TRUE(EQ(subtractBound(CB(x, y), CB(z, w)), {CB(x, y)})); |
399 | ASSERT_TRUE(EQ(subtractBound(CB(x, y), CB(x, z)), {CB(x, y)})); |
400 | ASSERT_TRUE(EQ(subtractBound(CB(x, y), CB(0, x)), {CB(x, y)})); |
401 | ASSERT_TRUE(EQ(subtractBound(CB(x, x), CB(0, 0)), {CB(x, x)})); |
402 | } |
403 | |
404 | // Tests the helper function that does subtraction, but for multi dimensional |
405 | // indices bounds. |
406 | TEST(MemDependency, BoundSubtractMultiDim) { |
407 | using namespace analysis; |
408 | |
409 | auto CB = [](int s, int e) { |
410 | return Bound(alloc<IntImm>(s), alloc<IntImm>(e)); |
411 | }; |
412 | auto EQ = [](std::vector<IndexBounds> x, std::vector<IndexBounds> y) { |
413 | if (x.size() != y.size()) { |
414 | return false; |
415 | } |
416 | for (auto i = 0U; i < x.size(); ++i) { |
417 | if (!indexBoundsEquals(x[i], y[i])) { |
418 | return false; |
419 | } |
420 | } |
421 | return true; |
422 | }; |
423 | |
424 | // sanity check one dimension. |
425 | ASSERT_TRUE(EQ(subtractIndicesBounds({CB(0, 9)}, {CB(0, 9)}), {})); |
426 | ASSERT_TRUE(EQ(subtractIndicesBounds({CB(3, 9)}, {CB(0, 12)}), {})); |
427 | ASSERT_TRUE( |
428 | EQ(subtractIndicesBounds({CB(0, 12)}, {CB(0, 9)}), {{CB(10, 12)}})); |
429 | ASSERT_TRUE( |
430 | EQ(subtractIndicesBounds({CB(0, 12)}, {CB(3, 12)}), {{CB(0, 2)}})); |
431 | ASSERT_TRUE(EQ( |
432 | subtractIndicesBounds({CB(0, 9)}, {CB(1, 8)}), {{CB(0, 0)}, {CB(9, 9)}})); |
433 | |
434 | // Multi dim total overlap. |
435 | ASSERT_TRUE(EQ( |
436 | subtractIndicesBounds({CB(0, 9), CB(0, 2)}, {CB(0, 9), CB(0, 2)}), {})); |
437 | ASSERT_TRUE(EQ( |
438 | subtractIndicesBounds({CB(0, 9), CB(0, 2)}, {CB(0, 10), CB(0, 20)}), {})); |
439 | |
440 | // Mutli dim one way partial in dim 1. |
441 | ASSERT_TRUE( |
442 | EQ(subtractIndicesBounds({CB(0, 9), CB(0, 2)}, {CB(0, 3), CB(0, 2)}), |
443 | {{CB(4, 9), CB(0, 2)}})); |
444 | |
445 | // Mutli dim one way partial in dim 2. |
446 | ASSERT_TRUE( |
447 | EQ(subtractIndicesBounds({CB(0, 9), CB(0, 20)}, {CB(0, 9), CB(0, 10)}), |
448 | {{CB(0, 9), CB(11, 20)}})); |
449 | |
450 | // Partial overlap in 2 dims. |
451 | ASSERT_TRUE( |
452 | EQ(subtractIndicesBounds({CB(0, 5), CB(0, 5)}, {CB(2, 8), CB(2, 8)}), |
453 | {{CB(0, 1), CB(0, 5)}, {CB(2, 5), CB(0, 1)}})); |
454 | |
455 | // Partial overlap in 3 dims. |
456 | ASSERT_TRUE( |
457 | EQ(subtractIndicesBounds( |
458 | {CB(0, 5), CB(0, 5), CB(0, 5)}, {CB(2, 8), CB(2, 8), CB(2, 8)}), |
459 | {{CB(0, 1), CB(0, 5), CB(0, 5)}, |
460 | {CB(2, 5), CB(0, 1), CB(0, 5)}, |
461 | {CB(2, 5), CB(2, 5), CB(0, 1)}})); |
462 | } |
463 | |
464 | // Tests the multi dimensional subtraction code for bounds that cannot be fully |
465 | // materialized. |
466 | TEST(MemDependency, BoundSubtractMultiDimSymbolic) { |
467 | VarHandle x("x" , kInt); |
468 | VarHandle y("y" , kInt); |
469 | |
470 | using namespace analysis; |
471 | |
472 | auto CB = [](ExprHandle s, ExprHandle e) { |
473 | return Bound(s.node(), e.node()); |
474 | }; |
475 | |
476 | auto EQ = [](std::vector<IndexBounds> x, std::vector<IndexBounds> y) { |
477 | if (x.size() != y.size()) { |
478 | return false; |
479 | } |
480 | for (auto i = 0U; i < x.size(); ++i) { |
481 | if (!indexBoundsEquals(x[i], y[i])) { |
482 | return false; |
483 | } |
484 | } |
485 | return true; |
486 | }; |
487 | |
488 | // Cannot determine overlaps. |
489 | // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) |
490 | ASSERT_TRUE(EQ(subtractIndicesBounds({CB(x, x)}, {CB(0, 0)}), {{CB(x, x)}})); |
491 | |
492 | // Various total Overlaps. |
493 | ASSERT_TRUE(EQ( |
494 | subtractIndicesBounds({CB(x, x), CB(x, x)}, {CB(x, x), CB(x, x)}), {})); |
495 | ASSERT_TRUE(EQ( |
496 | subtractIndicesBounds({CB(x, y), CB(x, y)}, {CB(x, y), CB(x, y)}), {})); |
497 | ASSERT_TRUE(EQ( |
498 | subtractIndicesBounds({CB(x, x), CB(y, y)}, {CB(x, x), CB(y, y)}), {})); |
499 | ASSERT_TRUE(EQ( |
500 | subtractIndicesBounds({CB(0, x), CB(0, y)}, {CB(0, x), CB(0, y)}), {})); |
501 | |
502 | // one-way overlap in first dim. |
503 | ASSERT_TRUE( |
504 | EQ(subtractIndicesBounds({CB(0, x), CB(0, y)}, {CB(0, x - 5), CB(0, y)}), |
505 | {{CB(x - 4, x), CB(0, y)}})); |
506 | // second dim. |
507 | ASSERT_TRUE( |
508 | EQ(subtractIndicesBounds({CB(0, x), CB(0, y)}, {CB(0, x), CB(5, y)}), |
509 | {{CB(0, x), CB(0, 4)}})); |
510 | |
511 | // Internal overlap in first dim. |
512 | ASSERT_TRUE( |
513 | EQ(subtractIndicesBounds({CB(0, x), CB(0, y)}, {CB(2, x - 5), CB(0, y)}), |
514 | {{CB(0, 1), CB(0, y)}, {CB(x - 4, x), CB(0, y)}})); |
515 | // second dim. |
516 | ASSERT_TRUE(EQ( |
517 | subtractIndicesBounds({CB(0, x), CB(0, y)}, {CB(0, x), CB(10, y - 10)}), |
518 | {{CB(0, x), CB(0, 9)}, {CB(0, x), CB(y - 9, y)}})); |
519 | |
520 | // Overlap in both dimensions. |
521 | ASSERT_TRUE( |
522 | EQ(subtractIndicesBounds( |
523 | {CB(0, x), CB(0, y)}, {CB(5, x - 5), CB(10, y - 10)}), |
524 | { |
525 | {CB(0, 4), CB(0, y)}, |
526 | {CB(x - 4, x), CB(0, y)}, |
527 | {CB(0, x), CB(0, 9)}, |
528 | {CB(0, x), CB(y - 9, y)}, |
529 | })); |
530 | } |
531 | |
532 | // Simple check that the analyzer does anything at all... |
533 | TEST(MemDependency, MemDependencyCheckerSimple) { |
534 | BufHandle a("A" , {1}, kInt); |
535 | BufHandle b("B" , {1}, kInt); |
536 | |
537 | analysis::MemDependencyChecker analyzer; |
538 | |
539 | /* |
540 | * A[0] = 3; |
541 | * B[0] = A[0] + 1; |
542 | */ |
543 | |
544 | StorePtr aStore = Store::make(a, {0}, 3); |
545 | StorePtr bStore = Store::make(b, {0}, Add::make(Load::make(a, {0}), 1)); |
546 | |
547 | StmtPtr stmt = Block::make({aStore, bStore}); |
548 | |
549 | stmt->accept(&analyzer); |
550 | |
551 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, aStore)); |
552 | ASSERT_FALSE(analyzer.dependsIndirectly(aStore, bStore)); |
553 | // sanity check, but anything that depends directly must depend indirectly. |
554 | ASSERT_TRUE(analyzer.dependsIndirectly(bStore, aStore)); |
555 | } |
556 | |
557 | // Check that there is a difference between direct and indirect dependence. |
558 | TEST(MemDependency, MemDependencyCheckerMultiStmt) { |
559 | BufHandle a("A" , {1}, kInt); |
560 | BufHandle b("B" , {1}, kInt); |
561 | BufHandle c("C" , {1}, kInt); |
562 | |
563 | analysis::MemDependencyChecker analyzer; |
564 | |
565 | /* |
566 | * A[0] = 3; |
567 | * B[0] = A[0]; |
568 | * C[0] = B[0] + 1; |
569 | */ |
570 | |
571 | StorePtr aStore = Store::make(a, {0}, 3); |
572 | StorePtr bStore = Store::make(b, {0}, Load::make(a, {0})); |
573 | StorePtr cStore = Store::make(c, {0}, Add::make(Load::make(b, {0}), 1)); |
574 | |
575 | StmtPtr stmt = Block::make({aStore, bStore, cStore}); |
576 | |
577 | stmt->accept(&analyzer); |
578 | |
579 | // C depends on A indirectly. |
580 | ASSERT_FALSE(analyzer.dependsDirectly(cStore, aStore)); |
581 | ASSERT_TRUE(analyzer.dependsIndirectly(cStore, aStore)); |
582 | |
583 | // C depends on B directly, which depends on A directly. |
584 | ASSERT_TRUE(analyzer.dependsDirectly(cStore, bStore)); |
585 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, aStore)); |
586 | |
587 | // Dependency goes top to bottom only. |
588 | ASSERT_FALSE(analyzer.dependsIndirectly(bStore, cStore)); |
589 | ASSERT_FALSE(analyzer.dependsIndirectly(aStore, bStore)); |
590 | ASSERT_FALSE(analyzer.dependsIndirectly(aStore, cStore)); |
591 | } |
592 | |
593 | // Verify that we do filter writes that are totally overlapped by later writes. |
594 | TEST(MemDependency, MemDependencyCheckerOverlap) { |
595 | BufHandle a("A" , {1}, kInt); |
596 | BufHandle b("B" , {1}, kInt); |
597 | |
598 | analysis::MemDependencyChecker analyzer; |
599 | |
600 | /* |
601 | * A[0] = 3; |
602 | * A[0] = 6; |
603 | * B[0] = A[0] + 1; |
604 | */ |
605 | |
606 | StorePtr aStore = Store::make(a, {0}, 3); |
607 | StorePtr a2Store = Store::make(a, {0}, 6); |
608 | StorePtr bStore = Store::make(b, {0}, Add::make(Load::make(a, {0}), 1)); |
609 | |
610 | StmtPtr stmt = Block::make({aStore, a2Store, bStore}); |
611 | |
612 | stmt->accept(&analyzer); |
613 | |
614 | // B store depends on second A store but not first since it is completely |
615 | // overlapped. |
616 | ASSERT_TRUE(analyzer.dependsIndirectly(bStore, a2Store)); |
617 | ASSERT_FALSE(analyzer.dependsIndirectly(bStore, aStore)); |
618 | |
619 | // No dependency between either A store. |
620 | ASSERT_FALSE(analyzer.dependsIndirectly(aStore, a2Store)); |
621 | ASSERT_FALSE(analyzer.dependsIndirectly(a2Store, aStore)); |
622 | } |
623 | |
624 | // Verify that bounds match loop iterations, and that dependencies progress |
625 | // across loop scopes. |
626 | TEST(MemDependency, MemDependencyCheckerLoop) { |
627 | BufHandle a("A" , {1}, kInt); |
628 | BufHandle b("B" , {1}, kInt); |
629 | VarHandle x("x" , kInt); |
630 | |
631 | using namespace analysis; |
632 | |
633 | MemDependencyChecker analyzer; |
634 | |
635 | /* |
636 | * for (int x = 0; x < 10; ++x) { |
637 | * A[x] = x; |
638 | * } |
639 | * B[0] = A[0] + 1; |
640 | */ |
641 | |
642 | StorePtr aStore = Store::make(a, {x}, x); |
643 | StmtPtr loop = For::make(x, 0, 10, aStore); |
644 | StorePtr bStore = Store::make(b, {0}, Add::make(Load::make(a, {4}), 1)); |
645 | |
646 | StmtPtr stmt = Block::make({loop, bStore}); |
647 | |
648 | stmt->accept(&analyzer); |
649 | |
650 | // Same A->B dependency. |
651 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, aStore)); |
652 | |
653 | // B depends on the loop. |
654 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, loop)); |
655 | // A is in the loop but does not depend on any loop iteration. |
656 | ASSERT_FALSE(analyzer.dependsIndirectly(aStore, loop)); |
657 | |
658 | auto aStoreAccess = analyzer.accessFor(aStore); |
659 | ASSERT_NE(aStoreAccess, nullptr); |
660 | |
661 | // It should have bounds covering the range of x: 0 <= x < 10. |
662 | ASSERT_TRUE(indexBoundsEquals( |
663 | aStoreAccess->bounds(), {Bound(alloc<IntImm>(0), alloc<IntImm>(9))})); |
664 | } |
665 | |
666 | // Reductions should promote dependencies as well. |
667 | TEST(MemDependency, MemDependencyCheckerLoopReduce) { |
668 | BufHandle a("A" , {10}, kInt); |
669 | BufHandle b("B" , {10}, kInt); |
670 | VarHandle x("x" , kInt); |
671 | |
672 | using namespace analysis; |
673 | |
674 | MemDependencyChecker analyzer; |
675 | |
676 | /* |
677 | * A[0] = 0; |
678 | * for (int x = 0; x < 10; ++x) { |
679 | * A[0] = A[x] + 1; |
680 | * } |
681 | * B[0] = A[0]; |
682 | */ |
683 | |
684 | StorePtr aInit = Store::make(a, {0}, 0); |
685 | ExprHandle reduce = Sum()(a, 1, {x}, {x}); |
686 | StorePtr aReduce = Store::make(a, {0}, reduce); |
687 | StmtPtr loop = For::make(x, 0, 10, aReduce); |
688 | StorePtr bStore = Store::make(b, {0}, Load::make(a, {0})); |
689 | |
690 | StmtPtr stmt = Block::make({aInit, loop, bStore}); |
691 | |
692 | stmt->accept(&analyzer); |
693 | |
694 | // B -> A. |
695 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, aReduce)); |
696 | |
697 | // B depends indirectly on the intializer of A, since the reduction depends |
698 | // on it. |
699 | ASSERT_FALSE(analyzer.dependsDirectly(bStore, aInit)); |
700 | ASSERT_TRUE(analyzer.dependsIndirectly(bStore, aInit)); |
701 | |
702 | ASSERT_TRUE(analyzer.dependsDirectly(aReduce, aInit)); |
703 | |
704 | // B depends on the loop. |
705 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, loop)); |
706 | // A is in the loop and depends on other iterations. |
707 | ASSERT_TRUE(analyzer.dependsDirectly(aReduce, loop)); |
708 | |
709 | // The loop contents depend on the initializer too. |
710 | ASSERT_TRUE(analyzer.dependsDirectly(loop, aInit)); |
711 | |
712 | // Find loads within the reduction: |
713 | auto reduceLoads = NodeFinder<Load>::find(reduce.node()); |
714 | // Pull out the access for the load inside the loop. |
715 | for (auto load : reduceLoads) { |
716 | auto loopLoad = analyzer.accessFor(load); |
717 | // It should have 10 element long bounds. |
718 | ASSERT_TRUE(indexBoundsEquals( |
719 | loopLoad->bounds(), {Bound(alloc<IntImm>(0), alloc<IntImm>(9))})); |
720 | } |
721 | } |
722 | |
723 | // Lowering a reduction doesn't affect dependency analysis. |
724 | TEST(MemDependency, MemDependencyCheckerLoopReduceExpanded) { |
725 | BufHandle a("A" , {10}, kInt); |
726 | BufHandle b("B" , {10}, kInt); |
727 | VarHandle x("x" , kInt); |
728 | |
729 | using namespace analysis; |
730 | |
731 | MemDependencyChecker analyzer; |
732 | |
733 | /* |
734 | * A[0] = 0; |
735 | * for (int x = 0; x < 10; ++x) { |
736 | * A[0] = A[x] + 1; |
737 | * } |
738 | * B[0] = A[0]; |
739 | */ |
740 | |
741 | StorePtr aInit = Store::make(a, {0}, 0); |
742 | ExprHandle aLoad = Load::make(a, {x}); |
743 | StorePtr aReduce = Store::make(a, {0}, Add::make(aLoad, 1)); |
744 | StmtPtr loop = For::make(x, 0, 10, aReduce); |
745 | StorePtr bStore = Store::make(b, {0}, Load::make(a, {0})); |
746 | |
747 | StmtPtr stmt = Block::make({aInit, loop, bStore}); |
748 | |
749 | stmt->accept(&analyzer); |
750 | |
751 | // B -> A. |
752 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, aReduce)); |
753 | |
754 | // B depends indirectly on the intializer of A, since the reduction depends |
755 | // on it. |
756 | ASSERT_FALSE(analyzer.dependsDirectly(bStore, aInit)); |
757 | ASSERT_TRUE(analyzer.dependsIndirectly(bStore, aInit)); |
758 | |
759 | ASSERT_TRUE(analyzer.dependsDirectly(aReduce, aInit)); |
760 | |
761 | // B depends on the loop. |
762 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, loop)); |
763 | // A is in the loop and depends on other iterations. |
764 | ASSERT_TRUE(analyzer.dependsDirectly(aReduce, loop)); |
765 | |
766 | // The loop contents depend on the initializer too. |
767 | ASSERT_TRUE(analyzer.dependsDirectly(loop, aInit)); |
768 | |
769 | // Pull out the access for the store inside the loop. |
770 | auto loopLoad = analyzer.accessFor(aLoad.node()); |
771 | // It should have 10 element long bounds. |
772 | ASSERT_TRUE(indexBoundsEquals( |
773 | loopLoad->bounds(), {Bound(alloc<IntImm>(0), alloc<IntImm>(9))})); |
774 | } |
775 | |
776 | // Can determine dependencies of outputs, through to inputs. |
777 | TEST(MemDependency, MemDependencyCheckerInputsOutputs) { |
778 | BufHandle a("A" , {10}, kInt); |
779 | BufHandle b("B" , {10}, kInt); |
780 | VarHandle x("x" , kInt); |
781 | |
782 | // initialize analyzer with inputs and outputs. |
783 | analysis::MemDependencyChecker analyzer({a}, {b}); |
784 | |
785 | // Here's a Relu. |
786 | /* |
787 | * for (int x = 0; x < 10; ++x) { |
788 | * B[x] = Max(A[x], 0); |
789 | * } |
790 | */ |
791 | |
792 | ExprHandle aLoad = Load::make(a, {x}); |
793 | StorePtr bStore = Store::make(b, {x}, Max::make(aLoad, 0, true)); |
794 | StmtPtr loop = For::make(x, 0, 10, bStore); |
795 | |
796 | StmtPtr stmt = Block::make({loop}); |
797 | |
798 | stmt->accept(&analyzer); |
799 | |
800 | // Output depends indirectly on input. |
801 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
802 | // aLoad depends directly on the input A. |
803 | ASSERT_TRUE(analyzer.dependsDirectly(aLoad.node(), a.node())); |
804 | // bStore therefore depends directly on the input A. |
805 | ASSERT_TRUE(analyzer.dependsDirectly(bStore, a.node())); |
806 | // The output depends directly on the store. |
807 | ASSERT_TRUE(analyzer.dependsDirectly(b.node(), bStore)); |
808 | |
809 | // Check AccessInfo based overloads. |
810 | auto input = analyzer.input(a.node()); |
811 | auto output = analyzer.output(b.node()); |
812 | |
813 | // Output depends indirectly on input. |
814 | ASSERT_TRUE(analyzer.dependsIndirectly(output, input)); |
815 | // Not directly. |
816 | ASSERT_FALSE(analyzer.dependsDirectly(output, input)); |
817 | // Not in reverse order. |
818 | ASSERT_FALSE(analyzer.dependsIndirectly(input, output)); |
819 | |
820 | // output -> bStore -> bLoad -> input. |
821 | auto storeAccess = analyzer.accessFor(bStore); |
822 | auto loadAccess = analyzer.accessFor(aLoad.node()); |
823 | |
824 | ASSERT_TRUE(analyzer.dependsDirectly(output, storeAccess)); |
825 | ASSERT_TRUE(analyzer.dependsDirectly(loadAccess, input)); |
826 | } |
827 | |
828 | // Can tell if an output does not depend on an input. |
829 | TEST(MemDependency, MemDependencyCheckerOutputDoesntDepend) { |
830 | BufHandle a("A" , {10}, kInt); |
831 | BufHandle b("B" , {10}, kInt); |
832 | VarHandle x("x" , kInt); |
833 | |
834 | // initialize analyzer with inputs and outputs. |
835 | analysis::MemDependencyChecker analyzer({a}, {b}); |
836 | |
837 | // Here's a dumb Relu. |
838 | /* |
839 | * for (int x = 0; x < 10; ++x) { |
840 | * B[x] = Max(x, 0); |
841 | * } |
842 | */ |
843 | |
844 | StorePtr bStore = Store::make(b, {x}, Max::make(x, 0, true)); |
845 | StmtPtr loop = For::make(x, 0, 10, bStore); |
846 | |
847 | StmtPtr stmt = Block::make({loop}); |
848 | |
849 | stmt->accept(&analyzer); |
850 | |
851 | // Output does not depend indirectly on input. |
852 | ASSERT_FALSE(analyzer.dependsIndirectly(b.node(), a.node())); |
853 | |
854 | // The output still depends directly on the store. |
855 | ASSERT_TRUE(analyzer.dependsDirectly(b.node(), bStore)); |
856 | |
857 | // Check AccessInfo based overloads. |
858 | auto input = analyzer.input(a.node()); |
859 | auto output = analyzer.output(b.node()); |
860 | |
861 | // Output does not depend indirectly on input. |
862 | ASSERT_FALSE(analyzer.dependsIndirectly(output, input)); |
863 | } |
864 | |
865 | // Verify different loop extents produce accesses with different bounds, and |
866 | // that later accesses find dependencies that overlap their entire bound range. |
867 | TEST(MemDependency, MemDependencyCheckerLoopBounds) { |
868 | BufHandle a("A" , {10}, kInt); |
869 | BufHandle b("B" , {10}, kInt); |
870 | BufHandle c("C" , {10}, kInt); |
871 | VarHandle x("x" , kInt); |
872 | using namespace analysis; |
873 | |
874 | MemDependencyChecker analyzer({a}, {c}); |
875 | |
876 | // This enables using the execution order of the loops to determine if some |
877 | // loops are self dependent or not. |
878 | analyzer.allowLoopExecutionOrderAnalysis(); |
879 | |
880 | /* |
881 | * for (int x = 1; x < 10; ++x) { |
882 | * B[x] = A[x]; |
883 | * } |
884 | * for (int x = 1; x < 9; ++x) { |
885 | * B[x] = B[x] * 2; |
886 | * } |
887 | * for (int x = 3; x < 4; ++x) { |
888 | * C[x] = A[x]; |
889 | * } |
890 | * for (int x = 0; x < 10; ++x) { |
891 | * C[x] = B[x]; |
892 | * } |
893 | */ |
894 | |
895 | std::vector<StmtPtr> stmts( |
896 | {For::make(x, 1, 10, Store::make(b, {x}, Load::make(a, {x}))), |
897 | For::make( |
898 | x, 1, 9, Store::make(b, {x}, Mul::make(Load::make(b, {x}), 2))), |
899 | For::make(x, 3, 4, Store::make(c, {x}, Load::make(a, {x}))), |
900 | For::make(x, 0, 10, Store::make(c, {x}, Load::make(b, {x})))}); |
901 | |
902 | StmtPtr stmt = Block::make(stmts); |
903 | |
904 | stmt->accept(&analyzer); |
905 | |
906 | auto input = analyzer.input(a.node()); |
907 | auto output = analyzer.output(c.node()); |
908 | |
909 | // sanity check Output -> Input. |
910 | ASSERT_TRUE(analyzer.dependsIndirectly(output, input)); |
911 | |
912 | // Check the For loop dependencies: |
913 | |
914 | // Last write to C depends on both writes to B since they contain the last |
915 | // write to at least one element. |
916 | ASSERT_TRUE(analyzer.dependsIndirectly(stmts[3], stmts[1])); |
917 | ASSERT_TRUE(analyzer.dependsIndirectly(stmts[3], stmts[0])); |
918 | |
919 | // The last write to C does not depend on the other write to C. |
920 | ASSERT_FALSE(analyzer.dependsIndirectly(stmts[3], stmts[2])); |
921 | |
922 | auto CB = [](int s, int e) { |
923 | return Bound(alloc<IntImm>(s), alloc<IntImm>(e)); |
924 | }; |
925 | auto EQ = [](const IndexBounds& x, const IndexBounds& y) { |
926 | return indexBoundsEquals(x, y); |
927 | }; |
928 | |
929 | /* 0. Input: A[(0, 9)] - dependents: 1 5 |
930 | * 1. Load: A[(1, 9)] - depends on: 0 - dependents: 2 |
931 | * 2. Store: B[(1, 9)] - depends on: 1 - dependents: 3 7 |
932 | * 3. Load: B[(1, 8)] - depends on: 2 - dependents: 4 |
933 | * 4. Store: B[(1, 8)] - depends on: 3 - dependents: 7 |
934 | * 5. Load: A[(3, 3)] - depends on: 0 - dependents: 6 |
935 | * 6. Store: C[(3, 3)] - depends on: 5 |
936 | * 7. Load: B[(0, 9)] - depends on: 2 4 - dependents: 8 |
937 | * 8. Store: C[(0, 9)] - depends on: 7 - dependents: 9 |
938 | * 9. Output: C[(0, 9)] - depends on: 8 |
939 | */ |
940 | |
941 | // Now let's look at the bounds of each access. |
942 | // There are 9 accesses in this Stmt, so this is exhaustive, we wont do this |
943 | // much. |
944 | auto history = analyzer.getHistory(); |
945 | ASSERT_EQ(history.size(), 10); |
946 | VarPtr aVar = a.node()->base_handle(); |
947 | VarPtr bVar = b.node()->base_handle(); |
948 | VarPtr cVar = c.node()->base_handle(); |
949 | |
950 | // The first access is the input A. |
951 | ASSERT_EQ(history[0]->type(), AccessType::Input); |
952 | ASSERT_EQ(history[0]->var(), aVar); |
953 | // It has the bounds of the producing Input. |
954 | ASSERT_TRUE(EQ(history[0]->bounds(), {CB(0, 9)})); |
955 | // sanity check the input we retrieved earlier matches. |
956 | ASSERT_EQ(history[0], input); |
957 | |
958 | // The second access is the load of A in the first loop. |
959 | ASSERT_EQ(history[1]->type(), AccessType::Load); |
960 | ASSERT_EQ(history[1]->var(), aVar); |
961 | // It has the bounds of the loop, i.e. start == 1. |
962 | ASSERT_TRUE(EQ(history[1]->bounds(), {CB(1, 9)})); |
963 | // It reads from A, so it should have a dependency on the last write to this |
964 | // range - with is the input. |
965 | ASSERT_EQ(history[1]->dependencies().size(), 1); |
966 | ASSERT_TRUE(history[1]->hasDependency(history[0])); |
967 | |
968 | // The third access is the store into B in the first loop. |
969 | ASSERT_EQ(history[2]->type(), AccessType::Store); |
970 | ASSERT_EQ(history[2]->var(), bVar); |
971 | // It also has the bounds of the loop, i.e. start == 1. |
972 | ASSERT_TRUE(EQ(history[2]->bounds(), {CB(1, 9)})); |
973 | // The previous load is in its RHS, so it depends on it. |
974 | ASSERT_EQ(history[2]->dependencies().size(), 1); |
975 | ASSERT_TRUE(history[2]->hasDependency(history[1])); |
976 | |
977 | // The third access is the load from B in the second loop. |
978 | ASSERT_EQ(history[3]->type(), AccessType::Load); |
979 | ASSERT_EQ(history[3]->var(), bVar); |
980 | // It has the bounds of the second loop, i.e. >= 1 < 9. |
981 | ASSERT_TRUE(EQ(history[3]->bounds(), {CB(1, 8)})); |
982 | // It reads from B in a smaller range, so should depend on the previous |
983 | // store. |
984 | ASSERT_EQ(history[3]->dependencies().size(), 1); |
985 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
986 | |
987 | // The fourth: the store to B in the second loop. |
988 | ASSERT_EQ(history[4]->type(), AccessType::Store); |
989 | ASSERT_EQ(history[4]->var(), bVar); |
990 | // It also has the bounds of the second loop. |
991 | ASSERT_TRUE(EQ(history[4]->bounds(), {CB(1, 8)})); |
992 | // The previous load is in its RHS, so it depends on it as before. |
993 | ASSERT_EQ(history[4]->dependencies().size(), 1); |
994 | ASSERT_TRUE(history[4]->hasDependency(history[3])); |
995 | |
996 | // The fifth access is the load is from the 3rd loop, and skips previous B |
997 | // accesses. |
998 | ASSERT_EQ(history[5]->type(), AccessType::Load); |
999 | ASSERT_EQ(history[5]->var(), aVar); |
1000 | // It has the bounds of the third loop: >= 3 < 4. |
1001 | ASSERT_TRUE(EQ(history[5]->bounds(), {CB(3, 3)})); |
1002 | // It depends on the last thing to write to A, which is the A input. |
1003 | ASSERT_EQ(history[5]->dependencies().size(), 1); |
1004 | ASSERT_TRUE(history[5]->hasDependency(history[0])); |
1005 | |
1006 | // Sixth: the store into the output C. |
1007 | ASSERT_EQ(history[6]->type(), AccessType::Store); |
1008 | ASSERT_EQ(history[6]->var(), cVar); |
1009 | // It also has the bounds of the third loop. |
1010 | ASSERT_TRUE(EQ(history[6]->bounds(), {CB(3, 3)})); |
1011 | // The previous load is in its RHS, so it depends on it as always. |
1012 | ASSERT_EQ(history[6]->dependencies().size(), 1); |
1013 | ASSERT_TRUE(history[6]->hasDependency(history[5])); |
1014 | |
1015 | // The seventh access is the load of B in the fourth loop. |
1016 | ASSERT_EQ(history[7]->type(), AccessType::Load); |
1017 | ASSERT_EQ(history[7]->var(), bVar); |
1018 | // It has the bounds of the final loop, >= 0 < 10 |
1019 | ASSERT_TRUE(EQ(history[7]->bounds(), {CB(0, 9)})); |
1020 | // The bounds of this read are larger than the bounds of the previous write, |
1021 | // so it depends on both previous Stores to B. |
1022 | ASSERT_EQ(history[7]->dependencies().size(), 2); |
1023 | ASSERT_TRUE(history[7]->hasDependency(history[2])); |
1024 | ASSERT_TRUE(history[7]->hasDependency(history[4])); |
1025 | |
1026 | // Eight: the final store into the output C. |
1027 | ASSERT_EQ(history[8]->type(), AccessType::Store); |
1028 | ASSERT_EQ(history[8]->var(), cVar); |
1029 | // It also has the bounds of the final loop. |
1030 | ASSERT_TRUE(EQ(history[8]->bounds(), {CB(0, 9)})); |
1031 | // The previous load is in its RHS, so it depends on it as always. |
1032 | ASSERT_EQ(history[8]->dependencies().size(), 1); |
1033 | ASSERT_TRUE(history[8]->hasDependency(history[7])); |
1034 | |
1035 | // The last access represents the output Buf. |
1036 | ASSERT_EQ(history[9]->type(), AccessType::Output); |
1037 | ASSERT_EQ(history[9]->var(), cVar); |
1038 | // It has the bounds of the output Buf. |
1039 | ASSERT_TRUE(EQ(history[9]->bounds(), {CB(0, 9)})); |
1040 | // sanity check the input we retrieved earlier matches. |
1041 | ASSERT_EQ(history[9], output); |
1042 | // It depends on the last write to C only. |
1043 | ASSERT_EQ(history[9]->dependencies().size(), 1); |
1044 | ASSERT_TRUE(history[9]->hasDependency(history[8])); |
1045 | } |
1046 | |
1047 | // Verify that we can still infer bounds when the loop var is offset. |
1048 | TEST(MemDependency, MemDependencyCheckerLoopBoundsIndexShift) { |
1049 | BufHandle a("A" , {10}, kInt); |
1050 | BufHandle b("B" , {10}, kInt); |
1051 | VarHandle x("x" , kInt); |
1052 | |
1053 | using namespace analysis; |
1054 | |
1055 | MemDependencyChecker analyzer({a}, {b}); |
1056 | |
1057 | // This enables using the execution order of the loops to determine if some |
1058 | // loops are self dependent or not. |
1059 | analyzer.allowLoopExecutionOrderAnalysis(); |
1060 | |
1061 | /* |
1062 | * for (int x = 1; x < 10; x++) { |
1063 | * A[x] = A[x - 1]; |
1064 | * } |
1065 | * for (int x = 0; x < 9; x++) { |
1066 | * A[x] = A[x + 1]; |
1067 | * } |
1068 | * for (int x = 0; x < 9; x++) { |
1069 | * A[9 - x] = A[8 - x]; |
1070 | * } |
1071 | * for (int x = 0; x < 10; x++) { |
1072 | * A[x] = A[9 - x]; |
1073 | * } |
1074 | * for (int x = 0; x < 10; x++) { |
1075 | * B[x] = A[x]; |
1076 | * } |
1077 | */ |
1078 | |
1079 | StmtPtr stmt = Block::make( |
1080 | {For::make(x, 1, 10, Store::make(a, {x}, Load::make(a, {x - 1}))), |
1081 | For::make(x, 0, 9, Store::make(a, {x}, Load::make(a, {x + 1}))), |
1082 | For::make( |
1083 | x, |
1084 | 0, |
1085 | 9, |
1086 | Store::make( |
1087 | a, {ExprHandle(9) - x}, Load::make(a, {ExprHandle(8) - x}))), |
1088 | For::make( |
1089 | x, 0, 10, Store::make(a, {x}, Load::make(a, {ExprHandle(9) - x}))), |
1090 | For::make(x, 0, 10, Store::make(b, {x}, Load::make(a, {x})))}); |
1091 | |
1092 | stmt->accept(&analyzer); |
1093 | |
1094 | // Sanity check output depends on Input. |
1095 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
1096 | |
1097 | auto CB = [](int s, int e) { |
1098 | return Bound(alloc<IntImm>(s), alloc<IntImm>(e)); |
1099 | }; |
1100 | auto EQ = [](const IndexBounds& x, const IndexBounds& y) { |
1101 | return indexBoundsEquals(x, y); |
1102 | }; |
1103 | |
1104 | /* 0. Input: A[(0, 9)] - dependents: 1 |
1105 | * 1. Load: A[(0, 8)] - depends on: 0 2 - dependents: 2 |
1106 | * 2. Store: A[(1, 9)] - depends on: 1 - dependents: 1 3 |
1107 | * 3. Load: A[(1, 9)] - depends on: 2 - dependents: 4 |
1108 | * 4. Store: A[(0, 8)] - depends on: 3 - dependents: 5 7 |
1109 | * 5. Load: A[(0, 8)] - depends on: 4 - dependents: 6 |
1110 | * 6. Store: A[(1, 9)] - depends on: 5 - dependents: 7 |
1111 | * 7. Load: A[(0, 9)] - depends on: 4 6 8 - dependents: 8 |
1112 | * 8. Store: A[(0, 9)] - depends on: 7 - dependents: 7 9 |
1113 | * 9. Load: A[(0, 9)] - depends on: 8 - dependents: 10 |
1114 | * 10. Store: B[(0, 9)] - depends on: 9 - dependents: 11 |
1115 | * 11. Output: B[(0, 9)] - depends on: 10 |
1116 | */ |
1117 | |
1118 | // Now let's look at the bounds of each access. |
1119 | auto history = analyzer.getHistory(); |
1120 | ASSERT_EQ(history.size(), 12); |
1121 | VarPtr aVar = a.node()->base_handle(); |
1122 | VarPtr bVar = b.node()->base_handle(); |
1123 | |
1124 | // The first access is the input A. |
1125 | ASSERT_EQ(history[0]->type(), AccessType::Input); |
1126 | ASSERT_EQ(history[0]->var(), aVar); |
1127 | // It has the bounds of the producing Input. |
1128 | ASSERT_TRUE(EQ(history[0]->bounds(), {CB(0, 9)})); |
1129 | |
1130 | // The second access is the load A[x-1]. |
1131 | ASSERT_EQ(history[1]->type(), AccessType::Load); |
1132 | ASSERT_EQ(history[1]->var(), aVar); |
1133 | // It has the bounds of the loop modified by the offset of each index, in |
1134 | // this case -1. |
1135 | ASSERT_TRUE(EQ(history[1]->bounds(), {CB(0, 8)})); |
1136 | // It depends on the input, but also the store in the same loop, since |
1137 | // different interations of the loop depend on each other. |
1138 | ASSERT_EQ(history[1]->dependencies().size(), 2); |
1139 | ASSERT_TRUE(history[1]->hasDependency(history[0])); |
1140 | ASSERT_TRUE(history[1]->hasDependency(history[2])); |
1141 | |
1142 | // The third access is the Store to A[x] in the first loop. |
1143 | ASSERT_EQ(history[2]->type(), AccessType::Store); |
1144 | ASSERT_EQ(history[2]->var(), aVar); |
1145 | // It has no offset on x, so should have the same bounds as the loop. |
1146 | ASSERT_TRUE(EQ(history[2]->bounds(), {CB(1, 9)})); |
1147 | |
1148 | // The fourth access is the load A[x+1] in the second loop. |
1149 | ASSERT_EQ(history[3]->type(), AccessType::Load); |
1150 | ASSERT_EQ(history[3]->var(), aVar); |
1151 | // It has the bounds of the loop (0 <= x < 9) modified by the offset of each |
1152 | // index, in this case 1. |
1153 | ASSERT_TRUE(EQ(history[3]->bounds(), {CB(1, 9)})); |
1154 | // This load totally overlaps the previous write to A, so it depends only on |
1155 | // it and not the input. |
1156 | ASSERT_EQ(history[3]->dependencies().size(), 1); |
1157 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
1158 | |
1159 | // The fifth access is the store to A[x] in the second loop. |
1160 | ASSERT_EQ(history[4]->type(), AccessType::Store); |
1161 | ASSERT_EQ(history[4]->var(), aVar); |
1162 | // It has no offset on x, so should have the same bounds as the loop. |
1163 | ASSERT_TRUE(EQ(history[4]->bounds(), {CB(0, 8)})); |
1164 | |
1165 | // The sixth access is the load to A[8 - x] in the third loop. |
1166 | ASSERT_EQ(history[5]->type(), AccessType::Load); |
1167 | ASSERT_EQ(history[5]->var(), aVar); |
1168 | // It has the bounds of the loop (0 <= x < 9) modified by the offset of each |
1169 | // index, in this case 8 - x. |
1170 | // This access has a negative stride, which will be normalized. |
1171 | ASSERT_TRUE(EQ(history[5]->bounds(), {CB(0, 8)})); |
1172 | // This load totally overlaps the most recent write to A, so it depends only |
1173 | // on it and not the input or the first write to A. |
1174 | ASSERT_EQ(history[5]->dependencies().size(), 1); |
1175 | ASSERT_TRUE(history[5]->hasDependency(history[4])); |
1176 | |
1177 | // The seventh access is the store to A[9 - x] in the third loop. |
1178 | ASSERT_EQ(history[6]->type(), AccessType::Store); |
1179 | ASSERT_EQ(history[6]->var(), aVar); |
1180 | // This store has a negative stride on it's indices, but is notmalized |
1181 | // internally. |
1182 | ASSERT_TRUE(EQ(history[6]->bounds(), {CB(1, 9)})); |
1183 | |
1184 | // The eighth access is the load A[9-x] in the second loop. |
1185 | ASSERT_EQ(history[7]->type(), AccessType::Load); |
1186 | ASSERT_EQ(history[7]->var(), aVar); |
1187 | // It has the bounds of the loop (0 <= x < 9), modified by the offset 9 - x, |
1188 | // which esstentially traverses the loop backwards. |
1189 | ASSERT_TRUE(EQ(history[7]->bounds(), {CB(0, 9)})); |
1190 | // This Load has three write dependencies: |
1191 | ASSERT_EQ(history[7]->dependencies().size(), 3); |
1192 | // * The previous store (#6) for elements 1-9 |
1193 | ASSERT_TRUE(history[7]->hasDependency(history[6])); |
1194 | // * An earlier store (#4) covering element 0 |
1195 | ASSERT_TRUE(history[7]->hasDependency(history[4])); |
1196 | // * A future store inside this loop, since this loop modifies the buffer |
1197 | // in a non distinct way (due to the load and store having different access |
1198 | // strides). |
1199 | ASSERT_TRUE(history[7]->hasDependency(history[8])); |
1200 | |
1201 | // The ninth access is the store to A[x] in the fourth loop. |
1202 | ASSERT_EQ(history[8]->type(), AccessType::Store); |
1203 | ASSERT_EQ(history[8]->var(), aVar); |
1204 | // This store has a negative stride on it's indices, but is notmalized |
1205 | // internally. |
1206 | ASSERT_TRUE(EQ(history[8]->bounds(), {CB(0, 9)})); |
1207 | |
1208 | // The tenth and 11th acceses are the copy from A[x] to B[x]. |
1209 | ASSERT_EQ(history[9]->type(), AccessType::Load); |
1210 | ASSERT_EQ(history[9]->var(), aVar); |
1211 | ASSERT_EQ(history[10]->type(), AccessType::Store); |
1212 | ASSERT_EQ(history[10]->var(), bVar); |
1213 | |
1214 | // The last access represents the output Buf. |
1215 | ASSERT_EQ(history[11]->type(), AccessType::Output); |
1216 | ASSERT_EQ(history[11]->var(), bVar); |
1217 | // It has the bounds of the output Buf. |
1218 | ASSERT_TRUE(EQ(history[11]->bounds(), {CB(0, 9)})); |
1219 | // It depends on the last write to B only. |
1220 | ASSERT_EQ(history[11]->dependencies().size(), 1); |
1221 | ASSERT_TRUE(history[11]->hasDependency(history[10])); |
1222 | |
1223 | // ok that's enough of that. |
1224 | } |
1225 | |
1226 | // Check many different cases of loop self dependency - when a load within a |
1227 | // loop is dependent on a Store later in the same loop but in different |
1228 | // iteration. This is affected by whether or not we can trust the execution |
1229 | // order of the loop. |
1230 | TEST(MemDependency, MemDependencyCheckerLoopSelfDependency) { |
1231 | BufHandle a("A" , {5}, kInt); |
1232 | BufHandle b("B" , {5}, kInt); |
1233 | VarHandle x("x" , kInt); |
1234 | VarHandle y("y" , kInt); |
1235 | VarHandle z("z" , kInt); |
1236 | |
1237 | using namespace analysis; |
1238 | |
1239 | // This check assumes that the Stmt has a single Store with a single Load on |
1240 | // the RHS. |
1241 | auto isSelfDependent = |
1242 | [](const std::vector<std::shared_ptr<AccessInfo>>& history) -> bool { |
1243 | return history.front()->hasDependency(history.back()); |
1244 | }; |
1245 | |
1246 | { |
1247 | /* for (int y = 0; y < 10; y++) { |
1248 | * A[y] = (A[y]) + 1; |
1249 | * } */ |
1250 | |
1251 | // Not self dependent since all loop iterations use a different y. |
1252 | |
1253 | MemDependencyChecker analyzer; |
1254 | StmtPtr stmt = For::make( |
1255 | y, |
1256 | 0, |
1257 | 10, |
1258 | Block::make({Store::make(a, {y}, Add::make(Load::make(a, {y}), 1))})); |
1259 | |
1260 | stmt->accept(&analyzer); |
1261 | |
1262 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1263 | } |
1264 | |
1265 | { |
1266 | /* for (int y = 0; y < 10; y++) { |
1267 | * A[y + 1] = (A[y + 1]) + 1; |
1268 | * } |
1269 | */ |
1270 | |
1271 | // Not self dependent due to different y (with offset). |
1272 | |
1273 | MemDependencyChecker analyzer; |
1274 | StmtPtr stmt = For::make( |
1275 | y, |
1276 | 0, |
1277 | 10, |
1278 | Block::make( |
1279 | {Store::make(a, {y + 1}, Add::make(Load::make(a, {y + 1}), 1))})); |
1280 | |
1281 | stmt->accept(&analyzer); |
1282 | |
1283 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1284 | } |
1285 | |
1286 | { |
1287 | /* for (int x = 0; x < 10; x++) { |
1288 | * A[0] = (A[0]) + x; |
1289 | * } |
1290 | */ |
1291 | |
1292 | // Is self dependent since all loops use a common constant element of A. |
1293 | |
1294 | MemDependencyChecker analyzer; |
1295 | StmtPtr stmt = For::make( |
1296 | x, |
1297 | 0, |
1298 | 10, |
1299 | Block::make({Store::make(a, {0}, Add::make(Load::make(a, {0}), x))})); |
1300 | stmt->accept(&analyzer); |
1301 | |
1302 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1303 | } |
1304 | |
1305 | { |
1306 | /* for (int x = 0; x < 10; x++) { |
1307 | * A[0] = (B[0]) + x; |
1308 | * } |
1309 | */ |
1310 | |
1311 | // Is not self dependent beacause there is no store to the buffer that is |
1312 | // read. |
1313 | |
1314 | MemDependencyChecker analyzer; |
1315 | StmtPtr stmt = For::make( |
1316 | x, |
1317 | 0, |
1318 | 10, |
1319 | Block::make({Store::make(a, {0}, Add::make(Load::make(b, {0}), x))})); |
1320 | stmt->accept(&analyzer); |
1321 | |
1322 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1323 | } |
1324 | |
1325 | { |
1326 | /* for (int x = 0; x < 10; x++) { |
1327 | * A[y] = (A[y]) + x; |
1328 | * } |
1329 | */ |
1330 | |
1331 | // Is self dependent since all loops use a common symbolic element of A. |
1332 | |
1333 | MemDependencyChecker analyzer; |
1334 | StmtPtr stmt = For::make( |
1335 | x, |
1336 | 0, |
1337 | 10, |
1338 | Block::make({Store::make(a, {y}, Add::make(Load::make(a, {y}), x))})); |
1339 | stmt->accept(&analyzer); |
1340 | |
1341 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1342 | } |
1343 | |
1344 | { |
1345 | /* for (int x = 0; x < 10; x++) { |
1346 | * A[x] = A[x + 1]; |
1347 | * } |
1348 | */ |
1349 | |
1350 | // In this case it depends if we are considering execution order. |
1351 | |
1352 | MemDependencyChecker analyzer; |
1353 | |
1354 | StmtPtr stmt = |
1355 | For::make(x, 0, 10, Store::make(a, {x}, Load::make(a, {x + 1}))); |
1356 | stmt->accept(&analyzer); |
1357 | |
1358 | // With analysis of order disabled, this is self dependent since the read |
1359 | // from X+1 and the write to X+1 could be in reverse order. |
1360 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1361 | } |
1362 | |
1363 | { |
1364 | /* for (int x = 0; x < 10; x++) { |
1365 | * A[x] = A[x + 1]; |
1366 | * } |
1367 | */ |
1368 | |
1369 | MemDependencyChecker analyzer; |
1370 | analyzer.allowLoopExecutionOrderAnalysis(); |
1371 | |
1372 | StmtPtr stmt = |
1373 | For::make(x, 0, 10, Store::make(a, {x}, Load::make(a, {x + 1}))); |
1374 | stmt->accept(&analyzer); |
1375 | |
1376 | // If order analysis is enabled, this is not dependent since the read for |
1377 | // each element occurs before the write to that element. |
1378 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1379 | } |
1380 | |
1381 | { |
1382 | /* for (int x = 1; x < 10; x++) { |
1383 | * A[x] = A[x - 1]; |
1384 | * } |
1385 | */ |
1386 | |
1387 | MemDependencyChecker analyzer; |
1388 | |
1389 | StmtPtr stmt = |
1390 | For::make(x, 1, 10, Store::make(a, {x}, Load::make(a, {x - 1}))); |
1391 | stmt->accept(&analyzer); |
1392 | |
1393 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1394 | } |
1395 | |
1396 | { |
1397 | /* for (int x = 1; x < 10; x++) { |
1398 | * A[x] = A[x - 1]; |
1399 | * } |
1400 | */ |
1401 | |
1402 | MemDependencyChecker analyzer; |
1403 | analyzer.allowLoopExecutionOrderAnalysis(); |
1404 | |
1405 | StmtPtr stmt = |
1406 | For::make(x, 1, 10, Store::make(a, {x}, Load::make(a, {x - 1}))); |
1407 | stmt->accept(&analyzer); |
1408 | |
1409 | // In this case, even with order analysis the Load is dependent on the |
1410 | // Store, since the write to X occurs before the read from X. |
1411 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1412 | } |
1413 | |
1414 | { |
1415 | /* for (int x = 0; x < 9; x++) { |
1416 | * A[9 - x] = A[8 - x]; |
1417 | * } |
1418 | */ |
1419 | |
1420 | // Still works if the execution order is reversed, so long as the read |
1421 | // comes before the write. |
1422 | |
1423 | MemDependencyChecker analyzer; |
1424 | analyzer.allowLoopExecutionOrderAnalysis(); |
1425 | |
1426 | StmtPtr stmt = For::make( |
1427 | x, |
1428 | 3, |
1429 | 10, |
1430 | Store::make( |
1431 | a, {ExprHandle(9) - x}, Load::make(a, {ExprHandle(8) - x}))); |
1432 | stmt->accept(&analyzer); |
1433 | |
1434 | // However here was can determine the A store is earlier in the order than |
1435 | // the load. |
1436 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1437 | } |
1438 | |
1439 | { |
1440 | /* for (int x = 0; x < 9; x++) { |
1441 | * A[8 - x] = A[9 - x]; |
1442 | * } |
1443 | */ |
1444 | |
1445 | // But not if it doesn't. |
1446 | |
1447 | MemDependencyChecker analyzer; |
1448 | analyzer.allowLoopExecutionOrderAnalysis(); |
1449 | |
1450 | StmtPtr stmt = For::make( |
1451 | x, |
1452 | 3, |
1453 | 10, |
1454 | Store::make( |
1455 | a, {ExprHandle(8) - x}, Load::make(a, {ExprHandle(9) - x}))); |
1456 | stmt->accept(&analyzer); |
1457 | |
1458 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1459 | } |
1460 | |
1461 | { |
1462 | /* for (int x = 0; x < 9; x++) { |
1463 | * A[9 - x] = A[8 - x]; |
1464 | * } |
1465 | */ |
1466 | |
1467 | // And not if we're not relying on execution order. |
1468 | |
1469 | MemDependencyChecker analyzer; |
1470 | |
1471 | StmtPtr stmt = For::make( |
1472 | x, |
1473 | 3, |
1474 | 10, |
1475 | Store::make( |
1476 | a, {ExprHandle(9) - x}, Load::make(a, {ExprHandle(8) - x}))); |
1477 | stmt->accept(&analyzer); |
1478 | |
1479 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1480 | } |
1481 | |
1482 | { |
1483 | /* for (int x = 3; x < 10; x++) { |
1484 | * A[x - 2] = A[x - 1]; |
1485 | * } |
1486 | */ |
1487 | |
1488 | // Forward order but negative indices. |
1489 | |
1490 | MemDependencyChecker analyzer; |
1491 | analyzer.allowLoopExecutionOrderAnalysis(); |
1492 | |
1493 | StmtPtr stmt = |
1494 | For::make(x, 3, 10, Store::make(a, {x - 2}, Load::make(a, {x - 1}))); |
1495 | stmt->accept(&analyzer); |
1496 | |
1497 | // However here was can determine the A store is earlier in the order than |
1498 | // the load. |
1499 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1500 | } |
1501 | |
1502 | { |
1503 | /* for (int x = 0; x < 10; x++) { |
1504 | * A[x * 2] = A[x * 2]; |
1505 | * } |
1506 | */ |
1507 | |
1508 | // With an access stride. |
1509 | |
1510 | MemDependencyChecker analyzer; |
1511 | // Execution order doesn't matter since the read and the write are totally |
1512 | // distinct. |
1513 | |
1514 | StmtPtr stmt = |
1515 | For::make(x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 2}))); |
1516 | stmt->accept(&analyzer); |
1517 | |
1518 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1519 | } |
1520 | |
1521 | { |
1522 | /* for (int x = 0; x < 10; x++) { |
1523 | * A[x * 2] = A[x * 2 + 1]; |
1524 | * } |
1525 | */ |
1526 | |
1527 | // Here we can use the common stride of the accesses to determine they are |
1528 | // distinct. |
1529 | // Note, this is the only place (loop self depedency) we use this stride |
1530 | // to avoid unnecessary depedence. |
1531 | |
1532 | MemDependencyChecker analyzer; |
1533 | // Execution order doesn't matter since the read and the write are totally |
1534 | // distinct. |
1535 | |
1536 | StmtPtr stmt = For::make( |
1537 | x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 2 + 1}))); |
1538 | stmt->accept(&analyzer); |
1539 | |
1540 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1541 | } |
1542 | |
1543 | { |
1544 | /* for (int x = 0; x < 10; x++) { |
1545 | * A[x * 2] = A[x * 2 - 1]; |
1546 | * } |
1547 | */ |
1548 | |
1549 | // same if the read is behind the write so long as they are distinct. |
1550 | |
1551 | MemDependencyChecker analyzer; |
1552 | StmtPtr stmt = For::make( |
1553 | x, 1, 10, Store::make(a, {x * 2}, Load::make(a, {x * 2 - 1}))); |
1554 | stmt->accept(&analyzer); |
1555 | |
1556 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1557 | } |
1558 | |
1559 | { |
1560 | /* for (int x = 0; x < 10; x++) { |
1561 | * A[x * 2] = A[x * 2 + 2]; |
1562 | * } |
1563 | */ |
1564 | |
1565 | // But not if the offset is in the stride. |
1566 | |
1567 | MemDependencyChecker analyzer; |
1568 | StmtPtr stmt = For::make( |
1569 | x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 2 + 2}))); |
1570 | stmt->accept(&analyzer); |
1571 | |
1572 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1573 | } |
1574 | |
1575 | { |
1576 | /* for (int x = 0; x < 10; x++) { |
1577 | * A[x * 2] = A[x * 2 - 2]; |
1578 | * } |
1579 | */ |
1580 | |
1581 | // Works with negative offsets too. |
1582 | |
1583 | MemDependencyChecker analyzer; |
1584 | StmtPtr stmt = For::make( |
1585 | x, 1, 10, Store::make(a, {x * 2}, Load::make(a, {x * 2 - 2}))); |
1586 | stmt->accept(&analyzer); |
1587 | |
1588 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1589 | } |
1590 | |
1591 | { |
1592 | /* for (int x = 0; x < 10; x++) { |
1593 | * A[x * 2] = A[x * 2 + 7]; |
1594 | * } |
1595 | */ |
1596 | |
1597 | // Detects accesses are distinct when offset is large but not a multiple |
1598 | // of stride. |
1599 | MemDependencyChecker analyzer; |
1600 | StmtPtr stmt = For::make( |
1601 | x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 2 + 7}))); |
1602 | stmt->accept(&analyzer); |
1603 | |
1604 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1605 | } |
1606 | |
1607 | { |
1608 | /* for (int x = 0; x < 10; x++) { |
1609 | * A[x * 2] = A[x * 2 + 4]; |
1610 | * } |
1611 | */ |
1612 | |
1613 | // Works with offsets which are multiples of the stride. |
1614 | MemDependencyChecker analyzer; |
1615 | StmtPtr stmt = For::make( |
1616 | x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 2 + 4}))); |
1617 | stmt->accept(&analyzer); |
1618 | |
1619 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1620 | } |
1621 | |
1622 | { |
1623 | /* for (int x = 0; x < 10; x++) { |
1624 | * A[x * 6] = A[x * 6 + 5]; |
1625 | * } |
1626 | */ |
1627 | |
1628 | // detects accesses are distinct with large strides when the offset is |
1629 | // within. |
1630 | |
1631 | MemDependencyChecker analyzer; |
1632 | StmtPtr stmt = For::make( |
1633 | x, 0, 10, Store::make(a, {x * 6}, Load::make(a, {x * 6 + 5}))); |
1634 | stmt->accept(&analyzer); |
1635 | |
1636 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1637 | } |
1638 | |
1639 | { |
1640 | /* for (int x = 0; x < 10; x++) { |
1641 | * A[x * 2] = A[x * 6]; |
1642 | * } |
1643 | */ |
1644 | |
1645 | // detects accesses are overlapping when stride is different but a |
1646 | // multiple. |
1647 | |
1648 | MemDependencyChecker analyzer; |
1649 | StmtPtr stmt = |
1650 | For::make(x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 6}))); |
1651 | stmt->accept(&analyzer); |
1652 | |
1653 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1654 | } |
1655 | |
1656 | { |
1657 | /* for (int x = 0; x < 10; x++) { |
1658 | * A[x * 4] = A[x * 2]; |
1659 | * } |
1660 | */ |
1661 | |
1662 | // still works when the read axis is the smaller stride. |
1663 | |
1664 | MemDependencyChecker analyzer; |
1665 | StmtPtr stmt = |
1666 | For::make(x, 0, 10, Store::make(a, {x * 4}, Load::make(a, {x * 2}))); |
1667 | stmt->accept(&analyzer); |
1668 | |
1669 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1670 | } |
1671 | |
1672 | { |
1673 | /* for (int x = 0; x < 10; x++) { |
1674 | * A[x * 2] = A[x * 6 + 1]; |
1675 | * } |
1676 | */ |
1677 | |
1678 | // detects accesses are distinct when stride is different but a multiple |
1679 | // and there is an offset. |
1680 | |
1681 | MemDependencyChecker analyzer; |
1682 | StmtPtr stmt = For::make( |
1683 | x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 6 + 1}))); |
1684 | stmt->accept(&analyzer); |
1685 | |
1686 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1687 | } |
1688 | |
1689 | { |
1690 | /* for (int x = 0; x < 10; x++) { |
1691 | * A[x * 2] = A[x * 6 + 4]; |
1692 | * } |
1693 | */ |
1694 | |
1695 | // The smaller stride determines whether there is overlap. |
1696 | |
1697 | MemDependencyChecker analyzer; |
1698 | StmtPtr stmt = For::make( |
1699 | x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 6 + 4}))); |
1700 | stmt->accept(&analyzer); |
1701 | |
1702 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1703 | } |
1704 | |
1705 | { |
1706 | /* for (int x = 0; x < 10; x++) { |
1707 | * A[x * 2 + 3] = A[x * 6]; |
1708 | * } |
1709 | */ |
1710 | |
1711 | // The smaller stride determines whether there is overlap, not the larger. |
1712 | |
1713 | MemDependencyChecker analyzer; |
1714 | StmtPtr stmt = For::make( |
1715 | x, 0, 10, Store::make(a, {x * 2 + 3}, Load::make(a, {x * 6}))); |
1716 | stmt->accept(&analyzer); |
1717 | |
1718 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1719 | } |
1720 | |
1721 | { |
1722 | /* for (int x = 0; x < 10; x++) { |
1723 | * A[x * 2] = A[x * 3 + 1]; |
1724 | * } |
1725 | */ |
1726 | |
1727 | // If they have strides with no common muliple > 1, they overlap. |
1728 | MemDependencyChecker analyzer; |
1729 | StmtPtr stmt = For::make( |
1730 | x, 0, 10, Store::make(a, {x * 2}, Load::make(a, {x * 3 + 1}))); |
1731 | stmt->accept(&analyzer); |
1732 | |
1733 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1734 | } |
1735 | |
1736 | { |
1737 | /* for (int x = 0; x < 10; x++) { |
1738 | * A[x] = A[x + 10]; |
1739 | * } |
1740 | */ |
1741 | |
1742 | // If the offset is greater than the size of the loop, they can't overlap. |
1743 | |
1744 | MemDependencyChecker analyzer; |
1745 | StmtPtr stmt = |
1746 | For::make(x, 0, 10, Store::make(a, {x}, Load::make(a, {x + 10}))); |
1747 | stmt->accept(&analyzer); |
1748 | |
1749 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1750 | } |
1751 | |
1752 | { |
1753 | /* for (int x = 0; x < 10; x++) { |
1754 | * A[x] = A[9 - x]; |
1755 | * } |
1756 | */ |
1757 | |
1758 | // If they have different execution orders they may overlap. |
1759 | MemDependencyChecker analyzer; |
1760 | StmtPtr stmt = For::make( |
1761 | x, 0, 10, Store::make(a, {x}, Load::make(a, {ExprHandle(9) - x}))); |
1762 | stmt->accept(&analyzer); |
1763 | |
1764 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1765 | } |
1766 | |
1767 | { |
1768 | /* for (int x = 0; x < 10; x++) { |
1769 | * A[x * 2] = A[19 - x * 2]; |
1770 | * } |
1771 | */ |
1772 | |
1773 | // Or they may not, depending on their start offset and strides. |
1774 | MemDependencyChecker analyzer; |
1775 | StmtPtr stmt = For::make( |
1776 | x, |
1777 | 0, |
1778 | 10, |
1779 | Store::make(a, {x * 2}, Load::make(a, {ExprHandle(19) - x * 2}))); |
1780 | stmt->accept(&analyzer); |
1781 | |
1782 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1783 | } |
1784 | |
1785 | { |
1786 | /* for (int x = 0; x < 10; x++) { |
1787 | * A[x / 2] = A[x / 2]; |
1788 | * } |
1789 | */ |
1790 | |
1791 | // If the stride is not monotonic, they overlap. |
1792 | |
1793 | MemDependencyChecker analyzer; |
1794 | StmtPtr stmt = |
1795 | For::make(x, 0, 10, Store::make(a, {x / 2}, Load::make(a, {x / 2}))); |
1796 | stmt->accept(&analyzer); |
1797 | |
1798 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1799 | } |
1800 | |
1801 | { |
1802 | /* for (int x = 0; x < 10; x++) { |
1803 | * A[x / 2] = A[x / 2] + 1; |
1804 | * } |
1805 | */ |
1806 | |
1807 | // If the stride is not monotonic, they overlap - even with an offset. |
1808 | MemDependencyChecker analyzer; |
1809 | StmtPtr stmt = For::make( |
1810 | x, 0, 10, Store::make(a, {x / 2}, Load::make(a, {x / 2 + 1}))); |
1811 | stmt->accept(&analyzer); |
1812 | |
1813 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1814 | } |
1815 | |
1816 | { |
1817 | /* for (int x = 0; x < 10; x++) { |
1818 | * A[x % 2] = A[x % 2]; |
1819 | * } |
1820 | */ |
1821 | |
1822 | // Mod too... |
1823 | |
1824 | analysis::MemDependencyChecker analyzer; |
1825 | StmtPtr stmt = For::make( |
1826 | x, |
1827 | 0, |
1828 | 10, |
1829 | Store::make(a, {Mod::make(x, 2)}, Load::make(a, {Mod::make(x, 2)}))); |
1830 | stmt->accept(&analyzer); |
1831 | |
1832 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1833 | } |
1834 | |
1835 | { |
1836 | /* for (int x = y; x < z; x++) { |
1837 | * A[x] = A[x + 1]; |
1838 | * } |
1839 | */ |
1840 | |
1841 | // Still works with symbolic loop extents. |
1842 | |
1843 | { |
1844 | MemDependencyChecker analyzer; |
1845 | StmtPtr stmt = |
1846 | For::make(x, y, z, Store::make(a, {x}, Load::make(a, {x + 1}))); |
1847 | stmt->accept(&analyzer); |
1848 | |
1849 | ASSERT_TRUE(isSelfDependent(analyzer.getHistory())); |
1850 | } |
1851 | |
1852 | { |
1853 | MemDependencyChecker analyzer; |
1854 | analyzer.allowLoopExecutionOrderAnalysis(); |
1855 | StmtPtr stmt = |
1856 | For::make(x, y, z, Store::make(a, {x}, Load::make(a, {x + 1}))); |
1857 | stmt->accept(&analyzer); |
1858 | |
1859 | ASSERT_FALSE(isSelfDependent(analyzer.getHistory())); |
1860 | } |
1861 | } |
1862 | } |
1863 | |
1864 | // Verify that a strided access still works. |
1865 | // TODO: actually this only works because of the size of the ranges, revist this |
1866 | // test after strided overlap is implemented. |
1867 | TEST(MemDependency, MemDependencyCheckerLoopDistinctStrides) { |
1868 | BufHandle a("A" , {20}, kInt); |
1869 | BufHandle b("B" , {20}, kInt); |
1870 | VarHandle x("x" , kInt); |
1871 | VarHandle y("y" , kInt); |
1872 | |
1873 | using namespace analysis; |
1874 | MemDependencyChecker analyzer({a.node()}, {b.node()}); |
1875 | StmtPtr stmt = Block::make( |
1876 | {For::make( |
1877 | x, 0, 10, Store::make(b, {x * 2 + 1}, Load::make(a, {x * 2 + 1}))), |
1878 | For::make(x, 0, 10, Store::make(b, {x * 2}, Load::make(a, {x * 2}))) |
1879 | |
1880 | }); |
1881 | stmt->accept(&analyzer); |
1882 | |
1883 | // Sanity check output depends on input. |
1884 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
1885 | |
1886 | // Output has 2 dependencies... the store in each loop. |
1887 | auto outputAccess = analyzer.output(b.node()); |
1888 | ASSERT_EQ(outputAccess->dependencies().size(), 2); |
1889 | } |
1890 | |
1891 | /* TODO(nickg) - this test will fail due to the lack of stride math in Bound |
1892 | TEST(MemDependency, MemDependencyCheckerLoopDistinctStrides) { |
1893 | BufHandle a("A", {20}, kInt); |
1894 | BufHandle b("B", {20}, kInt); |
1895 | BufHandle c("C", {10}, kInt); |
1896 | VarHandle x("x", kInt); |
1897 | VarHandle y("y", kInt); |
1898 | |
1899 | { |
1900 | analysis::MemDependencyChecker analyzer({a.node()}, {c.node()}); |
1901 | StmtPtr stmt = Block::make( |
1902 | {For::make( |
1903 | x, |
1904 | 0, |
1905 | 10, |
1906 | Store::make(b, {x * 2 + 1}, Load::make(a, {x * 2 + 1}))), |
1907 | For::make( |
1908 | x, 0, 10, Store::make(b, {x * 2}, Load::make(a, {x * 2}))), |
1909 | For::make(x, 0, 10, Store::make(c, {x}, Load::make(b, {x}))) |
1910 | |
1911 | }); |
1912 | stmt->accept(&analyzer); |
1913 | |
1914 | std::cout << *stmt << "\n"; |
1915 | for (auto& wi : analyzer.getHistory()) { |
1916 | wi->print(); |
1917 | } |
1918 | } |
1919 | }*/ |
1920 | |
1921 | // analysis on Stmts using Cond. |
1922 | TEST(MemDependency, MemDependencyCheckerLoopBoundsCond) { |
1923 | BufHandle a("A" , {10}, kInt); |
1924 | BufHandle b("B" , {10}, kInt); |
1925 | BufHandle c("C" , {10}, kInt); |
1926 | VarHandle x("x" , kInt); |
1927 | VarHandle y("y" , kInt); |
1928 | |
1929 | using namespace analysis; |
1930 | |
1931 | { |
1932 | /* for (int x = 0; x < 10; x++) { |
1933 | * C[x] = A[x]; |
1934 | * } |
1935 | * if (y<5 ? 1 : 0) { |
1936 | * C[0] = (B[0]) + 1; |
1937 | * } else { |
1938 | * C[0] = (B[1]) + 1; |
1939 | * } |
1940 | */ |
1941 | |
1942 | // Future usages may depend on accesses in both branches of a condition. |
1943 | |
1944 | MemDependencyChecker analyzer({a, b}, {c}); |
1945 | StmtPtr stmt = Block::make( |
1946 | {For::make(x, 0, 10, Store::make(c, {x}, Load::make(a, {x}))), |
1947 | Cond::make( |
1948 | CompareSelect::make(y, 5, CompareSelectOperation::kLT), |
1949 | Store::make(c, {0}, Add::make(Load::make(b, {0}), 1)), |
1950 | Store::make(c, {0}, Add::make(Load::make(b, {1}), 1)))}); |
1951 | |
1952 | stmt->accept(&analyzer); |
1953 | |
1954 | // Output C should have 3 dependencies, each of the three stores. |
1955 | auto outputAccess = analyzer.output(c.node()); |
1956 | ASSERT_NE(outputAccess, nullptr); |
1957 | ASSERT_EQ(outputAccess->dependencies().size(), 3); |
1958 | |
1959 | // C depends indirectly on A and B. |
1960 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
1961 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
1962 | } |
1963 | |
1964 | { |
1965 | /* for (int x = 0; x < 10; x++) { |
1966 | * C[x] = A[x]; |
1967 | * } |
1968 | * if (y<5 ? 1 : 0) { |
1969 | * for (int x = 0; x < 10; x++) { |
1970 | * C[x] = B[x]; |
1971 | * } |
1972 | * } else { |
1973 | * for (int x = 0; x < 10; x++) { |
1974 | * C[x] = (B[x]) + 1; |
1975 | * } |
1976 | * } |
1977 | */ |
1978 | |
1979 | // Future usages may depend on accesses in both branches of a condition. |
1980 | |
1981 | MemDependencyChecker analyzer({a, b}, {c}); |
1982 | StmtPtr stmt = Block::make( |
1983 | {For::make(x, 0, 10, Store::make(c, {x}, Load::make(a, {x}))), |
1984 | Cond::make( |
1985 | CompareSelect::make(y, 5, CompareSelectOperation::kLT), |
1986 | For::make(x, 0, 10, Store::make(c, {x}, Load::make(b, {x}))), |
1987 | For::make( |
1988 | x, |
1989 | 0, |
1990 | 10, |
1991 | Store::make(c, {x}, Add::make(Load::make(b, {x}), 1))))}); |
1992 | |
1993 | stmt->accept(&analyzer); |
1994 | |
1995 | // Output C should have 3 dependencies, each of the three stores. |
1996 | auto outputAccess = analyzer.output(c.node()); |
1997 | ASSERT_NE(outputAccess, nullptr); |
1998 | ASSERT_EQ(outputAccess->dependencies().size(), 3); |
1999 | |
2000 | // TODO(nickg): actually since the true and false branch cover the total |
2001 | // range of the first store this should have 2 dependencies, but we don't |
2002 | // do that yet. |
2003 | |
2004 | // C depends indirectly on A and B. |
2005 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2006 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2007 | } |
2008 | |
2009 | { |
2010 | /* for (int x = 0; x < 10; x++) { |
2011 | * C[x] = A[x]; |
2012 | * } |
2013 | * if (y<5 ? 1 : 0) { |
2014 | * for (int x = 0; x < 10; x++) { |
2015 | * C[x] = (B[x]) + 1; |
2016 | * } |
2017 | * } |
2018 | */ |
2019 | |
2020 | // Only has true branch. |
2021 | |
2022 | MemDependencyChecker analyzer({a, b}, {c}); |
2023 | StmtPtr stmt = Block::make( |
2024 | {For::make(x, 0, 10, Store::make(c, {x}, Load::make(a, {x}))), |
2025 | Cond::make( |
2026 | CompareSelect::make(y, 5, CompareSelectOperation::kLT), |
2027 | For::make( |
2028 | x, |
2029 | 0, |
2030 | 10, |
2031 | Store::make(c, {x}, Add::make(Load::make(b, {x}), 1))), |
2032 | nullptr)}); |
2033 | |
2034 | stmt->accept(&analyzer); |
2035 | |
2036 | // Output C should have 3 dependencies, each of the three stores. |
2037 | auto outputAccess = analyzer.output(c.node()); |
2038 | ASSERT_NE(outputAccess, nullptr); |
2039 | ASSERT_EQ(outputAccess->dependencies().size(), 2); |
2040 | |
2041 | // C depends indirectly on A and B. |
2042 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2043 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2044 | } |
2045 | |
2046 | { |
2047 | /* for (int x = 0; x < 10; x++) { |
2048 | * C[x] = A[x]; |
2049 | * } |
2050 | * if (y<5 ? 1 : 0) { |
2051 | * } else { |
2052 | * for (int x = 0; x < 10; x++) { |
2053 | * C[x] = (B[x]) + 1; |
2054 | * } |
2055 | * } |
2056 | */ |
2057 | |
2058 | // Only has false branch. |
2059 | |
2060 | MemDependencyChecker analyzer({a, b}, {c}); |
2061 | StmtPtr stmt = Block::make( |
2062 | {For::make(x, 0, 10, Store::make(c, {x}, Load::make(a, {x}))), |
2063 | Cond::make( |
2064 | CompareSelect::make(y, 5, CompareSelectOperation::kLT), |
2065 | nullptr, |
2066 | For::make( |
2067 | x, |
2068 | 0, |
2069 | 10, |
2070 | Store::make(c, {x}, Add::make(Load::make(b, {x}), 1))))}); |
2071 | |
2072 | stmt->accept(&analyzer); |
2073 | |
2074 | // Output C should have 3 dependencies, each of the three stores. |
2075 | auto outputAccess = analyzer.output(c.node()); |
2076 | ASSERT_NE(outputAccess, nullptr); |
2077 | ASSERT_EQ(outputAccess->dependencies().size(), 2); |
2078 | |
2079 | // C depends indirectly on A and B. |
2080 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2081 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2082 | } |
2083 | |
2084 | { |
2085 | /* for (int x = 0; x < 10; x++) { |
2086 | * C[x] = A[x]; |
2087 | * } |
2088 | * if (C[0]<5 ? 1 : 0) { |
2089 | * C[0] = 5; |
2090 | * } |
2091 | */ |
2092 | |
2093 | // Cond's Condition depends on a previous access. |
2094 | |
2095 | MemDependencyChecker analyzer({a}, {c}); |
2096 | StorePtr initStore = Store::make(c, {x}, Load::make(a, {x})); |
2097 | ExprHandle conditionalLoad = Load::make(c, {0}); |
2098 | StmtPtr stmt = Block::make( |
2099 | {For::make(x, 0, 10, initStore), |
2100 | Cond::make( |
2101 | CompareSelect::make( |
2102 | conditionalLoad, 5, CompareSelectOperation::kLT), |
2103 | Store::make(c, {0}, 5), |
2104 | nullptr)}); |
2105 | |
2106 | stmt->accept(&analyzer); |
2107 | |
2108 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2109 | |
2110 | ASSERT_TRUE(analyzer.dependsDirectly(conditionalLoad.node(), initStore)); |
2111 | ASSERT_FALSE(analyzer.dependsDirectly(conditionalLoad.node(), a.node())); |
2112 | ASSERT_TRUE(analyzer.dependsIndirectly(conditionalLoad.node(), a.node())); |
2113 | } |
2114 | } |
2115 | |
2116 | // Stmts using IfThenElse. |
2117 | TEST(MemDependency, MemDependencyCheckerIfThenElse) { |
2118 | BufHandle a("A" , {10}, kInt); |
2119 | BufHandle b("B" , {10}, kInt); |
2120 | BufHandle c("C" , {10}, kInt); |
2121 | VarHandle x("x" , kInt); |
2122 | VarHandle y("y" , kInt); |
2123 | |
2124 | using namespace analysis; |
2125 | |
2126 | { |
2127 | /* for (int x = 0; x < 10; x++) { |
2128 | * C[x] = A[x]; |
2129 | * } |
2130 | * C[0] = (y < 5 ? (B[0]) + 1 : (B[1]) + 1; |
2131 | */ |
2132 | |
2133 | // Future usages may depend on accesses in both branches of a condition. |
2134 | |
2135 | MemDependencyChecker analyzer({a, b}, {c}); |
2136 | StorePtr ifStore = Store::make( |
2137 | c, |
2138 | {0}, |
2139 | IfThenElse::make( |
2140 | CompareSelect::make(y, 5, CompareSelectOperation::kLT), |
2141 | Add::make(Load::make(b, {0}), 1), |
2142 | Add::make(Load::make(b, {1}), 1))); |
2143 | StmtPtr stmt = Block::make( |
2144 | {For::make(x, 0, 10, Store::make(c, {x}, Load::make(a, {x}))), |
2145 | ifStore}); |
2146 | |
2147 | stmt->accept(&analyzer); |
2148 | |
2149 | // Output C should have 2 dependencies, each of the two stores. |
2150 | auto outputAccess = analyzer.output(c.node()); |
2151 | ASSERT_NE(outputAccess, nullptr); |
2152 | ASSERT_EQ(outputAccess->dependencies().size(), 2); |
2153 | |
2154 | // Now we need to check the Store containing the IfThenElse. |
2155 | auto ifStoreAccess = analyzer.accessFor(ifStore); |
2156 | |
2157 | // It should have 2 dependencies. |
2158 | ASSERT_EQ(ifStoreAccess->dependencies().size(), 2); |
2159 | |
2160 | // C depends indirectly on A and B. |
2161 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2162 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2163 | } |
2164 | |
2165 | { |
2166 | /* for (int x = 0; x < 10; x++) { |
2167 | * C[x] = A[x]; |
2168 | * } |
2169 | * C[0] = (y < 5 ? (B[0]) + 1 : 42; |
2170 | */ |
2171 | |
2172 | // If the load appears in only one side of an IfThenElse the output may be |
2173 | // dependent on it. |
2174 | |
2175 | MemDependencyChecker analyzer({a, b}, {c}); |
2176 | StorePtr ifStore = Store::make( |
2177 | c, |
2178 | {0}, |
2179 | IfThenElse::make( |
2180 | CompareSelect::make(y, 5, CompareSelectOperation::kLT), |
2181 | Add::make(Load::make(b, {0}), 1), |
2182 | 42)); |
2183 | StmtPtr stmt = Block::make( |
2184 | {For::make(x, 0, 10, Store::make(c, {x}, Load::make(a, {x}))), |
2185 | ifStore}); |
2186 | |
2187 | stmt->accept(&analyzer); |
2188 | |
2189 | // C depends indirectly on A and B. |
2190 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2191 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2192 | } |
2193 | |
2194 | { |
2195 | /* for (int x = 0; x < 10; x++) { |
2196 | * C[x] = (x < 5 ? B[x] : A[x]; |
2197 | * } |
2198 | */ |
2199 | |
2200 | // In this case C is dependent on both A and B. |
2201 | |
2202 | // TODO: in cases like this it would be possible to split the range of B |
2203 | // into two bounds, one dependent on A and one depenent on B. We'd need to |
2204 | // examine conditions relative to previously encountered loop variables. I'm |
2205 | // uncertain if this would be helpful. |
2206 | |
2207 | MemDependencyChecker analyzer({a, b}, {c}); |
2208 | StorePtr ifStore = Store::make( |
2209 | c, |
2210 | {0}, |
2211 | IfThenElse::make( |
2212 | CompareSelect::make(y, 5, CompareSelectOperation::kLT), |
2213 | Load::make(b, {x}), |
2214 | Load::make(a, {x}))); |
2215 | StmtPtr stmt = Block::make({For::make(x, 0, 10, ifStore)}); |
2216 | |
2217 | stmt->accept(&analyzer); |
2218 | |
2219 | // C depends indirectly on A and B. |
2220 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2221 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2222 | } |
2223 | } |
2224 | |
2225 | // Cutting a loop with single elem writes |
2226 | TEST(MemDependency, MemDependencyCheckerCutLoop) { |
2227 | BufHandle a("A" , {10}, kInt); |
2228 | BufHandle b("B" , {10}, kInt); |
2229 | VarHandle x("x" , kInt); |
2230 | |
2231 | using namespace analysis; |
2232 | |
2233 | { |
2234 | /* for (int x = 0; x < 10; x++) { |
2235 | * B[x] = A[x]; |
2236 | * } |
2237 | * B[5] = 100; |
2238 | */ |
2239 | |
2240 | // Cutting a loop with single element writes. |
2241 | |
2242 | MemDependencyChecker analyzer({a}, {b}); |
2243 | StmtPtr stmt = Block::make( |
2244 | {For::make(x, 0, 10, Store::make(b, {x}, Load::make(a, {x}))), |
2245 | Store::make(b, {5}, 100)}); |
2246 | |
2247 | stmt->accept(&analyzer); |
2248 | |
2249 | // Output depends on input. |
2250 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
2251 | |
2252 | // Output has 2 depdenencies. |
2253 | auto outputAccess = analyzer.output(b.node()); |
2254 | ASSERT_NE(outputAccess, nullptr); |
2255 | ASSERT_EQ(outputAccess->dependencies().size(), 2); |
2256 | } |
2257 | |
2258 | { |
2259 | /* for (int x = 0; x < 10; x++) { |
2260 | * B[x] = A[x]; |
2261 | * } |
2262 | * for (int x = 4; x < 7; x++) { |
2263 | * B[x] = B[x] + 3; |
2264 | * } |
2265 | * B[5] = 100; |
2266 | * B[6] = 101; |
2267 | * B[7] = 102; |
2268 | */ |
2269 | |
2270 | // Cutting a loop with a smaller loop but then totally overlap that second |
2271 | // loop with one element writes. |
2272 | |
2273 | MemDependencyChecker analyzer({a}, {b}); |
2274 | ForPtr firstLoop = |
2275 | For::make(x, 0, 10, Store::make(b, {x}, Load::make(a, {x}))); |
2276 | StorePtr secondStore = |
2277 | Store::make(b, {x}, Add::make(Load::make(b, {x}), 1)); |
2278 | ForPtr secondLoop = For::make(x, 4, 7, secondStore); |
2279 | |
2280 | StmtPtr stmt = Block::make( |
2281 | {firstLoop, |
2282 | secondLoop, |
2283 | Store::make(b, {4}, 100), |
2284 | Store::make(b, {5}, 101), |
2285 | Store::make(b, {6}, 102)}); |
2286 | |
2287 | stmt->accept(&analyzer); |
2288 | |
2289 | // Output depends on input. |
2290 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
2291 | |
2292 | // Output has 4 depdenencies. |
2293 | auto outputAccess = analyzer.output(b.node()); |
2294 | ASSERT_NE(outputAccess, nullptr); |
2295 | ASSERT_EQ(outputAccess->dependencies().size(), 4); |
2296 | |
2297 | // Second loop depends on first loop. |
2298 | ASSERT_TRUE(analyzer.dependsDirectly(secondLoop, firstLoop)); |
2299 | |
2300 | // Output does not depend on second loop or store. |
2301 | ASSERT_FALSE(analyzer.dependsIndirectly(b.node(), secondLoop)); |
2302 | ASSERT_FALSE(analyzer.dependsIndirectly(b.node(), secondStore)); |
2303 | } |
2304 | } |
2305 | |
2306 | // Dynamic shapes (load in indices). |
2307 | TEST(MemDependency, MemDependencyCheckerDynamicShapes) { |
2308 | BufHandle a("A" , {100}, kInt); |
2309 | BufHandle b("B" , {100}, kInt); |
2310 | BufHandle c("C" , {100}, kInt); |
2311 | VarHandle x("x" , kInt); |
2312 | |
2313 | using namespace analysis; |
2314 | |
2315 | auto CB = [](ExprHandle s, ExprHandle e) { |
2316 | return Bound(s.node(), e.node()); |
2317 | }; |
2318 | |
2319 | auto EQ = [](const IndexBounds& x, const IndexBounds& y) { |
2320 | return indexBoundsEquals(x, y); |
2321 | }; |
2322 | |
2323 | { |
2324 | /* for (int x = 0; x < B[0]; x++) { |
2325 | * C[x] = A[x]; |
2326 | * } |
2327 | */ |
2328 | MemDependencyChecker analyzer({a, b}, {c}); |
2329 | StmtPtr stmt = Block::make({For::make( |
2330 | x, 0, Load::make(b, {0}), Store::make(c, {x}, Load::make(a, {x})))}); |
2331 | |
2332 | stmt->accept(&analyzer); |
2333 | |
2334 | /* 0. Input: B[(0, 99)] - dependents: 2 |
2335 | * 1. Input: A[(0, 99)] - dependents: 3 |
2336 | * 2. Load: B[(0, 0)] - depends on: 0 - dependents: 3 4 |
2337 | * 3. Load: A[(0, (B[0]) - 1)] - depends on: 1 2 - dependents: 4 |
2338 | * 4. Store: C[(0, (B[0]) - 1)] - depends on: 2 3 - dependents: 5 |
2339 | * 5. Output: C[(0, 99)] - depends on: 4 |
2340 | */ |
2341 | |
2342 | // Output dependent on A input. |
2343 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2344 | // Also dependent on B input to determine the size of the region written. |
2345 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2346 | |
2347 | auto history = analyzer.getHistory(); |
2348 | ASSERT_EQ(history.size(), 6); |
2349 | |
2350 | // The accesses in the loop depend on the load in the stop condition. |
2351 | ASSERT_TRUE(history[4]->hasDependency(history[2])); |
2352 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
2353 | |
2354 | // Make a load from B to compare against. |
2355 | ExprHandle loadFromB = Load::make(b, {0}); |
2356 | |
2357 | ASSERT_TRUE(EQ(history[3]->bounds(), {CB(0, loadFromB - 1)})); |
2358 | ASSERT_TRUE(EQ(history[4]->bounds(), {CB(0, loadFromB - 1)})); |
2359 | } |
2360 | |
2361 | { |
2362 | /* for (int x = B[0]; x < B[1]; x++) { |
2363 | * C[x] = A[x]; |
2364 | * } |
2365 | */ |
2366 | MemDependencyChecker analyzer({a, b}, {c}); |
2367 | StmtPtr stmt = Block::make({For::make( |
2368 | x, |
2369 | Load::make(b, {0}), |
2370 | Load::make(b, {1}), |
2371 | Store::make(c, {x}, Load::make(a, {x})))}); |
2372 | |
2373 | stmt->accept(&analyzer); |
2374 | |
2375 | /* 0. Input: B[(0, 99)] - dependents: 2 3 |
2376 | * 1. Input: A[(0, 99)] - dependents: 4 |
2377 | * 2. Load: B[(0, 0)] - depends on: 0 - dependents: 4 5 |
2378 | * 3. Load: B[(1, 1)] - depends on: 0 - dependents: 4 5 |
2379 | * 4. Load: A[(B[0], (B[1]) - 1)] - depends on: 1 2 3 - dependents: 5 |
2380 | * 5. Store: C[(B[0], (B[1]) - 1)] - depends on: 2 3 4 - dependents: 6 |
2381 | * 6. Output: C[(0, 99)] - depends on: 5 |
2382 | */ |
2383 | |
2384 | // Sanity check output depends on input. |
2385 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2386 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2387 | |
2388 | auto history = analyzer.getHistory(); |
2389 | ASSERT_EQ(history.size(), 7); |
2390 | |
2391 | // The accesses in the loop depend on the load in the start condition. |
2392 | ASSERT_TRUE(history[5]->hasDependency(history[2])); |
2393 | ASSERT_TRUE(history[4]->hasDependency(history[2])); |
2394 | |
2395 | // also the stop condition. |
2396 | ASSERT_TRUE(history[5]->hasDependency(history[3])); |
2397 | ASSERT_TRUE(history[4]->hasDependency(history[3])); |
2398 | |
2399 | // Make loads from B to compare against. |
2400 | ExprHandle loadFromB0 = Load::make(b, {0}); |
2401 | ExprHandle loadFromB1 = Load::make(b, {1}); |
2402 | ASSERT_TRUE(EQ(history[4]->bounds(), {CB(loadFromB0, loadFromB1 - 1)})); |
2403 | ASSERT_TRUE(EQ(history[5]->bounds(), {CB(loadFromB0, loadFromB1 - 1)})); |
2404 | } |
2405 | |
2406 | { |
2407 | /* for (int x = 0; x < 10; x++) { |
2408 | * C[x] = A[B[x]]; |
2409 | * } |
2410 | */ |
2411 | MemDependencyChecker analyzer({a, b}, {c}); |
2412 | StmtPtr stmt = Block::make({For::make( |
2413 | x, 0, 10, Store::make(c, {x}, Load::make(a, {Load::make(b, {x})})))}); |
2414 | |
2415 | stmt->accept(&analyzer); |
2416 | |
2417 | /* 0. Input: B[(0, 99)] - dependents: 2 |
2418 | * 1. Input: A[(0, 99)] - dependents: 3 |
2419 | * 2. Load: B[(0, 9)] - depends on: 0 - dependents: 3 4 |
2420 | * 3. Load: A[(B[0], B[9])] - depends on: 1 2 - dependents: 4 |
2421 | * 4. Store: C[(0, 9)] - depends on: 2 3 - dependents: 5 |
2422 | * 5. Output: C[(0, 99)] - depends on: 4 |
2423 | */ |
2424 | |
2425 | // Sanity check output depends on input. |
2426 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2427 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2428 | |
2429 | auto history = analyzer.getHistory(); |
2430 | ASSERT_EQ(history.size(), 6); |
2431 | |
2432 | // The store depends on both loads, the load of A depends on the load of B. |
2433 | ASSERT_TRUE(history[4]->hasDependency(history[2])); |
2434 | ASSERT_TRUE(history[4]->hasDependency(history[3])); |
2435 | |
2436 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
2437 | |
2438 | // The loads in the indices depend on the relevant input buffer. |
2439 | ASSERT_TRUE(history[3]->hasDependency(history[1])); |
2440 | ASSERT_TRUE(history[2]->hasDependency(history[0])); |
2441 | |
2442 | // The load from B has the loop bounds. |
2443 | ASSERT_TRUE(EQ(history[2]->bounds(), {CB(0, 9)})); |
2444 | |
2445 | // The load from A has bounds B[0] to B[9]. |
2446 | ExprHandle loadFromB0 = Load::make(b, {0}); |
2447 | ExprHandle loadFromB9 = Load::make(b, {9}); |
2448 | ASSERT_TRUE(EQ(history[3]->bounds(), {CB(loadFromB0, loadFromB9)})); |
2449 | } |
2450 | |
2451 | { |
2452 | /* for (int x = 0; x < 10; x++) { |
2453 | * C[B[x]] = A[x]; |
2454 | * } |
2455 | */ |
2456 | MemDependencyChecker analyzer({a, b}, {c}); |
2457 | StmtPtr stmt = Block::make({For::make( |
2458 | x, 0, 10, Store::make(c, {Load::make(b, {x})}, Load::make(a, {x})))}); |
2459 | |
2460 | stmt->accept(&analyzer); |
2461 | |
2462 | /* 0. Input: B[(0, 99)] - dependents: 3 |
2463 | * 1. Input: A[(0, 99)] - dependents: 2 |
2464 | * 2. Load: A[(0, 9)] - depends on: 1 - dependents: 4 |
2465 | * 3. Load: B[(0, 9)] - depends on: 0 - dependents: 4 |
2466 | * 4. Store: C[(B[0], B[9])] - depends on: 2 3 - dependents: 5 |
2467 | * 5. Output: C[(0, 99)] - depends on: 4 |
2468 | */ |
2469 | // Sanity check output depends on input. |
2470 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2471 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2472 | |
2473 | auto history = analyzer.getHistory(); |
2474 | ASSERT_EQ(history.size(), 6); |
2475 | |
2476 | // The store depends on both loads, neither load is dependent. |
2477 | ASSERT_TRUE(history[4]->hasDependency(history[2])); |
2478 | ASSERT_TRUE(history[4]->hasDependency(history[3])); |
2479 | |
2480 | ASSERT_FALSE(history[3]->hasDependency(history[2])); |
2481 | ASSERT_FALSE(history[2]->hasDependency(history[3])); |
2482 | |
2483 | // The loads each depend on their relevant input. (but accesses are in a |
2484 | // different order than the last case). |
2485 | ASSERT_TRUE(history[3]->hasDependency(history[0])); |
2486 | ASSERT_TRUE(history[2]->hasDependency(history[1])); |
2487 | |
2488 | // The load from B has the loop bounds. |
2489 | ASSERT_TRUE(EQ(history[3]->bounds(), {CB(0, 9)})); |
2490 | |
2491 | // And so does the load from A. |
2492 | ASSERT_TRUE(EQ(history[2]->bounds(), {CB(0, 9)})); |
2493 | } |
2494 | |
2495 | { |
2496 | /* for (int x = 0; x < 10; x++) { |
2497 | * C[B[A[x]]] = x; |
2498 | * } |
2499 | */ |
2500 | MemDependencyChecker analyzer({a, b}, {c}); |
2501 | StmtPtr stmt = Block::make({For::make( |
2502 | x, 0, 10, Store::make(c, {Load::make(b, {Load::make(a, {x})})}, x))}); |
2503 | |
2504 | stmt->accept(&analyzer); |
2505 | |
2506 | /* 0. Input: B[(0, 99)] - dependents: 3 |
2507 | * 1. Input: A[(0, 99)] - dependents: 2 |
2508 | * 2. Load: A[(0, 9)] - depends on: 1 - dependents: 3 4 |
2509 | * 3. Load: B[(A[0], A[9])] - depends on: 0 2 - dependents: 4 |
2510 | * 4. Store: C[(B[A[0]], B[A[9]])] - depends on: 2 3 - dependents: 5 |
2511 | * 5. Output: C[(0, 99)] - depends on: 4 |
2512 | */ |
2513 | |
2514 | // Sanity check output depends on input. |
2515 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), a.node())); |
2516 | ASSERT_TRUE(analyzer.dependsIndirectly(c.node(), b.node())); |
2517 | |
2518 | auto history = analyzer.getHistory(); |
2519 | ASSERT_EQ(history.size(), 6); |
2520 | |
2521 | // The store depends on both loads. |
2522 | ASSERT_TRUE(history[4]->hasDependency(history[2])); |
2523 | ASSERT_TRUE(history[4]->hasDependency(history[3])); |
2524 | |
2525 | // The outer load depends on the inner. |
2526 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
2527 | |
2528 | // The loads each depend on their relevant input. (but accesses are in a |
2529 | // different order than the last case). |
2530 | ASSERT_TRUE(history[3]->hasDependency(history[0])); |
2531 | ASSERT_TRUE(history[2]->hasDependency(history[1])); |
2532 | |
2533 | // The load from A has the loop bounds. |
2534 | ASSERT_TRUE(EQ(history[2]->bounds(), {CB(0, 9)})); |
2535 | // The load from B as bounds A[0] to A[9]. |
2536 | ExprHandle loadFromA0 = Load::make(a, {0}); |
2537 | ExprHandle loadFromA9 = Load::make(a, {9}); |
2538 | ASSERT_TRUE(EQ(history[3]->bounds(), {CB(loadFromA0, loadFromA9)})); |
2539 | |
2540 | // The store has bounds of B[A[0]] to B[A[9]]. |
2541 | ExprHandle loadFromBA0 = Load::make(b, {loadFromA0}); |
2542 | ExprHandle loadFromBA9 = Load::make(b, {loadFromA9}); |
2543 | ASSERT_TRUE(EQ(history[4]->bounds(), {CB(loadFromBA0, loadFromBA9)})); |
2544 | } |
2545 | } |
2546 | |
2547 | // Verify multi dimensional bounds work. |
2548 | TEST(MemDependency, MemDependencyCheckerMultiDim) { |
2549 | int M = 10, N = 9, K = 12; |
2550 | BufHandle a("A" , {M, N, K}, kInt); |
2551 | BufHandle b("B" , {M, N, K}, kInt); |
2552 | BufHandle c("C" , {M, K}, kInt); |
2553 | VarHandle x("x" , kInt); |
2554 | VarHandle y("y" , kInt); |
2555 | VarHandle z("z" , kInt); |
2556 | |
2557 | using namespace analysis; |
2558 | |
2559 | auto CB = [](ExprHandle s, ExprHandle e) { |
2560 | return Bound(s.node(), e.node()); |
2561 | }; |
2562 | |
2563 | auto EQ = [](const IndexBounds& x, const IndexBounds& y) { |
2564 | return indexBoundsEquals(x, y); |
2565 | }; |
2566 | |
2567 | { |
2568 | /* for (int x = 0; x < 10; x++) { |
2569 | * for (int y = 0; y < 9; y++) { |
2570 | * for (int z = 0; z < 12; z++) { |
2571 | * B[x, y, z] = A[x, y, z]; |
2572 | * } |
2573 | * } |
2574 | * } |
2575 | */ |
2576 | // Full range. |
2577 | |
2578 | MemDependencyChecker analyzer({a}, {b}); |
2579 | StmtPtr stmt = Block::make({For::make( |
2580 | x, |
2581 | 0, |
2582 | M, |
2583 | For::make( |
2584 | y, |
2585 | 0, |
2586 | N, |
2587 | For::make( |
2588 | z, |
2589 | 0, |
2590 | K, |
2591 | Store::make(b, {x, y, z}, Load::make(a, {x, y, z})))))}); |
2592 | |
2593 | stmt->accept(&analyzer); |
2594 | |
2595 | // Sanity test: Output depends on input. |
2596 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
2597 | |
2598 | // 4 accesses: input, load, store, output. |
2599 | auto history = analyzer.getHistory(); |
2600 | ASSERT_EQ(history.size(), 4); |
2601 | |
2602 | // Simple chain from input to output. |
2603 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
2604 | ASSERT_TRUE(history[2]->hasDependency(history[1])); |
2605 | ASSERT_TRUE(history[1]->hasDependency(history[0])); |
2606 | |
2607 | ASSERT_TRUE( |
2608 | EQ(history[1]->bounds(), {CB(0, M - 1), CB(0, N - 1), CB(0, K - 1)})); |
2609 | ASSERT_TRUE( |
2610 | EQ(history[2]->bounds(), {CB(0, M - 1), CB(0, N - 1), CB(0, K - 1)})); |
2611 | } |
2612 | |
2613 | { |
2614 | /* for (int x = 0; x < 5; x++) { |
2615 | * for (int y = 0; y < 5; y++) { |
2616 | * for (int z = 0; z < 5; z++) { |
2617 | * B[x, y, z] = A[x, y, z]; |
2618 | * } |
2619 | * } |
2620 | * } |
2621 | */ |
2622 | // Partial range. |
2623 | |
2624 | MemDependencyChecker analyzer({a}, {b}); |
2625 | StmtPtr stmt = Block::make({For::make( |
2626 | x, |
2627 | 0, |
2628 | 5, |
2629 | For::make( |
2630 | y, |
2631 | 0, |
2632 | 5, |
2633 | For::make( |
2634 | z, |
2635 | 0, |
2636 | 5, |
2637 | Store::make(b, {x, y, z}, Load::make(a, {x, y, z})))))}); |
2638 | |
2639 | stmt->accept(&analyzer); |
2640 | |
2641 | // Sanity test: Output depends on input. |
2642 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
2643 | |
2644 | // 4 accesses: input, load, store, output. |
2645 | auto history = analyzer.getHistory(); |
2646 | ASSERT_EQ(history.size(), 4); |
2647 | |
2648 | // Simple chain from input to output. |
2649 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
2650 | ASSERT_TRUE(history[2]->hasDependency(history[1])); |
2651 | ASSERT_TRUE(history[1]->hasDependency(history[0])); |
2652 | |
2653 | ASSERT_TRUE(EQ(history[1]->bounds(), {CB(0, 4), CB(0, 4), CB(0, 4)})); |
2654 | ASSERT_TRUE(EQ(history[2]->bounds(), {CB(0, 4), CB(0, 4), CB(0, 4)})); |
2655 | } |
2656 | |
2657 | { |
2658 | /* for (int x = 0; x < 10; x++) { |
2659 | * for (int y = 0; y < 12; y++) { |
2660 | * B[x, 0, y] = A[x, 0, y]; |
2661 | * } |
2662 | * } |
2663 | */ |
2664 | |
2665 | // Partial loops. |
2666 | |
2667 | MemDependencyChecker analyzer({a}, {b}); |
2668 | StmtPtr stmt = Block::make({For::make( |
2669 | x, |
2670 | 0, |
2671 | N, |
2672 | For::make( |
2673 | y, 0, K, Store::make(b, {x, 0, y}, Load::make(a, {x, 0, y}))))}); |
2674 | |
2675 | stmt->accept(&analyzer); |
2676 | |
2677 | // Sanity test: Output depends on input. |
2678 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
2679 | |
2680 | // 4 accesses: input, load, store, output. |
2681 | auto history = analyzer.getHistory(); |
2682 | ASSERT_EQ(history.size(), 4); |
2683 | |
2684 | // Simple chain from input to output. |
2685 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
2686 | ASSERT_TRUE(history[2]->hasDependency(history[1])); |
2687 | ASSERT_TRUE(history[1]->hasDependency(history[0])); |
2688 | |
2689 | ASSERT_TRUE( |
2690 | EQ(history[1]->bounds(), {CB(0, N - 1), CB(0, 0), CB(0, K - 1)})); |
2691 | ASSERT_TRUE( |
2692 | EQ(history[2]->bounds(), {CB(0, N - 1), CB(0, 0), CB(0, K - 1)})); |
2693 | } |
2694 | |
2695 | { |
2696 | /* for (int x = 0; x < 10; x++) { |
2697 | * for (int y = 0; y < 100; y++) { |
2698 | * for (int z = 0; z < 12; z++) { |
2699 | * B[x, 0, z] = (A[x, 0, z]) + (C[x, z]); |
2700 | * } |
2701 | * } |
2702 | * } |
2703 | */ |
2704 | |
2705 | // Loops that don't correspond to an index, bufs with different |
2706 | // dimensionality. |
2707 | |
2708 | MemDependencyChecker analyzer({a, c}, {b}); |
2709 | StmtPtr stmt = Block::make({For::make( |
2710 | x, |
2711 | 0, |
2712 | M, |
2713 | For::make( |
2714 | y, |
2715 | 0, |
2716 | 100, |
2717 | For::make( |
2718 | z, |
2719 | 0, |
2720 | K, |
2721 | Store::make( |
2722 | b, |
2723 | {x, 0, z}, |
2724 | Add::make( |
2725 | Load::make(a, {x, 0, z}), Load::make(c, {x, z}))))))}); |
2726 | |
2727 | stmt->accept(&analyzer); |
2728 | |
2729 | // Sanity test: Output depends on both inputs. |
2730 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
2731 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), c.node())); |
2732 | |
2733 | // 6 accesses: 2 inputs, 2 loads, store, output. |
2734 | auto history = analyzer.getHistory(); |
2735 | ASSERT_EQ(history.size(), 6); |
2736 | |
2737 | // Simple chain from input to output over the A buf. |
2738 | // history[0] is the C input, history[3] is the load from C. |
2739 | ASSERT_TRUE(history[5]->hasDependency(history[4])); |
2740 | ASSERT_TRUE(history[4]->hasDependency(history[2])); |
2741 | ASSERT_TRUE(history[2]->hasDependency(history[1])); |
2742 | // The store also depends on the load from the C input. |
2743 | ASSERT_TRUE(history[4]->hasDependency(history[3])); |
2744 | ASSERT_TRUE(history[3]->hasDependency(history[0])); |
2745 | |
2746 | // A Buf accesses. |
2747 | ASSERT_TRUE( |
2748 | EQ(history[4]->bounds(), {CB(0, M - 1), CB(0, 0), CB(0, K - 1)})); |
2749 | ASSERT_TRUE( |
2750 | EQ(history[2]->bounds(), {CB(0, M - 1), CB(0, 0), CB(0, K - 1)})); |
2751 | |
2752 | // C buf access. |
2753 | ASSERT_TRUE(EQ(history[3]->bounds(), {CB(0, M - 1), CB(0, K - 1)})); |
2754 | } |
2755 | |
2756 | { |
2757 | /* for (int x = 0; x < 9; x++) { |
2758 | * for (int y = 0; y < 10; y++) { |
2759 | * for (int z = 0; z < 12; z++) { |
2760 | * B[x, 0, 0] = (B[x, y, z]) + (A[x, y, z]); |
2761 | * } |
2762 | * } |
2763 | * } |
2764 | */ |
2765 | // Multi-dim reductions. |
2766 | |
2767 | MemDependencyChecker analyzer({a}, {b}); |
2768 | StmtPtr stmt = Block::make({For::make( |
2769 | x, |
2770 | 0, |
2771 | M, |
2772 | For::make( |
2773 | y, |
2774 | 0, |
2775 | N, |
2776 | For::make( |
2777 | z, |
2778 | 0, |
2779 | K, |
2780 | Store::make( |
2781 | b, |
2782 | {x, 0, 0}, |
2783 | Add::make( |
2784 | Load::make(b, {x, y, z}), |
2785 | Load::make(a, {x, y, z}))))))}); |
2786 | |
2787 | stmt->accept(&analyzer); |
2788 | |
2789 | // Sanity test: Output depends on input. |
2790 | ASSERT_TRUE(analyzer.dependsIndirectly(b.node(), a.node())); |
2791 | |
2792 | // 4 accesses: input, 2 loads, store, output. |
2793 | auto history = analyzer.getHistory(); |
2794 | ASSERT_EQ(history.size(), 5); |
2795 | |
2796 | // Simple chain from input to output. |
2797 | ASSERT_TRUE(history[4]->hasDependency(history[3])); |
2798 | ASSERT_TRUE(history[3]->hasDependency(history[2])); |
2799 | ASSERT_TRUE(history[3]->hasDependency(history[1])); |
2800 | ASSERT_TRUE(history[2]->hasDependency(history[0])); |
2801 | |
2802 | // The load from B depends on the store to B. |
2803 | ASSERT_TRUE(history[1]->hasDependency(history[3])); |
2804 | |
2805 | ASSERT_TRUE( |
2806 | EQ(history[1]->bounds(), {CB(0, M - 1), CB(0, N - 1), CB(0, K - 1)})); |
2807 | ASSERT_TRUE( |
2808 | EQ(history[2]->bounds(), {CB(0, M - 1), CB(0, N - 1), CB(0, K - 1)})); |
2809 | ASSERT_TRUE(EQ(history[3]->bounds(), {CB(0, M - 1), CB(0, 0), CB(0, 0)})); |
2810 | } |
2811 | } |
2812 | |
2813 | // Various tests using the external Compute/Reduce API. |
2814 | TEST(MemDependency, MemDependencyCheckerComputeAPI) { |
2815 | using namespace analysis; |
2816 | |
2817 | /* for (int m = 0; m < 4; m++) { |
2818 | * for (int n = 0; n < 5; n++) { |
2819 | * for (int k = 0; k < 6; k++) { |
2820 | * broadcast_add[m, n, k] = (a[m, n]) + (b[n, k]); |
2821 | * } |
2822 | * } |
2823 | * } |
2824 | * for (int m_1 = 0; m_1 < 4; m_1++) { |
2825 | * for (int n_1 = 0; n_1 < 5; n_1++) { |
2826 | * for (int k_1 = 0; k_1 < 6; k_1++) { |
2827 | * d[m_1, n_1, k_1] = (broadcast_add(m_1, n_1, k_1)) + float(1); |
2828 | * } |
2829 | * } |
2830 | * } |
2831 | */ |
2832 | |
2833 | // Can determine if 2 loops created by Compute are dependent. |
2834 | BufHandle a_buf("a" , {4, 5}, kFloat); |
2835 | BufHandle b_buf("b" , {5, 6}, kFloat); |
2836 | Tensor c = Compute( |
2837 | "broadcast_add" , |
2838 | {4, 5, 6}, |
2839 | [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { |
2840 | return a_buf.load(m, n) + b_buf.load(n, k); |
2841 | }); |
2842 | Tensor d = Compute( |
2843 | "d" , |
2844 | {4, 5, 6}, |
2845 | [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { |
2846 | return c.load(m, n, k) + 1; |
2847 | }); |
2848 | |
2849 | LoopNest l({d}, {c, d}); |
2850 | |
2851 | MemDependencyChecker analyzer({a_buf.node(), b_buf.node()}, {d.buf()}); |
2852 | |
2853 | l.root_stmt()->accept(&analyzer); |
2854 | |
2855 | // Sanity test: Output depends on input. |
2856 | ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.node())); |
2857 | ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.node())); |
2858 | |
2859 | // Second loop depends on first loop. |
2860 | auto c_loop = l.getLoopStmtsFor(c)[0]; |
2861 | auto d_loop = l.getLoopStmtsFor(d)[0]; |
2862 | ASSERT_TRUE(analyzer.dependsDirectly(d_loop, c_loop)); |
2863 | } |
2864 | |
2865 | TEST(MemDependency, MemDependencyCheckerComputeInline) { |
2866 | using namespace analysis; |
2867 | |
2868 | /* for (int m = 0; m < 4; m++) { |
2869 | * for (int n = 0; n < 5; n++) { |
2870 | * for (int k = 0; k < 6; k++) { |
2871 | * d[m, n, k] = ((a[m, n]) + (b[n, k])) + float(1); |
2872 | * } |
2873 | * } |
2874 | * } |
2875 | */ |
2876 | |
2877 | // Check inlining affects the number of accesses returned. |
2878 | |
2879 | BufHandle a_buf("a" , {4, 5}, kFloat); |
2880 | BufHandle b_buf("b" , {5, 6}, kFloat); |
2881 | Tensor c = Compute( |
2882 | "broadcast_add" , |
2883 | {4, 5, 6}, |
2884 | [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { |
2885 | return a_buf.load(m, n) + b_buf.load(n, k); |
2886 | }); |
2887 | Tensor d = Compute( |
2888 | "d" , |
2889 | {4, 5, 6}, |
2890 | [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { |
2891 | return c.load(m, n, k) + 1; |
2892 | }); |
2893 | |
2894 | LoopNest l({d}, {c, d}); |
2895 | l.computeInline(c.buf()); |
2896 | |
2897 | MemDependencyChecker analyzer({a_buf.node(), b_buf.node()}, {d.buf()}); |
2898 | l.root_stmt()->accept(&analyzer); |
2899 | |
2900 | // Sanity test: Output depends on input. |
2901 | ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.node())); |
2902 | ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.node())); |
2903 | |
2904 | // broadcast_add tensor should not appear in trace at all. |
2905 | for (auto& wi : analyzer.getHistory()) { |
2906 | ASSERT_NE(wi->var(), c.buf()->base_handle()); |
2907 | } |
2908 | } |
2909 | |
2910 | TEST(MemDependency, MemDependencyCheckerComputeSplit) { |
2911 | using namespace analysis; |
2912 | // Split an axis, so the number of loops != the number of dimensions. |
2913 | |
2914 | BufHandle a_buf("a" , {4, 5}, kFloat); |
2915 | BufHandle b_buf("b" , {5, 6}, kFloat); |
2916 | Tensor c = Compute( |
2917 | "broadcast_add" , |
2918 | {4, 5, 6}, |
2919 | [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { |
2920 | return a_buf.load(m, n) + b_buf.load(n, k); |
2921 | }); |
2922 | |
2923 | LoopNest l({c}); |
2924 | |
2925 | MemDependencyChecker analyzer_before({a_buf.node(), b_buf.node()}, {c.buf()}); |
2926 | l.root_stmt()->accept(&analyzer_before); |
2927 | |
2928 | l.splitWithTail(l.getLoopStmtsFor(c)[0], 2); |
2929 | |
2930 | MemDependencyChecker analyzer_after({a_buf.node(), b_buf.node()}, {c.buf()}); |
2931 | StmtPtr stmt = IRSimplifier::simplify(l.root_stmt()); |
2932 | stmt->accept(&analyzer_after); |
2933 | |
2934 | // Splitting should not change accesses at all. |
2935 | auto history_before = analyzer_before.getHistory(); |
2936 | auto history_after = analyzer_after.getHistory(); |
2937 | |
2938 | ASSERT_EQ(history_before.size(), history_after.size()); |
2939 | |
2940 | for (size_t i = 0; i < history_before.size(); ++i) { |
2941 | ASSERT_EQ(history_before[i]->type(), history_after[i]->type()); |
2942 | ASSERT_EQ(history_before[i]->var(), history_after[i]->var()); |
2943 | ASSERT_EQ( |
2944 | history_before[i]->bounds().size(), history_after[i]->bounds().size()); |
2945 | ASSERT_TRUE(indexBoundsEquals( |
2946 | history_before[i]->bounds(), history_after[i]->bounds())); |
2947 | ASSERT_EQ( |
2948 | history_before[i]->dependencies().size(), |
2949 | history_after[i]->dependencies().size()); |
2950 | ASSERT_EQ( |
2951 | history_before[i]->dependents().size(), |
2952 | history_after[i]->dependents().size()); |
2953 | } |
2954 | } |
2955 | |
2956 | TEST(MemDependency, MemDependencyCheckerComputeReorder) { |
2957 | using namespace analysis; |
2958 | // Reorder an axis, so the loop order doesn't match the indexing order. |
2959 | |
2960 | BufHandle a_buf("a" , {4, 5}, kFloat); |
2961 | BufHandle b_buf("b" , {5, 6}, kFloat); |
2962 | Tensor c = Compute( |
2963 | "broadcast_add" , |
2964 | {4, 5, 6}, |
2965 | [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { |
2966 | return a_buf.load(m, n) + b_buf.load(n, k); |
2967 | }); |
2968 | |
2969 | LoopNest l({c}); |
2970 | |
2971 | MemDependencyChecker analyzer_before({a_buf.node(), b_buf.node()}, {c.buf()}); |
2972 | l.root_stmt()->accept(&analyzer_before); |
2973 | |
2974 | auto loops = l.getLoopStmtsFor(c); |
2975 | l.reorderAxis(loops[0], loops[1]); |
2976 | |
2977 | MemDependencyChecker analyzer_after({a_buf.node(), b_buf.node()}, {c.buf()}); |
2978 | StmtPtr stmt = IRSimplifier::simplify(l.root_stmt()); |
2979 | stmt->accept(&analyzer_after); |
2980 | |
2981 | // Reordering should not change accesses at all. |
2982 | auto history_before = analyzer_before.getHistory(); |
2983 | auto history_after = analyzer_after.getHistory(); |
2984 | |
2985 | ASSERT_EQ(history_before.size(), history_after.size()); |
2986 | |
2987 | for (size_t i = 0; i < history_before.size(); ++i) { |
2988 | ASSERT_EQ(history_before[i]->type(), history_after[i]->type()); |
2989 | ASSERT_EQ(history_before[i]->var(), history_after[i]->var()); |
2990 | ASSERT_EQ( |
2991 | history_before[i]->bounds().size(), history_after[i]->bounds().size()); |
2992 | ASSERT_TRUE(indexBoundsEquals( |
2993 | history_before[i]->bounds(), history_after[i]->bounds())); |
2994 | ASSERT_EQ( |
2995 | history_before[i]->dependencies().size(), |
2996 | history_after[i]->dependencies().size()); |
2997 | ASSERT_EQ( |
2998 | history_before[i]->dependents().size(), |
2999 | history_after[i]->dependents().size()); |
3000 | } |
3001 | } |
3002 | |
3003 | TEST(MemDependency, MemDependencyCheckerComputeReduce) { |
3004 | using namespace analysis; |
3005 | /* for (int l2 = 0; l2 < 2; l2++) { |
3006 | * for (int n1 = 0; n1 < 3; n1++) { |
3007 | * for (int m1 = 0; m1 < 6; m1++) { |
3008 | * scale[l2, n1, m1] = (b[l2, n1, m1]) * (a[l2, n1, m1]); |
3009 | * } |
3010 | * } |
3011 | * } |
3012 | * for (int l1 = 0; l1 < 2; l1++) { |
3013 | * sum[l1] = float(0); |
3014 | * for (int n1_1 = 0; n1_1 < 3; n1_1++) { |
3015 | * for (int m1_1 = 0; m1_1 < 6; m1_1++) { |
3016 | * sum[l1] = ReduceOp(sum, (sum[l1]) + (scale(l1, n1_1, m1_1)), |
3017 | * out_args={l1}, reduce_args={n1, m1}); |
3018 | * } |
3019 | * } |
3020 | * } |
3021 | */ |
3022 | |
3023 | // Can determine dependencies of a Reduction. |
3024 | |
3025 | BufHandle a("a" , {2, 3, 6}, kFloat); |
3026 | BufHandle b("b" , {2, 3, 6}, kFloat); |
3027 | |
3028 | Tensor c = Compute( |
3029 | "scale" , |
3030 | {2, 3, 6}, |
3031 | [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { |
3032 | return b.load(l, n, m) * a.load(l, n, m); |
3033 | }); |
3034 | Tensor d = Reduce("sum" , {2}, Sum(), c, {3, 6}); |
3035 | LoopNest l({d}, {c, d}); |
3036 | |
3037 | MemDependencyChecker analyzer({a.node(), b.node()}, {d.buf()}); |
3038 | |
3039 | l.root_stmt()->accept(&analyzer); |
3040 | |
3041 | // Sanity test: Output depends on input. |
3042 | ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a.node())); |
3043 | ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b.node())); |
3044 | |
3045 | // Second loop depends on first loop. |
3046 | auto c_loop = l.getLoopStmtsFor(c)[0]; |
3047 | auto d_loop = l.getLoopStmtsFor(d)[0]; |
3048 | ASSERT_TRUE(analyzer.dependsDirectly(d_loop, c_loop)); |
3049 | |
3050 | // Reduction depends on both inputs. |
3051 | auto reduces = NodeFinder<ReduceOp>::find(l.root_stmt()); |
3052 | ASSERT_TRUE(analyzer.dependsIndirectly(reduces[0], a.node())); |
3053 | ASSERT_TRUE(analyzer.dependsIndirectly(reduces[0], b.node())); |
3054 | } |
3055 | |
3056 | TEST(MemDependency, MemDependencyCheckerComputeGEMM) { |
3057 | int M = 1024; |
3058 | int N = 1024; |
3059 | int K = 2048; |
3060 | using namespace analysis; |
3061 | |
3062 | BufHandle AP("A" , {M, K}, kFloat); |
3063 | BufHandle BP("B" , {K, N}, kFloat); |
3064 | Tensor CT = Reduce( |
3065 | "gemm" , |
3066 | {M, N}, |
3067 | Sum(), |
3068 | [&](const ExprHandle& m, const ExprHandle& n, const ExprHandle& k) { |
3069 | return AP.load(m, k) * BP.load(k, n); |
3070 | }, |
3071 | {K}); |
3072 | LoopNest loop({CT}); |
3073 | |
3074 | { |
3075 | auto const& loops = loop.getLoopStmtsFor(CT); |
3076 | ForPtr m = loops[0]; |
3077 | loop.splitWithMask(m, 4); |
3078 | } |
3079 | { |
3080 | auto const& loops = loop.getLoopStmtsFor(CT); |
3081 | ForPtr n = loops[2]; |
3082 | loop.splitWithMask(n, 16); |
3083 | } |
3084 | // mo, mi, no, ni, k -> |
3085 | // mo, no, mi, ni, k |
3086 | { |
3087 | auto const& loops = loop.getLoopStmtsFor(CT); |
3088 | ForPtr mi = loops[1]; |
3089 | ForPtr no = loops[2]; |
3090 | loop.reorderAxis(mi, no); |
3091 | } |
3092 | // mo, no, mi, ni, k -> |
3093 | // mo, no, mi, k, ni |
3094 | { |
3095 | auto const& loops = loop.getLoopStmtsFor(CT); |
3096 | ForPtr ni = loops[3]; |
3097 | ForPtr k = loops[4]; |
3098 | loop.reorderAxis(ni, k); |
3099 | } |
3100 | // mo, no, mi, k, ni -> |
3101 | // mo, no, k, mi, ni |
3102 | { |
3103 | auto const& loops = loop.getLoopStmtsFor(CT); |
3104 | ForPtr mi = loops[2]; |
3105 | ForPtr k = loops[3]; |
3106 | loop.reorderAxis(mi, k); |
3107 | } |
3108 | { |
3109 | auto const& loops = loop.getLoopStmtsFor(CT); |
3110 | loop.cacheAccesses(CT.buf(), "C_regs" , loops[2]); |
3111 | } |
3112 | |
3113 | MemDependencyChecker analyzer_unlowered( |
3114 | loop.getInputBufs(), loop.getOutputBufs()); |
3115 | |
3116 | MemDependencyChecker analyzer_lowered( |
3117 | loop.getInputBufs(), loop.getOutputBufs()); |
3118 | |
3119 | // Test both unlowered and lowered form. |
3120 | { |
3121 | StmtPtr stmt = IRSimplifier::simplify(loop.root_stmt()); |
3122 | stmt->accept(&analyzer_unlowered); |
3123 | |
3124 | // Outputs depend on inputs. |
3125 | ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), AP.node())); |
3126 | ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), BP.node())); |
3127 | |
3128 | // The last write to gemm should cover the total bound of the output. |
3129 | std::shared_ptr<AccessInfo> outputAccess = |
3130 | analyzer_unlowered.output(CT.buf()); |
3131 | // A single dependency. |
3132 | ASSERT_EQ(outputAccess->dependencies().size(), 1); |
3133 | |
3134 | // dependencies is a set with 1 element, so can just deref begin(). |
3135 | std::shared_ptr<AccessInfo> gemmStore = |
3136 | outputAccess->dependencies().begin()->second; |
3137 | // Check its a store. |
3138 | ASSERT_EQ(gemmStore->type(), AccessType::Store); |
3139 | |
3140 | ASSERT_TRUE(indexBoundsEquals(outputAccess->bounds(), gemmStore->bounds())); |
3141 | |
3142 | // Likewise the first read from each input cover the entire range of the |
3143 | // input. |
3144 | auto aInput = analyzer_unlowered.input(AP.node()); |
3145 | auto bInput = analyzer_unlowered.input(BP.node()); |
3146 | |
3147 | // A single dependent each. |
3148 | ASSERT_EQ(aInput->dependents().size(), 1); |
3149 | ASSERT_EQ(bInput->dependents().size(), 1); |
3150 | |
3151 | // They're both loads. |
3152 | std::shared_ptr<AccessInfo> aLoad = aInput->dependents().begin()->second; |
3153 | std::shared_ptr<AccessInfo> bLoad = bInput->dependents().begin()->second; |
3154 | ASSERT_EQ(aLoad->type(), AccessType::Load); |
3155 | ASSERT_EQ(bLoad->type(), AccessType::Load); |
3156 | |
3157 | ASSERT_TRUE(indexBoundsEquals(aInput->bounds(), aLoad->bounds())); |
3158 | ASSERT_TRUE(indexBoundsEquals(bInput->bounds(), bLoad->bounds())); |
3159 | } |
3160 | |
3161 | loop.prepareForCodegen(); |
3162 | SimpleIREvaluator cg(loop.root_stmt(), {AP, BP, CT}); |
3163 | |
3164 | // now check lowered dependency graph. |
3165 | { |
3166 | StmtPtr stmt = IRSimplifier::simplify(cg.stmt()); |
3167 | stmt->accept(&analyzer_lowered); |
3168 | |
3169 | // Lowering will change the dimensionality of all bounds due to index |
3170 | // flattening and will insert Allocates and Frees. |
3171 | |
3172 | auto history_before = analyzer_unlowered.getHistory(); |
3173 | auto history_after = analyzer_lowered.getHistory(); |
3174 | |
3175 | ASSERT_EQ(history_before.size() + 2, history_after.size()); |
3176 | |
3177 | // Filter out the alloc/free; |
3178 | auto isAllocFree = [](const auto& info) { |
3179 | return info->type() == AccessType::Alloc || |
3180 | info->type() == AccessType::Free; |
3181 | }; |
3182 | history_after.erase( |
3183 | std::remove_if(history_after.begin(), history_after.end(), isAllocFree), |
3184 | history_after.end()); |
3185 | |
3186 | ASSERT_EQ(history_before.size(), history_after.size()); |
3187 | |
3188 | for (size_t i = 0; i < history_before.size(); ++i) { |
3189 | ASSERT_EQ(history_before[i]->type(), history_after[i]->type()); |
3190 | ASSERT_EQ(history_before[i]->var(), history_after[i]->var()); |
3191 | |
3192 | if (history_before[i]->dependencies().size() != |
3193 | history_after[i]->dependencies().size()) { |
3194 | // Must depend on an Alloc. |
3195 | ASSERT_TRUE(std::any_of( |
3196 | history_after[i]->dependencies().begin(), |
3197 | history_after[i]->dependencies().end(), |
3198 | [](const auto& pair) { |
3199 | return pair.second->type() == AccessType::Alloc; |
3200 | })); |
3201 | |
3202 | ASSERT_EQ( |
3203 | history_before[i]->dependencies().size() + 1, |
3204 | history_after[i]->dependencies().size()); |
3205 | } |
3206 | |
3207 | if (history_before[i]->dependents().size() != |
3208 | history_after[i]->dependents().size()) { |
3209 | // Must depend on an Free. |
3210 | ASSERT_TRUE(std::any_of( |
3211 | history_after[i]->dependents().begin(), |
3212 | history_after[i]->dependents().end(), |
3213 | [](const auto& pair) { |
3214 | return pair.second->type() == AccessType::Free; |
3215 | })); |
3216 | |
3217 | ASSERT_EQ( |
3218 | history_before[i]->dependents().size() + 1, |
3219 | history_after[i]->dependents().size()); |
3220 | } |
3221 | |
3222 | // Inputs and outputs are not flattened, only accesses. |
3223 | if (history_before[i]->type() == AccessType::Input || |
3224 | history_before[i]->type() == AccessType::Output) { |
3225 | ASSERT_EQ( |
3226 | history_before[i]->bounds().size(), |
3227 | history_after[i]->bounds().size()); |
3228 | ASSERT_TRUE(indexBoundsEquals( |
3229 | history_before[i]->bounds(), history_after[i]->bounds())); |
3230 | } else { |
3231 | ASSERT_EQ(history_after[i]->bounds().size(), 1); |
3232 | ExprPtr flat_bounds = alloc<IntImm>(1); |
3233 | |
3234 | for (auto& b : history_before[i]->bounds()) { |
3235 | flat_bounds = |
3236 | alloc<Mul>(flat_bounds, alloc<Add>(b.end, alloc<IntImm>(1))); |
3237 | |
3238 | // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) |
3239 | ASSERT_TRUE(exprEquals(b.start, history_after[i]->bounds()[0].start)); |
3240 | } |
3241 | |
3242 | flat_bounds = IRSimplifier::simplify(flat_bounds); |
3243 | ExprPtr after_bounds = IRSimplifier::simplify( |
3244 | alloc<Add>(history_after[i]->bounds()[0].end, alloc<IntImm>(1))); |
3245 | ASSERT_TRUE(exprEquals(flat_bounds, after_bounds)); |
3246 | } |
3247 | } |
3248 | } |
3249 | } |
3250 | |
3251 | } // namespace jit |
3252 | } // namespace torch |
3253 | |