1/* <copyright>
2 This file is provided under a dual BSD/GPLv2 license. When using or
3 redistributing this file, you may do so under either license.
4
5 GPL LICENSE SUMMARY
6
7 Copyright (c) 2017-2020 Intel Corporation. All rights reserved.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of version 2 of the GNU General Public License as
11 published by the Free Software Foundation.
12
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 The full GNU General Public License is included in this distribution
22 in the file called LICENSE.GPL.
23
24 Contact Information:
25 http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
26
27 BSD LICENSE
28
29 Copyright (c) 2017-2020 Intel Corporation. All rights reserved.
30 All rights reserved.
31
32 Redistribution and use in source and binary forms, with or without
33 modification, are permitted provided that the following conditions
34 are met:
35
36 * Redistributions of source code must retain the above copyright
37 notice, this list of conditions and the following disclaimer.
38 * Redistributions in binary form must reproduce the above copyright
39 notice, this list of conditions and the following disclaimer in
40 the documentation and/or other materials provided with the
41 distribution.
42 * Neither the name of Intel Corporation nor the names of its
43 contributors may be used to endorse or promote products derived
44 from this software without specific prior written permission.
45
46 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
47 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
48 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
49 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
50 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
51 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
52 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
56 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57</copyright> */
58
////////////////////////////////////////////////////////////////////////////
////// Intel Processor Trace Marker Functionality
//////
////// GNU assembler, AT&T operand order, x86-64.
////// The routines below read their arguments from %rdi, %rsi and %rdx.
////////////////////////////////////////////////////////////////////////////

#if defined(__ELF__)
    // Declare that this object does not need an executable stack.
    // Without this empty note section GNU ld assumes that an object
    // built from assembly requests an executable stack.
    .section .note.GNU-stack, "", @progbits
#endif

    .text
    .align  16

    // Entry points exported from this file.
    .globl  __itt_pt_mark
    .globl  __itt_pt_event
    .globl  __itt_pt_mark_event
    .globl  __itt_pt_mark_threshold
    .globl  __itt_pt_byte
    .globl  __itt_pt_write
71
/// void __itt_pt_mark(unsigned char index);
///
/// Transfers control through an indirect jump to entry `index` of
/// __itt_pt_mark_call_table.  Every entry starts with a plain return, so
/// control goes straight back to the caller of this routine.
///
/// In:    %rdi = index (only the low 8 bits are used)
/// Clobb: %rax, %rdi, flags
__itt_pt_mark:
__itt_pt_mark_int:
    andq    $0xff, %rdi                 // index &= 0xff, range 0..255
    call    __itt_pt_mark_pic           // push the address of the next instruction
__itt_pt_mark_pic:
    popq    %rax                        // rax = run-time address of __itt_pt_mark_pic
    // rdi = table address + index * 4 (each table entry is 4 bytes long)
    leaq    (__itt_pt_mark_call_table - __itt_pt_mark_pic)(%rax,%rdi,4), %rdi
    jmpq    *%rdi                       // the table entry returns to our caller

    // Constant words placed directly in front of the call table.
    // NOTE(review): presumably a signature used to locate the table in the
    // binary image; confirm against the trace decoder.
    .long   0, 1, 2, 3                  // GUID
    .long   0xfadefade
84
// Marker call table: 256 entries, 4 bytes per entry, ordered by index.
//
// Entry N consists of
//     retq            (1 byte)  - the jump target used by __itt_pt_mark;
//                                 returns to the caller of __itt_pt_mark
//     retq $N         (3 bytes) - never a jump target; it pads the entry to
//                                 4 bytes and carries N as its immediate
//
// The two nested .irp loops walk the high and the low hexadecimal digit of
// N, so the entries are emitted for N = 0x00, 0x01, ..., 0xff in that order.
__itt_pt_mark_call_table:
    .irp    hi, 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
    .irp    lo, 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
    retq
    retq    $0x\hi\lo
    .endr
    .endr

    .align  16
615
// __itt_pt_byte / __itt_pt_byte_int
//
// Transfers control through an indirect jump to byte number `value` of
// __itt_pt_byte_call_table.  Every byte of that table is a one-byte return
// instruction, so control goes straight back to the caller.
//
// In:    %rdi = value (only the low 8 bits are used)
// Clobb: %rcx, %rdi, flags
__itt_pt_byte:
__itt_pt_byte_int:
    andq    $0xff, %rdi                 // value &= 0xff, range 0..255
    call    __itt_pt_byte_pic           // push the address of the next instruction
__itt_pt_byte_pic:
    popq    %rcx                        // rcx = run-time address of __itt_pt_byte_pic
    // rdi = table address + value (one byte per table entry)
    leaq    (__itt_pt_byte_call_table - __itt_pt_byte_pic)(%rcx,%rdi,1), %rdi
    jmpq    *%rdi                       // the table entry returns to our caller

    .align  4

    // Constant words placed directly in front of the call table.
    // NOTE(review): presumably a signature used to locate the table in the
    // binary image; confirm against the trace decoder.
    .long   0, 1, 2, 3                  // GUID
    .long   0xfadedeaf

// 256 one-byte entries, each a plain return (opcode 0xc3).
__itt_pt_byte_call_table:
    .rept   256
    retq
    .endr

    .align  16
637
// __itt_pt_event / __itt_pt_event_int
//
// Executes RDPMC with the counter selector taken from the argument and
// passes the 64-bit result in %edx:%eax to __itt_pt_byte_int one byte at a
// time: the four bytes of %eax first, then the four bytes of %edx, each
// register from its least significant byte upwards.
//
// In:    %rdi = value copied unmodified into %rcx as the RDPMC selector
// Clobb: %rax, %rdx, %rdi, flags
// Keeps: %rcx (saved on entry and restored before returning; the byte
//        helper overwrites it on every call)
__itt_pt_event:
__itt_pt_event_int:
    pushq   %rcx
    movq    %rdi, %rcx                  // RDPMC takes its selector in ecx
    rdpmc                               // edx:eax = counter value

    // Low half of the counter: bytes 0..3 of eax.
    xorq    %rdi, %rdi
    movb    %al, %dil
    call    __itt_pt_byte_int
    .rept   3
    shrl    $8, %eax                    // move the next byte into al
    movb    %al, %dil
    call    __itt_pt_byte_int
    .endr

    // High half of the counter: bytes 0..3 of edx.
    movb    %dl, %dil
    call    __itt_pt_byte_int
    .rept   3
    shrl    $8, %edx                    // move the next byte into dl
    movb    %dl, %dil
    call    __itt_pt_byte_int
    .endr

    popq    %rcx
    retq

    .align  16
674
// __itt_pt_mark_event
//
// Combines a marker with a counter event.  The order depends on bit 0 of
// the index:
//   bit 0 clear: event for counter selector 0 first, then the marker
//   bit 0 set:   marker first, then the event for counter selector 0
// The second step is reached with a tail jump, so its return goes directly
// to the caller of this routine.
//
// In:    %rdi = marker index
// Clobb: %rsi (holds the index across the event call on the even path),
//        plus everything __itt_pt_mark_int and __itt_pt_event_int clobber
__itt_pt_mark_event:
    testq   $1, %rdi
    jnz     .Litt_pt_mark_event_odd

    // Even index: event, then marker.
    movq    %rdi, %rsi                  // the event routine overwrites rdi
    xorq    %rdi, %rdi                  // counter selector 0
    call    __itt_pt_event_int
    movq    %rsi, %rdi                  // restore the index for the marker
    jmp     __itt_pt_mark_int

.Litt_pt_mark_event_odd:
    // Odd index: marker, then event.
    call    __itt_pt_mark_int
    xorq    %rdi, %rdi                  // counter selector 0
    jmp     __itt_pt_event_int


    .align  16
692
// __itt_pt_flush
//
// Executes three chained indirect jumps and then returns.  The jump targets
// sit 1, 2 and 3 bytes past a 16-byte boundary respectively.
// NOTE(review): going by the name, the extra indirect branches are
// presumably there to push pending trace data out; confirm against the
// trace decoder.
//
// In:    nothing
// Clobb: %rax, %rdx
__itt_pt_flush:
    call    __itt_pt_flush_pic          // push the address of the next instruction
__itt_pt_flush_pic:
    popq    %rdx                        // rdx = run-time address of __itt_pt_flush_pic
    leaq    (__itt_pt_mark_flush_1 - __itt_pt_flush_pic)(%rdx), %rax
    jmpq    *%rax

    .align  16
    nop                                 // target is boundary + 1
__itt_pt_mark_flush_1:
    leaq    (__itt_pt_mark_flush_2 - __itt_pt_flush_pic)(%rdx), %rax
    jmpq    *%rax

    .align  16
    .rept   2                           // target is boundary + 2
    nop
    .endr
__itt_pt_mark_flush_2:
    leaq    (__itt_pt_mark_flush_3 - __itt_pt_flush_pic)(%rdx), %rax
    jmpq    *%rax

    .align  16
    .rept   3                           // target is boundary + 3
    nop
    .endr
__itt_pt_mark_flush_3:
    retq

    .align  16
722
// int __itt_pt_mark_threshold(unsigned char index, unsigned long long* tmp, int threshold);
//
// Marker with a counter-based threshold check.  The counter is read with
// RDPMC using selector (1 << 30) + 1; per the Intel SDM bit 30 selects the
// fixed-function counters, so this reads fixed-function counter 1.
//
//   index bit 0 clear ("begin"): store the current counter value to *tmp,
//                                then emit the marker.
//   index bit 0 set   ("end"):   compute counter - *tmp; emit the marker,
//                                and if the difference is not below the
//                                threshold (unsigned compare) also run
//                                __itt_pt_flush.
//
// In:    %rdi = index
//        %rsi = tmp
//        %edx = threshold
// Clobb: %rax, %rcx, %rdx, %rdi, %r8, flags
//
// The threshold is a 32-bit int, and the calling convention leaves the upper
// half of %rdx undefined for such an argument, so only %edx is read and it
// is zero-extended before the 64-bit compare.  A negative threshold
// therefore acts as a large unsigned 32-bit value.
//
// NOTE(review): no instruction here loads a result into %eax although the
// prototype returns int; on return %rax holds whatever __itt_pt_mark_int or
// __itt_pt_flush left there.  Confirm that callers ignore the result.
__itt_pt_mark_threshold:
    movl    %edx, %r8d                  // r8 = (u64)(u32)threshold
    movl    $((1 << 30) + 1), %ecx      // RDPMC selector
    rdpmc                               // edx:eax = counter, upper halves cleared
    shlq    $32, %rdx
    orq     %rax, %rdx                  // rdx = full 64-bit counter value
    testb   $1, %dil
    jnz     .Litt_pt_mark_threshold_end

    // Begin marker: remember the counter value, then emit the marker.
    movq    %rdx, (%rsi)
    jmp     __itt_pt_mark_int

.Litt_pt_mark_threshold_end:
    // End marker: rdx = counter delta since the matching begin marker.
    subq    (%rsi), %rdx
    cmpq    %r8, %rdx
    jb      __itt_pt_mark_int           // below the threshold: marker only

    // Threshold reached: marker followed by a flush.
    call    __itt_pt_mark_int
    jmp     __itt_pt_flush
753
// PTWRITE

    .align 16

// void __itt_pt_write(unsigned long long value);
//
// Executes PTWRITE on the 64-bit argument.
//
// In:    %rdi = value
// Clobb: nothing
//
// The instruction is emitted as raw bytes: F3 REX.W 0F AE /4 with a register
// operand.  The ModRM byte 0xE7 is mod = 11, reg = /4, rm = 111 (%rdi).
// %rdi is the register the single argument arrives in, the same register
// every other routine in this file reads its first argument from.

    .long 0, 1, 2, 3      // GUID

__itt_pt_write:

//  ptwrite %rdi
    .byte 0xF3, 0x48, 0x0F, 0xAE, 0xE7
    ret
767