1 | /******************************************************************************* |
2 | * Copyright 2016-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | /******************************************************************************* |
18 | * Copyright (c) 2007 MITSUNARI Shigeo |
19 | * All rights reserved. |
20 | * |
21 | * Redistribution and use in source and binary forms, with or without |
22 | * modification, are permitted provided that the following conditions are met: |
23 | * |
24 | * Redistributions of source code must retain the above copyright notice, this |
25 | * list of conditions and the following disclaimer. |
26 | * Redistributions in binary form must reproduce the above copyright notice, |
27 | * this list of conditions and the following disclaimer in the documentation |
28 | * and/or other materials provided with the distribution. |
29 | * Neither the name of the copyright owner nor the names of its contributors may |
30 | * be used to endorse or promote products derived from this software without |
31 | * specific prior written permission. |
32 | * |
33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
34 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
35 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
36 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
37 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
38 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
39 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
40 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
41 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
42 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
43 | * THE POSSIBILITY OF SUCH DAMAGE. |
44 | *******************************************************************************/ |
45 | |
46 | #pragma once |
47 | #ifndef XBYAK_XBYAK_H_ |
48 | #define XBYAK_XBYAK_H_ |
49 | /*! |
50 | @file xbyak.h |
51 | @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++ |
52 | @author herumi |
53 | @url https://github.com/herumi/xbyak |
54 | @note modified new BSD license |
55 | http://opensource.org/licenses/BSD-3-Clause |
56 | */ |
57 | #if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not |
58 | #define XBYAK_NO_OP_NAMES |
59 | #endif |
60 | |
61 | #include <stdio.h> // for debug print |
62 | #include <assert.h> |
63 | #include <list> |
64 | #include <string> |
65 | #include <algorithm> |
66 | #ifndef NDEBUG |
67 | #include <iostream> |
68 | #endif |
69 | |
70 | // #define XBYAK_DISABLE_AVX512 |
71 | |
72 | #if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR) |
73 | #define XBYAK_USE_MMAP_ALLOCATOR |
74 | #endif |
75 | #if !defined(__GNUC__) || defined(__MINGW32__) |
76 | #undef XBYAK_USE_MMAP_ALLOCATOR |
77 | #endif |
78 | |
79 | #ifdef __GNUC__ |
80 | #define XBYAK_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor)) |
81 | #else |
82 | #define XBYAK_GNUC_PREREQ(major, minor) 0 |
83 | #endif |
84 | |
85 | // This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft. |
86 | #if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\ |
87 | ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__))) |
88 | #include <unordered_set> |
89 | #define XBYAK_STD_UNORDERED_SET std::unordered_set |
90 | #include <unordered_map> |
91 | #define XBYAK_STD_UNORDERED_MAP std::unordered_map |
92 | #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap |
93 | |
94 | /* |
95 | Clang/llvm-gcc and ICC-EDG in 'GCC-mode' always claim to be GCC 4.2, using |
96 | libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version). |
97 | */ |
98 | #elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__) |
99 | #include <tr1/unordered_set> |
100 | #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set |
101 | #include <tr1/unordered_map> |
102 | #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map |
103 | #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap |
104 | |
105 | #elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600) |
106 | #include <unordered_set> |
107 | #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set |
108 | #include <unordered_map> |
109 | #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map |
110 | #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap |
111 | |
112 | #else |
113 | #include <set> |
114 | #define XBYAK_STD_UNORDERED_SET std::set |
115 | #include <map> |
116 | #define XBYAK_STD_UNORDERED_MAP std::map |
117 | #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap |
118 | #endif |
119 | #ifdef _WIN32 |
120 | #ifndef WIN32_LEAN_AND_MEAN |
121 | #define WIN32_LEAN_AND_MEAN |
122 | #endif |
123 | #include <windows.h> |
124 | #include <malloc.h> |
125 | #ifdef _MSC_VER |
126 | #define XBYAK_TLS __declspec(thread) |
127 | #else |
128 | #define XBYAK_TLS __thread |
129 | #endif |
130 | #elif defined(__GNUC__) |
131 | #include <unistd.h> |
132 | #include <sys/mman.h> |
133 | #include <stdlib.h> |
134 | #define XBYAK_TLS __thread |
135 | #endif |
136 | #if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT) |
137 | #define XBYAK_USE_MAP_JIT |
138 | #include <sys/sysctl.h> |
139 | #ifndef MAP_JIT |
140 | #define MAP_JIT 0x800 |
141 | #endif |
142 | #endif |
143 | #if !defined(_MSC_VER) || (_MSC_VER >= 1600) |
144 | #include <stdint.h> |
145 | #endif |
146 | |
147 | // MFD_CLOEXEC defined only linux 3.17 or later. |
148 | // Android wraps the memfd_create syscall from API version 30. |
149 | #if !defined(MFD_CLOEXEC) || (defined(__ANDROID__) && __ANDROID_API__ < 30) |
150 | #undef XBYAK_USE_MEMFD |
151 | #endif |
152 | |
153 | #if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__)) |
154 | #define XBYAK64_WIN |
155 | #elif defined(__x86_64__) |
156 | #define XBYAK64_GCC |
157 | #endif |
158 | #if !defined(XBYAK64) && !defined(XBYAK32) |
159 | #if defined(XBYAK64_GCC) || defined(XBYAK64_WIN) |
160 | #define XBYAK64 |
161 | #else |
162 | #define XBYAK32 |
163 | #endif |
164 | #endif |
165 | |
166 | #if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1900) |
167 | #undef XBYAK_TLS |
168 | #define XBYAK_TLS thread_local |
169 | #define XBYAK_VARIADIC_TEMPLATE |
170 | #define XBYAK_NOEXCEPT noexcept |
171 | #else |
172 | #define XBYAK_NOEXCEPT throw() |
173 | #endif |
174 | |
175 | // require c++14 or later |
176 | // Visual Studio 2017 version 15.0 or later |
177 | // g++-6 or later |
178 | #if ((__cplusplus >= 201402L) && !(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ <= 5))) || (defined(_MSC_VER) && _MSC_VER >= 1910) |
179 | #define XBYAK_CONSTEXPR constexpr |
180 | #else |
181 | #define XBYAK_CONSTEXPR |
182 | #endif |
183 | |
184 | #ifdef _MSC_VER |
185 | #pragma warning(push) |
186 | #pragma warning(disable : 4514) /* remove inline function */ |
187 | #pragma warning(disable : 4786) /* identifier is too long */ |
188 | #pragma warning(disable : 4503) /* name is too long */ |
189 | #pragma warning(disable : 4127) /* constant expresison */ |
190 | #endif |
191 | |
192 | // disable -Warray-bounds because it may be a bug of gcc. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104603 |
193 | #if defined(__GNUC__) && !defined(__clang__) |
194 | #define XBYAK_DISABLE_WARNING_ARRAY_BOUNDS |
195 | #pragma GCC diagnostic push |
196 | #pragma GCC diagnostic ignored "-Warray-bounds" |
197 | #endif |
198 | |
199 | namespace Xbyak { |
200 | |
201 | enum { |
202 | DEFAULT_MAX_CODE_SIZE = 4096, |
203 | VERSION = 0x6630 /* 0xABCD = A.BC(.D) */ |
204 | }; |
205 | |
206 | #ifndef MIE_INTEGER_TYPE_DEFINED |
207 | #define MIE_INTEGER_TYPE_DEFINED |
208 | // for backward compatibility |
209 | typedef uint64_t uint64; |
210 | typedef int64_t sint64; |
211 | typedef uint32_t uint32; |
212 | typedef uint16_t uint16; |
213 | typedef uint8_t uint8; |
214 | #endif |
215 | |
216 | #ifndef MIE_ALIGN |
217 | #ifdef _MSC_VER |
218 | #define MIE_ALIGN(x) __declspec(align(x)) |
219 | #else |
220 | #define MIE_ALIGN(x) __attribute__((aligned(x))) |
221 | #endif |
222 | #endif |
223 | #ifndef MIE_PACK // for shufps |
224 | #define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w)) |
225 | #endif |
226 | |
227 | enum { |
228 | ERR_NONE = 0, |
229 | ERR_BAD_ADDRESSING, |
230 | ERR_CODE_IS_TOO_BIG, |
231 | ERR_BAD_SCALE, |
232 | ERR_ESP_CANT_BE_INDEX, |
233 | ERR_BAD_COMBINATION, |
234 | ERR_BAD_SIZE_OF_REGISTER, |
235 | ERR_IMM_IS_TOO_BIG, |
236 | ERR_BAD_ALIGN, |
237 | ERR_LABEL_IS_REDEFINED, |
238 | ERR_LABEL_IS_TOO_FAR, |
239 | ERR_LABEL_IS_NOT_FOUND, |
240 | ERR_CODE_ISNOT_COPYABLE, |
241 | ERR_BAD_PARAMETER, |
242 | ERR_CANT_PROTECT, |
243 | ERR_CANT_USE_64BIT_DISP, |
244 | ERR_OFFSET_IS_TOO_BIG, |
245 | ERR_MEM_SIZE_IS_NOT_SPECIFIED, |
246 | ERR_BAD_MEM_SIZE, |
247 | ERR_BAD_ST_COMBINATION, |
248 | ERR_OVER_LOCAL_LABEL, // not used |
249 | ERR_UNDER_LOCAL_LABEL, |
250 | ERR_CANT_ALLOC, |
251 | ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW, |
252 | ERR_BAD_PROTECT_MODE, |
253 | ERR_BAD_PNUM, |
254 | ERR_BAD_TNUM, |
255 | ERR_BAD_VSIB_ADDRESSING, |
256 | ERR_CANT_CONVERT, |
257 | ERR_LABEL_ISNOT_SET_BY_L, |
258 | ERR_LABEL_IS_ALREADY_SET_BY_L, |
259 | ERR_BAD_LABEL_STR, |
260 | ERR_MUNMAP, |
261 | ERR_OPMASK_IS_ALREADY_SET, |
262 | ERR_ROUNDING_IS_ALREADY_SET, |
263 | ERR_K0_IS_INVALID, |
264 | ERR_EVEX_IS_INVALID, |
265 | ERR_SAE_IS_INVALID, |
266 | ERR_ER_IS_INVALID, |
267 | ERR_INVALID_BROADCAST, |
268 | ERR_INVALID_OPMASK_WITH_MEMORY, |
269 | ERR_INVALID_ZERO, |
270 | ERR_INVALID_RIP_IN_AUTO_GROW, |
271 | ERR_INVALID_MIB_ADDRESS, |
272 | ERR_X2APIC_IS_NOT_SUPPORTED, |
273 | ERR_NOT_SUPPORTED, |
274 | ERR_SAME_REGS_ARE_INVALID, |
275 | ERR_INTERNAL // Put it at last. |
276 | }; |
277 | |
278 | inline const char *ConvertErrorToString(int err) |
279 | { |
280 | static const char *errTbl[] = { |
281 | "none" , |
282 | "bad addressing" , |
283 | "code is too big" , |
284 | "bad scale" , |
285 | "esp can't be index" , |
286 | "bad combination" , |
287 | "bad size of register" , |
288 | "imm is too big" , |
289 | "bad align" , |
290 | "label is redefined" , |
291 | "label is too far" , |
292 | "label is not found" , |
293 | "code is not copyable" , |
294 | "bad parameter" , |
295 | "can't protect" , |
296 | "can't use 64bit disp(use (void*))" , |
297 | "offset is too big" , |
298 | "MEM size is not specified" , |
299 | "bad mem size" , |
300 | "bad st combination" , |
301 | "over local label" , |
302 | "under local label" , |
303 | "can't alloc" , |
304 | "T_SHORT is not supported in AutoGrow" , |
305 | "bad protect mode" , |
306 | "bad pNum" , |
307 | "bad tNum" , |
308 | "bad vsib addressing" , |
309 | "can't convert" , |
310 | "label is not set by L()" , |
311 | "label is already set by L()" , |
312 | "bad label string" , |
313 | "err munmap" , |
314 | "opmask is already set" , |
315 | "rounding is already set" , |
316 | "k0 is invalid" , |
317 | "evex is invalid" , |
318 | "sae(suppress all exceptions) is invalid" , |
319 | "er(embedded rounding) is invalid" , |
320 | "invalid broadcast" , |
321 | "invalid opmask with memory" , |
322 | "invalid zero" , |
323 | "invalid rip in AutoGrow" , |
324 | "invalid mib address" , |
325 | "x2APIC is not supported" , |
326 | "not supported" , |
327 | "same regs are invalid" , |
328 | "internal error" |
329 | }; |
330 | assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl)); |
331 | return err <= ERR_INTERNAL ? errTbl[err] : "unknown err" ; |
332 | } |
333 | |
334 | #ifdef XBYAK_NO_EXCEPTION |
335 | namespace local { |
336 | |
337 | inline int& GetErrorRef() { |
338 | static XBYAK_TLS int err = 0; |
339 | return err; |
340 | } |
341 | |
342 | inline void SetError(int err) { |
343 | if (local::GetErrorRef()) return; // keep the first err code |
344 | local::GetErrorRef() = err; |
345 | } |
346 | |
347 | } // local |
348 | |
349 | inline void ClearError() { |
350 | local::GetErrorRef() = 0; |
351 | } |
352 | inline int GetError() { return Xbyak::local::GetErrorRef(); } |
353 | |
354 | #define XBYAK_THROW(err) { Xbyak::local::SetError(err); return; } |
355 | #define XBYAK_THROW_RET(err, r) { Xbyak::local::SetError(err); return r; } |
356 | |
357 | #else |
358 | class Error : public std::exception { |
359 | int err_; |
360 | public: |
361 | explicit Error(int err) : err_(err) |
362 | { |
363 | if (err_ < 0 || err_ > ERR_INTERNAL) { |
364 | err_ = ERR_INTERNAL; |
365 | } |
366 | } |
367 | operator int() const { return err_; } |
368 | const char *what() const XBYAK_NOEXCEPT |
369 | { |
370 | return ConvertErrorToString(err_); |
371 | } |
372 | }; |
373 | |
374 | // dummy functions |
375 | inline void ClearError() { } |
376 | inline int GetError() { return 0; } |
377 | |
378 | inline const char *ConvertErrorToString(const Error& err) |
379 | { |
380 | return err.what(); |
381 | } |
382 | |
383 | #define XBYAK_THROW(err) { throw Error(err); } |
384 | #define XBYAK_THROW_RET(err, r) { throw Error(err); } |
385 | |
386 | #endif |
387 | |
388 | inline void *AlignedMalloc(size_t size, size_t alignment) |
389 | { |
390 | #ifdef __MINGW32__ |
391 | return __mingw_aligned_malloc(size, alignment); |
392 | #elif defined(_WIN32) |
393 | return _aligned_malloc(size, alignment); |
394 | #else |
395 | void *p; |
396 | int ret = posix_memalign(&p, alignment, size); |
397 | return (ret == 0) ? p : 0; |
398 | #endif |
399 | } |
400 | |
401 | inline void AlignedFree(void *p) |
402 | { |
403 | #ifdef __MINGW32__ |
404 | __mingw_aligned_free(p); |
405 | #elif defined(_MSC_VER) |
406 | _aligned_free(p); |
407 | #else |
408 | free(p); |
409 | #endif |
410 | } |
411 | |
412 | template<class To, class From> |
413 | inline const To CastTo(From p) XBYAK_NOEXCEPT |
414 | { |
415 | return (const To)(size_t)(p); |
416 | } |
417 | namespace inner { |
418 | |
419 | static const size_t ALIGN_PAGE_SIZE = 4096; |
420 | |
421 | inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; } |
422 | inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } |
423 | |
424 | inline uint32_t VerifyInInt32(uint64_t x) |
425 | { |
426 | #ifdef XBYAK64 |
427 | if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0) |
428 | #endif |
429 | return static_cast<uint32_t>(x); |
430 | } |
431 | |
432 | enum LabelMode { |
433 | LasIs, // as is |
434 | Labs, // absolute |
435 | LaddTop // (addr + top) for mov(reg, label) with AutoGrow |
436 | }; |
437 | |
438 | } // inner |
439 | |
440 | /* |
441 | custom allocator |
442 | */ |
443 | struct Allocator { |
444 | explicit Allocator(const std::string& = "" ) {} // same interface with MmapAllocator |
445 | virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); } |
446 | virtual void free(uint8_t *p) { AlignedFree(p); } |
447 | virtual ~Allocator() {} |
448 | /* override to return false if you call protect() manually */ |
449 | virtual bool useProtect() const { return true; } |
450 | }; |
451 | |
452 | #ifdef XBYAK_USE_MMAP_ALLOCATOR |
453 | #ifdef XBYAK_USE_MAP_JIT |
454 | namespace util { |
455 | |
456 | inline int getMacOsVersionPure() |
457 | { |
458 | char buf[64]; |
459 | size_t size = sizeof(buf); |
460 | int err = sysctlbyname("kern.osrelease" , buf, &size, NULL, 0); |
461 | if (err != 0) return 0; |
462 | char *endp; |
463 | int major = strtol(buf, &endp, 10); |
464 | if (*endp != '.') return 0; |
465 | return major; |
466 | } |
467 | |
468 | inline int getMacOsVersion() |
469 | { |
470 | static const int version = getMacOsVersionPure(); |
471 | return version; |
472 | } |
473 | |
474 | } // util |
475 | #endif |
476 | class MmapAllocator : public Allocator { |
477 | struct Allocation { |
478 | size_t size; |
479 | #if defined(XBYAK_USE_MEMFD) |
480 | // fd_ is only used with XBYAK_USE_MEMFD. We keep the file open |
481 | // during the lifetime of each allocation in order to support |
482 | // checkpoint/restore by unprivileged users. |
483 | int fd; |
484 | #endif |
485 | }; |
486 | const std::string name_; // only used with XBYAK_USE_MEMFD |
487 | typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, Allocation> AllocationList; |
488 | AllocationList allocList_; |
489 | public: |
490 | explicit MmapAllocator(const std::string& name = "xbyak" ) : name_(name) {} |
491 | uint8_t *alloc(size_t size) |
492 | { |
493 | const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1; |
494 | size = (size + alignedSizeM1) & ~alignedSizeM1; |
495 | #if defined(MAP_ANONYMOUS) |
496 | int mode = MAP_PRIVATE | MAP_ANONYMOUS; |
497 | #elif defined(MAP_ANON) |
498 | int mode = MAP_PRIVATE | MAP_ANON; |
499 | #else |
500 | #error "not supported" |
501 | #endif |
502 | #if defined(XBYAK_USE_MAP_JIT) |
503 | const int mojaveVersion = 18; |
504 | if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT; |
505 | #endif |
506 | int fd = -1; |
507 | #if defined(XBYAK_USE_MEMFD) |
508 | fd = memfd_create(name_.c_str(), MFD_CLOEXEC); |
509 | if (fd != -1) { |
510 | mode = MAP_SHARED; |
511 | if (ftruncate(fd, size) != 0) { |
512 | close(fd); |
513 | XBYAK_THROW_RET(ERR_CANT_ALLOC, 0) |
514 | } |
515 | } |
516 | #endif |
517 | void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, fd, 0); |
518 | if (p == MAP_FAILED) { |
519 | if (fd != -1) close(fd); |
520 | XBYAK_THROW_RET(ERR_CANT_ALLOC, 0) |
521 | } |
522 | assert(p); |
523 | Allocation &alloc = allocList_[(uintptr_t)p]; |
524 | alloc.size = size; |
525 | #if defined(XBYAK_USE_MEMFD) |
526 | alloc.fd = fd; |
527 | #endif |
528 | return (uint8_t*)p; |
529 | } |
530 | void free(uint8_t *p) |
531 | { |
532 | if (p == 0) return; |
533 | AllocationList::iterator i = allocList_.find((uintptr_t)p); |
534 | if (i == allocList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER) |
535 | if (munmap((void*)i->first, i->second.size) < 0) XBYAK_THROW(ERR_MUNMAP) |
536 | #if defined(XBYAK_USE_MEMFD) |
537 | if (i->second.fd != -1) close(i->second.fd); |
538 | #endif |
539 | allocList_.erase(i); |
540 | } |
541 | }; |
542 | #else |
543 | typedef Allocator MmapAllocator; |
544 | #endif |
545 | |
546 | class Address; |
547 | class Reg; |
548 | |
549 | class Operand { |
550 | static const uint8_t EXT8BIT = 0x20; |
551 | unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil |
552 | unsigned int kind_:10; |
553 | unsigned int bit_:14; |
554 | protected: |
555 | unsigned int zero_:1; |
556 | unsigned int mask_:3; |
557 | unsigned int rounding_:3; |
558 | void setIdx(int idx) { idx_ = idx; } |
559 | public: |
560 | enum Kind { |
561 | NONE = 0, |
562 | MEM = 1 << 0, |
563 | REG = 1 << 1, |
564 | MMX = 1 << 2, |
565 | FPU = 1 << 3, |
566 | XMM = 1 << 4, |
567 | YMM = 1 << 5, |
568 | ZMM = 1 << 6, |
569 | OPMASK = 1 << 7, |
570 | BNDREG = 1 << 8, |
571 | TMM = 1 << 9 |
572 | }; |
573 | enum Code { |
574 | #ifdef XBYAK64 |
575 | RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, |
576 | R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D, |
577 | R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W, |
578 | R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B, |
579 | SPL = 4, BPL, SIL, DIL, |
580 | #endif |
581 | EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI, |
582 | AX = 0, CX, DX, BX, SP, BP, SI, DI, |
583 | AL = 0, CL, DL, BL, AH, CH, DH, BH |
584 | }; |
585 | XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { } |
586 | XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0) |
587 | : idx_(static_cast<uint8_t>(idx | (ext8bit ? EXT8BIT : 0))) |
588 | , kind_(kind) |
589 | , bit_(bit) |
590 | , zero_(0), mask_(0), rounding_(0) |
591 | { |
592 | assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two |
593 | } |
594 | XBYAK_CONSTEXPR Kind getKind() const { return static_cast<Kind>(kind_); } |
595 | XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); } |
596 | XBYAK_CONSTEXPR bool isNone() const { return kind_ == 0; } |
597 | XBYAK_CONSTEXPR bool isMMX() const { return is(MMX); } |
598 | XBYAK_CONSTEXPR bool isXMM() const { return is(XMM); } |
599 | XBYAK_CONSTEXPR bool isYMM() const { return is(YMM); } |
600 | XBYAK_CONSTEXPR bool isZMM() const { return is(ZMM); } |
601 | XBYAK_CONSTEXPR bool isTMM() const { return is(TMM); } |
602 | XBYAK_CONSTEXPR bool isXMEM() const { return is(XMM | MEM); } |
603 | XBYAK_CONSTEXPR bool isYMEM() const { return is(YMM | MEM); } |
604 | XBYAK_CONSTEXPR bool isZMEM() const { return is(ZMM | MEM); } |
605 | XBYAK_CONSTEXPR bool isOPMASK() const { return is(OPMASK); } |
606 | XBYAK_CONSTEXPR bool isBNDREG() const { return is(BNDREG); } |
607 | XBYAK_CONSTEXPR bool isREG(int bit = 0) const { return is(REG, bit); } |
608 | XBYAK_CONSTEXPR bool isMEM(int bit = 0) const { return is(MEM, bit); } |
609 | XBYAK_CONSTEXPR bool isFPU() const { return is(FPU); } |
610 | XBYAK_CONSTEXPR bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; } |
611 | XBYAK_CONSTEXPR bool isExtIdx() const { return (getIdx() & 8) != 0; } |
612 | XBYAK_CONSTEXPR bool isExtIdx2() const { return (getIdx() & 16) != 0; } |
613 | XBYAK_CONSTEXPR bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); } |
614 | XBYAK_CONSTEXPR bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); } |
615 | XBYAK_CONSTEXPR bool hasZero() const { return zero_; } |
616 | XBYAK_CONSTEXPR int getOpmaskIdx() const { return mask_; } |
617 | XBYAK_CONSTEXPR int getRounding() const { return rounding_; } |
618 | void setKind(Kind kind) |
619 | { |
620 | if ((kind & (XMM|YMM|ZMM|TMM)) == 0) return; |
621 | kind_ = kind; |
622 | bit_ = kind == XMM ? 128 : kind == YMM ? 256 : kind == ZMM ? 512 : 8192; |
623 | } |
624 | // err if MMX/FPU/OPMASK/BNDREG |
625 | void setBit(int bit); |
626 | void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true) |
627 | { |
628 | if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) |
629 | mask_ = idx; |
630 | } |
631 | void setRounding(int idx) |
632 | { |
633 | if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET) |
634 | rounding_ = idx; |
635 | } |
636 | void setZero() { zero_ = true; } |
637 | // ah, ch, dh, bh? |
638 | bool isHigh8bit() const |
639 | { |
640 | if (!isBit(8)) return false; |
641 | if (isExt8bit()) return false; |
642 | const int idx = getIdx(); |
643 | return AH <= idx && idx <= BH; |
644 | } |
645 | // any bit is accetable if bit == 0 |
646 | XBYAK_CONSTEXPR bool is(int kind, uint32_t bit = 0) const |
647 | { |
648 | return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16) |
649 | } |
650 | XBYAK_CONSTEXPR bool isBit(uint32_t bit) const { return (bit_ & bit) != 0; } |
651 | XBYAK_CONSTEXPR uint32_t getBit() const { return bit_; } |
652 | const char *toString() const |
653 | { |
654 | const int idx = getIdx(); |
655 | if (kind_ == REG) { |
656 | if (isExt8bit()) { |
657 | static const char *tbl[4] = { "spl" , "bpl" , "sil" , "dil" }; |
658 | return tbl[idx - 4]; |
659 | } |
660 | static const char *tbl[4][16] = { |
661 | { "al" , "cl" , "dl" , "bl" , "ah" , "ch" , "dh" , "bh" , "r8b" , "r9b" , "r10b" , "r11b" , "r12b" , "r13b" , "r14b" , "r15b" }, |
662 | { "ax" , "cx" , "dx" , "bx" , "sp" , "bp" , "si" , "di" , "r8w" , "r9w" , "r10w" , "r11w" , "r12w" , "r13w" , "r14w" , "r15w" }, |
663 | { "eax" , "ecx" , "edx" , "ebx" , "esp" , "ebp" , "esi" , "edi" , "r8d" , "r9d" , "r10d" , "r11d" , "r12d" , "r13d" , "r14d" , "r15d" }, |
664 | { "rax" , "rcx" , "rdx" , "rbx" , "rsp" , "rbp" , "rsi" , "rdi" , "r8" , "r9" , "r10" , "r11" , "r12" , "r13" , "r14" , "r15" }, |
665 | }; |
666 | return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx]; |
667 | } else if (isOPMASK()) { |
668 | static const char *tbl[8] = { "k0" , "k1" , "k2" , "k3" , "k4" , "k5" , "k6" , "k7" }; |
669 | return tbl[idx]; |
670 | } else if (isTMM()) { |
671 | static const char *tbl[8] = { |
672 | "tmm0" , "tmm1" , "tmm2" , "tmm3" , "tmm4" , "tmm5" , "tmm6" , "tmm7" |
673 | }; |
674 | return tbl[idx]; |
675 | } else if (isZMM()) { |
676 | static const char *tbl[32] = { |
677 | "zmm0" , "zmm1" , "zmm2" , "zmm3" , "zmm4" , "zmm5" , "zmm6" , "zmm7" , "zmm8" , "zmm9" , "zmm10" , "zmm11" , "zmm12" , "zmm13" , "zmm14" , "zmm15" , |
678 | "zmm16" , "zmm17" , "zmm18" , "zmm19" , "zmm20" , "zmm21" , "zmm22" , "zmm23" , "zmm24" , "zmm25" , "zmm26" , "zmm27" , "zmm28" , "zmm29" , "zmm30" , "zmm31" |
679 | }; |
680 | return tbl[idx]; |
681 | } else if (isYMM()) { |
682 | static const char *tbl[32] = { |
683 | "ymm0" , "ymm1" , "ymm2" , "ymm3" , "ymm4" , "ymm5" , "ymm6" , "ymm7" , "ymm8" , "ymm9" , "ymm10" , "ymm11" , "ymm12" , "ymm13" , "ymm14" , "ymm15" , |
684 | "ymm16" , "ymm17" , "ymm18" , "ymm19" , "ymm20" , "ymm21" , "ymm22" , "ymm23" , "ymm24" , "ymm25" , "ymm26" , "ymm27" , "ymm28" , "ymm29" , "ymm30" , "ymm31" |
685 | }; |
686 | return tbl[idx]; |
687 | } else if (isXMM()) { |
688 | static const char *tbl[32] = { |
689 | "xmm0" , "xmm1" , "xmm2" , "xmm3" , "xmm4" , "xmm5" , "xmm6" , "xmm7" , "xmm8" , "xmm9" , "xmm10" , "xmm11" , "xmm12" , "xmm13" , "xmm14" , "xmm15" , |
690 | "xmm16" , "xmm17" , "xmm18" , "xmm19" , "xmm20" , "xmm21" , "xmm22" , "xmm23" , "xmm24" , "xmm25" , "xmm26" , "xmm27" , "xmm28" , "xmm29" , "xmm30" , "xmm31" |
691 | }; |
692 | return tbl[idx]; |
693 | } else if (isMMX()) { |
694 | static const char *tbl[8] = { "mm0" , "mm1" , "mm2" , "mm3" , "mm4" , "mm5" , "mm6" , "mm7" }; |
695 | return tbl[idx]; |
696 | } else if (isFPU()) { |
697 | static const char *tbl[8] = { "st0" , "st1" , "st2" , "st3" , "st4" , "st5" , "st6" , "st7" }; |
698 | return tbl[idx]; |
699 | } else if (isBNDREG()) { |
700 | static const char *tbl[4] = { "bnd0" , "bnd1" , "bnd2" , "bnd3" }; |
701 | return tbl[idx]; |
702 | } |
703 | XBYAK_THROW_RET(ERR_INTERNAL, 0); |
704 | } |
705 | bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; } |
706 | bool operator==(const Operand& rhs) const; |
707 | bool operator!=(const Operand& rhs) const { return !operator==(rhs); } |
708 | const Address& getAddress() const; |
709 | const Reg& getReg() const; |
710 | }; |
711 | |
712 | inline void Operand::setBit(int bit) |
713 | { |
714 | if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512 && bit != 8192) goto ERR; |
715 | if (isBit(bit)) return; |
716 | if (is(MEM | OPMASK)) { |
717 | bit_ = bit; |
718 | return; |
719 | } |
720 | if (is(REG | XMM | YMM | ZMM | TMM)) { |
721 | int idx = getIdx(); |
722 | // err if converting ah, bh, ch, dh |
723 | if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR; |
724 | Kind kind = REG; |
725 | switch (bit) { |
726 | case 8: |
727 | if (idx >= 16) goto ERR; |
728 | #ifdef XBYAK32 |
729 | if (idx >= 4) goto ERR; |
730 | #else |
731 | if (4 <= idx && idx < 8) idx |= EXT8BIT; |
732 | #endif |
733 | break; |
734 | case 16: |
735 | case 32: |
736 | case 64: |
737 | if (idx >= 16) goto ERR; |
738 | break; |
739 | case 128: kind = XMM; break; |
740 | case 256: kind = YMM; break; |
741 | case 512: kind = ZMM; break; |
742 | case 8192: kind = TMM; break; |
743 | } |
744 | idx_ = idx; |
745 | kind_ = kind; |
746 | bit_ = bit; |
747 | if (bit >= 128) return; // keep mask_ and rounding_ |
748 | mask_ = 0; |
749 | rounding_ = 0; |
750 | return; |
751 | } |
752 | ERR: |
753 | XBYAK_THROW(ERR_CANT_CONVERT) |
754 | } |
755 | |
756 | class Label; |
757 | |
758 | struct Reg8; |
759 | struct Reg16; |
760 | struct Reg32; |
761 | #ifdef XBYAK64 |
762 | struct Reg64; |
763 | #endif |
764 | class Reg : public Operand { |
765 | public: |
766 | XBYAK_CONSTEXPR Reg() { } |
767 | XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } |
768 | // convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM |
769 | Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; } |
770 | uint8_t getRexW() const { return isREG(64) ? 8 : 0; } |
771 | uint8_t getRexR() const { return isExtIdx() ? 4 : 0; } |
772 | uint8_t getRexX() const { return isExtIdx() ? 2 : 0; } |
773 | uint8_t getRexB() const { return isExtIdx() ? 1 : 0; } |
774 | uint8_t getRex(const Reg& base = Reg()) const |
775 | { |
776 | uint8_t rex = getRexW() | getRexR() | base.getRexW() | base.getRexB(); |
777 | if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40; |
778 | return rex; |
779 | } |
780 | Reg8 cvt8() const; |
781 | Reg16 cvt16() const; |
782 | Reg32 cvt32() const; |
783 | #ifdef XBYAK64 |
784 | Reg64 cvt64() const; |
785 | #endif |
786 | }; |
787 | |
788 | inline const Reg& Operand::getReg() const |
789 | { |
790 | assert(!isMEM()); |
791 | return static_cast<const Reg&>(*this); |
792 | } |
793 | |
794 | struct Reg8 : public Reg { |
795 | explicit XBYAK_CONSTEXPR Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } |
796 | }; |
797 | |
798 | struct Reg16 : public Reg { |
799 | explicit XBYAK_CONSTEXPR Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } |
800 | }; |
801 | |
802 | struct Mmx : public Reg { |
803 | explicit XBYAK_CONSTEXPR Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } |
804 | }; |
805 | |
806 | struct EvexModifierRounding { |
807 | enum { |
808 | T_RN_SAE = 1, |
809 | T_RD_SAE = 2, |
810 | T_RU_SAE = 3, |
811 | T_RZ_SAE = 4, |
812 | T_SAE = 5 |
813 | }; |
814 | explicit XBYAK_CONSTEXPR EvexModifierRounding(int rounding) : rounding(rounding) {} |
815 | int rounding; |
816 | }; |
817 | struct EvexModifierZero{ XBYAK_CONSTEXPR EvexModifierZero() {}}; |
818 | |
819 | struct Xmm : public Mmx { |
820 | explicit XBYAK_CONSTEXPR Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } |
821 | XBYAK_CONSTEXPR Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { } |
822 | Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; } |
823 | Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; } |
824 | Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; } |
825 | }; |
826 | |
827 | struct Ymm : public Xmm { |
828 | explicit XBYAK_CONSTEXPR Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { } |
829 | Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; } |
830 | }; |
831 | |
832 | struct Zmm : public Ymm { |
833 | explicit XBYAK_CONSTEXPR Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { } |
834 | Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; } |
835 | }; |
836 | |
837 | #ifdef XBYAK64 |
838 | struct Tmm : public Reg { |
839 | explicit XBYAK_CONSTEXPR Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { } |
840 | }; |
841 | #endif |
842 | |
843 | struct Opmask : public Reg { |
844 | explicit XBYAK_CONSTEXPR Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {} |
845 | }; |
846 | |
847 | struct BoundsReg : public Reg { |
848 | explicit XBYAK_CONSTEXPR BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {} |
849 | }; |
850 | |
851 | template<class T>T operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; } |
852 | template<class T>T operator|(const T& x, const EvexModifierZero&) { T r(x); r.setZero(); return r; } |
853 | template<class T>T operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; } |
854 | |
855 | struct Fpu : public Reg { |
856 | explicit XBYAK_CONSTEXPR Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } |
857 | }; |
858 | |
859 | struct Reg32e : public Reg { |
860 | explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {} |
861 | }; |
862 | struct Reg32 : public Reg32e { |
863 | explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {} |
864 | }; |
865 | #ifdef XBYAK64 |
866 | struct Reg64 : public Reg32e { |
867 | explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {} |
868 | }; |
869 | struct RegRip { |
870 | int64_t disp_; |
871 | const Label* label_; |
872 | bool isAddr_; |
873 | explicit XBYAK_CONSTEXPR RegRip(int64_t disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {} |
874 | friend const RegRip operator+(const RegRip& r, int disp) { |
875 | return RegRip(r.disp_ + disp, r.label_, r.isAddr_); |
876 | } |
877 | friend const RegRip operator-(const RegRip& r, int disp) { |
878 | return RegRip(r.disp_ - disp, r.label_, r.isAddr_); |
879 | } |
880 | friend const RegRip operator+(const RegRip& r, int64_t disp) { |
881 | return RegRip(r.disp_ + disp, r.label_, r.isAddr_); |
882 | } |
883 | friend const RegRip operator-(const RegRip& r, int64_t disp) { |
884 | return RegRip(r.disp_ - disp, r.label_, r.isAddr_); |
885 | } |
886 | friend const RegRip operator+(const RegRip& r, const Label& label) { |
887 | if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip()); |
888 | return RegRip(r.disp_, &label); |
889 | } |
890 | friend const RegRip operator+(const RegRip& r, const void *addr) { |
891 | if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip()); |
892 | return RegRip(r.disp_ + (int64_t)addr, 0, true); |
893 | } |
894 | }; |
895 | #endif |
896 | |
897 | inline Reg8 Reg::cvt8() const |
898 | { |
899 | Reg r = changeBit(8); return Reg8(r.getIdx(), r.isExt8bit()); |
900 | } |
901 | |
902 | inline Reg16 Reg::cvt16() const |
903 | { |
904 | return Reg16(changeBit(16).getIdx()); |
905 | } |
906 | |
907 | inline Reg32 Reg::cvt32() const |
908 | { |
909 | return Reg32(changeBit(32).getIdx()); |
910 | } |
911 | |
912 | #ifdef XBYAK64 |
913 | inline Reg64 Reg::cvt64() const |
914 | { |
915 | return Reg64(changeBit(64).getIdx()); |
916 | } |
917 | #endif |
918 | |
919 | #ifndef XBYAK_DISABLE_SEGMENT |
920 | // not derived from Reg |
921 | class Segment { |
922 | int idx_; |
923 | public: |
924 | enum { |
925 | es, cs, ss, ds, fs, gs |
926 | }; |
927 | explicit XBYAK_CONSTEXPR Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); } |
928 | int getIdx() const { return idx_; } |
929 | const char *toString() const |
930 | { |
931 | static const char tbl[][3] = { |
932 | "es" , "cs" , "ss" , "ds" , "fs" , "gs" |
933 | }; |
934 | return tbl[idx_]; |
935 | } |
936 | }; |
937 | #endif |
938 | |
939 | class RegExp { |
940 | public: |
941 | #ifdef XBYAK64 |
942 | enum { i32e = 32 | 64 }; |
943 | #else |
944 | enum { i32e = 32 }; |
945 | #endif |
946 | XBYAK_CONSTEXPR RegExp(size_t disp = 0) : scale_(0), disp_(disp) { } |
947 | XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1) |
948 | : scale_(scale) |
949 | , disp_(0) |
950 | { |
951 | if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) |
952 | if (scale == 0) return; |
953 | if (scale != 1 && scale != 2 && scale != 4 && scale != 8) XBYAK_THROW(ERR_BAD_SCALE) |
954 | if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index |
955 | index_ = r; |
956 | } else { |
957 | base_ = r; |
958 | } |
959 | } |
960 | bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); } |
961 | RegExp optimize() const |
962 | { |
963 | RegExp exp = *this; |
964 | // [reg * 2] => [reg + reg] |
965 | if (index_.isBit(i32e) && !base_.getBit() && scale_ == 2) { |
966 | exp.base_ = index_; |
967 | exp.scale_ = 1; |
968 | } |
969 | return exp; |
970 | } |
971 | bool operator==(const RegExp& rhs) const |
972 | { |
973 | return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_ && scale_ == rhs.scale_; |
974 | } |
975 | const Reg& getBase() const { return base_; } |
976 | const Reg& getIndex() const { return index_; } |
977 | int getScale() const { return scale_; } |
978 | size_t getDisp() const { return disp_; } |
979 | XBYAK_CONSTEXPR void verify() const |
980 | { |
981 | if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) |
982 | if (index_.getBit() && index_.getBit() <= 64) { |
983 | if (index_.getIdx() == Operand::ESP) XBYAK_THROW(ERR_ESP_CANT_BE_INDEX) |
984 | if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) |
985 | } |
986 | } |
987 | friend RegExp operator+(const RegExp& a, const RegExp& b); |
988 | friend RegExp operator-(const RegExp& e, size_t disp); |
989 | uint8_t getRex() const |
990 | { |
991 | uint8_t rex = index_.getRexX() | base_.getRexB(); |
992 | return rex ? uint8_t(rex | 0x40) : 0; |
993 | } |
994 | private: |
995 | /* |
996 | [base_ + index_ * scale_ + disp_] |
997 | base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm |
998 | */ |
999 | Reg base_; |
1000 | Reg index_; |
1001 | int scale_; |
1002 | size_t disp_; |
1003 | }; |
1004 | |
1005 | inline RegExp operator+(const RegExp& a, const RegExp& b) |
1006 | { |
1007 | if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) |
1008 | RegExp ret = a; |
1009 | if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; } |
1010 | if (b.base_.getBit()) { |
1011 | if (ret.base_.getBit()) { |
1012 | if (ret.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) |
1013 | // base + base => base + index * 1 |
1014 | ret.index_ = b.base_; |
1015 | // [reg + esp] => [esp + reg] |
1016 | if (ret.index_.getIdx() == Operand::ESP) std::swap(ret.base_, ret.index_); |
1017 | ret.scale_ = 1; |
1018 | } else { |
1019 | ret.base_ = b.base_; |
1020 | } |
1021 | } |
1022 | ret.disp_ += b.disp_; |
1023 | return ret; |
1024 | } |
1025 | inline RegExp operator*(const Reg& r, int scale) |
1026 | { |
1027 | return RegExp(r, scale); |
1028 | } |
1029 | inline RegExp operator*(int scale, const Reg& r) |
1030 | { |
1031 | return r * scale; |
1032 | } |
1033 | inline RegExp operator-(const RegExp& e, size_t disp) |
1034 | { |
1035 | RegExp ret = e; |
1036 | ret.disp_ -= disp; |
1037 | return ret; |
1038 | } |
1039 | |
1040 | // 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc) |
1041 | void *const AutoGrow = (void*)1; //-V566 |
1042 | void *const DontSetProtectRWE = (void*)2; //-V566 |
1043 | |
1044 | class CodeArray { |
1045 | enum Type { |
1046 | USER_BUF = 1, // use userPtr(non alignment, non protect) |
1047 | ALLOC_BUF, // use new(alignment, protect) |
1048 | AUTO_GROW // automatically move and grow memory if necessary |
1049 | }; |
1050 | CodeArray(const CodeArray& rhs); |
1051 | void operator=(const CodeArray&); |
1052 | bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; } |
1053 | struct AddrInfo { |
1054 | size_t codeOffset; // position to write |
1055 | size_t jmpAddr; // value to write |
1056 | int jmpSize; // size of jmpAddr |
1057 | inner::LabelMode mode; |
1058 | AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode) |
1059 | : codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {} |
1060 | uint64_t getVal(const uint8_t *top) const |
1061 | { |
1062 | uint64_t disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top); |
1063 | if (jmpSize == 4) disp = inner::VerifyInInt32(disp); |
1064 | return disp; |
1065 | } |
1066 | }; |
1067 | typedef std::list<AddrInfo> AddrInfoList; |
1068 | AddrInfoList addrInfoList_; |
1069 | const Type type_; |
1070 | #ifdef XBYAK_USE_MMAP_ALLOCATOR |
1071 | MmapAllocator defaultAllocator_; |
1072 | #else |
1073 | Allocator defaultAllocator_; |
1074 | #endif |
1075 | Allocator *alloc_; |
1076 | protected: |
1077 | size_t maxSize_; |
1078 | uint8_t *top_; |
1079 | size_t size_; |
1080 | bool isCalledCalcJmpAddress_; |
1081 | |
1082 | bool useProtect() const { return alloc_->useProtect(); } |
1083 | /* |
1084 | allocate new memory and copy old data to the new area |
1085 | */ |
1086 | void growMemory() |
1087 | { |
1088 | const size_t newSize = (std::max<size_t>)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2); |
1089 | uint8_t *newTop = alloc_->alloc(newSize); |
1090 | if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC) |
1091 | for (size_t i = 0; i < size_; i++) newTop[i] = top_[i]; |
1092 | alloc_->free(top_); |
1093 | top_ = newTop; |
1094 | maxSize_ = newSize; |
1095 | } |
1096 | /* |
1097 | calc jmp address for AutoGrow mode |
1098 | */ |
1099 | void calcJmpAddress() |
1100 | { |
1101 | if (isCalledCalcJmpAddress_) return; |
1102 | for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) { |
1103 | uint64_t disp = i->getVal(top_); |
1104 | rewrite(i->codeOffset, disp, i->jmpSize); |
1105 | } |
1106 | isCalledCalcJmpAddress_ = true; |
1107 | } |
1108 | public: |
1109 | enum ProtectMode { |
1110 | PROTECT_RW = 0, // read/write |
1111 | PROTECT_RWE = 1, // read/write/exec |
1112 | PROTECT_RE = 2 // read/exec |
1113 | }; |
1114 | explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0) |
1115 | : type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF) |
1116 | , alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_) |
1117 | , maxSize_(maxSize) |
1118 | , top_(type_ == USER_BUF ? reinterpret_cast<uint8_t*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1))) |
1119 | , size_(0) |
1120 | , isCalledCalcJmpAddress_(false) |
1121 | { |
1122 | if (maxSize_ > 0 && top_ == 0) XBYAK_THROW(ERR_CANT_ALLOC) |
1123 | if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) { |
1124 | alloc_->free(top_); |
1125 | XBYAK_THROW(ERR_CANT_PROTECT) |
1126 | } |
1127 | } |
1128 | virtual ~CodeArray() |
1129 | { |
1130 | if (isAllocType()) { |
1131 | if (useProtect()) setProtectModeRW(false); |
1132 | alloc_->free(top_); |
1133 | } |
1134 | } |
1135 | bool setProtectMode(ProtectMode mode, bool throwException = true) |
1136 | { |
1137 | bool isOK = protect(top_, maxSize_, mode); |
1138 | if (isOK) return true; |
1139 | if (throwException) XBYAK_THROW_RET(ERR_CANT_PROTECT, false) |
1140 | return false; |
1141 | } |
1142 | bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); } |
1143 | bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); } |
1144 | void resetSize() |
1145 | { |
1146 | size_ = 0; |
1147 | addrInfoList_.clear(); |
1148 | isCalledCalcJmpAddress_ = false; |
1149 | } |
1150 | void db(int code) |
1151 | { |
1152 | if (size_ >= maxSize_) { |
1153 | if (type_ == AUTO_GROW) { |
1154 | growMemory(); |
1155 | } else { |
1156 | XBYAK_THROW(ERR_CODE_IS_TOO_BIG) |
1157 | } |
1158 | } |
1159 | top_[size_++] = static_cast<uint8_t>(code); |
1160 | } |
1161 | void db(const uint8_t *code, size_t codeSize) |
1162 | { |
1163 | for (size_t i = 0; i < codeSize; i++) db(code[i]); |
1164 | } |
1165 | void db(uint64_t code, size_t codeSize) |
1166 | { |
1167 | if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER) |
1168 | for (size_t i = 0; i < codeSize; i++) db(static_cast<uint8_t>(code >> (i * 8))); |
1169 | } |
1170 | void dw(uint32_t code) { db(code, 2); } |
1171 | void dd(uint32_t code) { db(code, 4); } |
1172 | void dq(uint64_t code) { db(code, 8); } |
1173 | const uint8_t *getCode() const { return top_; } |
1174 | template<class F> |
1175 | const F getCode() const { return reinterpret_cast<F>(top_); } |
1176 | const uint8_t *getCurr() const { return &top_[size_]; } |
1177 | template<class F> |
1178 | const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); } |
1179 | size_t getSize() const { return size_; } |
1180 | void setSize(size_t size) |
1181 | { |
1182 | if (size > maxSize_) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) |
1183 | size_ = size; |
1184 | } |
1185 | void dump() const |
1186 | { |
1187 | const uint8_t *p = getCode(); |
1188 | size_t bufSize = getSize(); |
1189 | size_t remain = bufSize; |
1190 | for (int i = 0; i < 4; i++) { |
1191 | size_t disp = 16; |
1192 | if (remain < 16) { |
1193 | disp = remain; |
1194 | } |
1195 | for (size_t j = 0; j < 16; j++) { |
1196 | if (j < disp) { |
1197 | printf("%02X" , p[i * 16 + j]); |
1198 | } |
1199 | } |
1200 | putchar('\n'); |
1201 | remain -= disp; |
1202 | if (remain == 0) { |
1203 | break; |
1204 | } |
1205 | } |
1206 | } |
1207 | /* |
1208 | @param offset [in] offset from top |
1209 | @param disp [in] offset from the next of jmp |
1210 | @param size [in] write size(1, 2, 4, 8) |
1211 | */ |
1212 | void rewrite(size_t offset, uint64_t disp, size_t size) |
1213 | { |
1214 | assert(offset < maxSize_); |
1215 | if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER) |
1216 | uint8_t *const data = top_ + offset; |
1217 | for (size_t i = 0; i < size; i++) { |
1218 | data[i] = static_cast<uint8_t>(disp >> (i * 8)); |
1219 | } |
1220 | } |
1221 | void save(size_t offset, size_t val, int size, inner::LabelMode mode) |
1222 | { |
1223 | addrInfoList_.push_back(AddrInfo(offset, val, size, mode)); |
1224 | } |
1225 | bool isAutoGrow() const { return type_ == AUTO_GROW; } |
1226 | bool isCalledCalcJmpAddress() const { return isCalledCalcJmpAddress_; } |
1227 | /** |
1228 | change exec permission of memory |
1229 | @param addr [in] buffer address |
1230 | @param size [in] buffer size |
1231 | @param protectMode [in] mode(RW/RWE/RE) |
1232 | @return true(success), false(failure) |
1233 | */ |
1234 | static inline bool protect(const void *addr, size_t size, int protectMode) |
1235 | { |
1236 | #if defined(_WIN32) |
1237 | const DWORD c_rw = PAGE_READWRITE; |
1238 | const DWORD c_rwe = PAGE_EXECUTE_READWRITE; |
1239 | const DWORD c_re = PAGE_EXECUTE_READ; |
1240 | DWORD mode; |
1241 | #else |
1242 | const int c_rw = PROT_READ | PROT_WRITE; |
1243 | const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC; |
1244 | const int c_re = PROT_READ | PROT_EXEC; |
1245 | int mode; |
1246 | #endif |
1247 | switch (protectMode) { |
1248 | case PROTECT_RW: mode = c_rw; break; |
1249 | case PROTECT_RWE: mode = c_rwe; break; |
1250 | case PROTECT_RE: mode = c_re; break; |
1251 | default: |
1252 | return false; |
1253 | } |
1254 | #if defined(_WIN32) |
1255 | DWORD oldProtect; |
1256 | return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0; |
1257 | #elif defined(__GNUC__) |
1258 | size_t pageSize = sysconf(_SC_PAGESIZE); |
1259 | size_t iaddr = reinterpret_cast<size_t>(addr); |
1260 | size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1)); |
1261 | #ifndef NDEBUG |
1262 | if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n" , pageSize); |
1263 | #endif |
1264 | return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0; |
1265 | #else |
1266 | return true; |
1267 | #endif |
1268 | } |
1269 | /** |
1270 | get aligned memory pointer |
1271 | @param addr [in] address |
1272 | @param alignedSize [in] power of two |
1273 | @return aligned addr by alingedSize |
1274 | */ |
1275 | static inline uint8_t *getAlignedAddress(uint8_t *addr, size_t alignedSize = 16) |
1276 | { |
1277 | return reinterpret_cast<uint8_t*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1))); |
1278 | } |
1279 | }; |
1280 | |
1281 | class Address : public Operand { |
1282 | public: |
1283 | enum Mode { |
1284 | M_ModRM, |
1285 | M_64bitDisp, |
1286 | M_rip, |
1287 | M_ripAddr |
1288 | }; |
1289 | XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e) |
1290 | : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast) |
1291 | { |
1292 | e_.verify(); |
1293 | } |
1294 | #ifdef XBYAK64 |
1295 | explicit XBYAK_CONSTEXPR Address(size_t disp) |
1296 | : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ } |
1297 | XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr) |
1298 | : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { } |
1299 | #endif |
1300 | RegExp getRegExp(bool optimize = true) const |
1301 | { |
1302 | return optimize ? e_.optimize() : e_; |
1303 | } |
1304 | Mode getMode() const { return mode_; } |
1305 | bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; } |
1306 | bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax |
1307 | size_t getDisp() const { return e_.getDisp(); } |
1308 | uint8_t getRex() const |
1309 | { |
1310 | if (mode_ != M_ModRM) return 0; |
1311 | return getRegExp().getRex(); |
1312 | } |
1313 | bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset |
1314 | bool isBroadcast() const { return broadcast_; } |
1315 | const Label* getLabel() const { return label_; } |
1316 | bool operator==(const Address& rhs) const |
1317 | { |
1318 | return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_; |
1319 | } |
1320 | bool operator!=(const Address& rhs) const { return !operator==(rhs); } |
1321 | bool isVsib() const { return e_.isVsib(); } |
1322 | private: |
1323 | RegExp e_; |
1324 | const Label* label_; |
1325 | Mode mode_; |
1326 | bool broadcast_; |
1327 | }; |
1328 | |
1329 | inline const Address& Operand::getAddress() const |
1330 | { |
1331 | assert(isMEM()); |
1332 | return static_cast<const Address&>(*this); |
1333 | } |
1334 | |
1335 | inline bool Operand::operator==(const Operand& rhs) const |
1336 | { |
1337 | if (isMEM() && rhs.isMEM()) return this->getAddress() == rhs.getAddress(); |
1338 | return isEqualIfNotInherited(rhs); |
1339 | } |
1340 | |
1341 | class AddressFrame { |
1342 | void operator=(const AddressFrame&); |
1343 | AddressFrame(const AddressFrame&); |
1344 | public: |
1345 | const uint32_t bit_; |
1346 | const bool broadcast_; |
1347 | explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { } |
1348 | Address operator[](const RegExp& e) const |
1349 | { |
1350 | return Address(bit_, broadcast_, e); |
1351 | } |
1352 | Address operator[](const void *disp) const |
1353 | { |
1354 | return Address(bit_, broadcast_, RegExp(reinterpret_cast<size_t>(disp))); |
1355 | } |
1356 | #ifdef XBYAK64 |
1357 | Address operator[](uint64_t disp) const { return Address(disp); } |
1358 | Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); } |
1359 | #endif |
1360 | }; |
1361 | |
1362 | struct JmpLabel { |
1363 | size_t endOfJmp; /* offset from top to the end address of jmp */ |
1364 | int jmpSize; |
1365 | inner::LabelMode mode; |
1366 | size_t disp; // disp for [rip + disp] |
1367 | explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0) |
1368 | : endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp) |
1369 | { |
1370 | } |
1371 | }; |
1372 | |
1373 | class LabelManager; |
1374 | |
1375 | class Label { |
1376 | mutable LabelManager *mgr; |
1377 | mutable int id; |
1378 | friend class LabelManager; |
1379 | public: |
1380 | Label() : mgr(0), id(0) {} |
1381 | Label(const Label& rhs); |
1382 | Label& operator=(const Label& rhs); |
1383 | ~Label(); |
1384 | void clear() { mgr = 0; id = 0; } |
1385 | int getId() const { return id; } |
1386 | const uint8_t *getAddress() const; |
1387 | |
1388 | // backward compatibility |
1389 | static inline std::string toStr(int num) |
1390 | { |
1391 | char buf[16]; |
1392 | #if defined(_MSC_VER) && (_MSC_VER < 1900) |
1393 | _snprintf_s |
1394 | #else |
1395 | snprintf |
1396 | #endif |
1397 | (buf, sizeof(buf), ".%08x" , num); |
1398 | return buf; |
1399 | } |
1400 | }; |
1401 | |
1402 | class LabelManager { |
1403 | // for string label |
1404 | struct SlabelVal { |
1405 | size_t offset; |
1406 | SlabelVal(size_t offset) : offset(offset) {} |
1407 | }; |
1408 | typedef XBYAK_STD_UNORDERED_MAP<std::string, SlabelVal> SlabelDefList; |
1409 | typedef XBYAK_STD_UNORDERED_MULTIMAP<std::string, const JmpLabel> SlabelUndefList; |
1410 | struct SlabelState { |
1411 | SlabelDefList defList; |
1412 | SlabelUndefList undefList; |
1413 | }; |
1414 | typedef std::list<SlabelState> StateList; |
1415 | // for Label class |
1416 | struct ClabelVal { |
1417 | ClabelVal(size_t offset = 0) : offset(offset), refCount(1) {} |
1418 | size_t offset; |
1419 | int refCount; |
1420 | }; |
1421 | typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList; |
1422 | typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList; |
1423 | typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList; |
1424 | |
1425 | CodeArray *base_; |
1426 | // global : stateList_.front(), local : stateList_.back() |
1427 | StateList stateList_; |
1428 | mutable int labelId_; |
1429 | ClabelDefList clabelDefList_; |
1430 | ClabelUndefList clabelUndefList_; |
1431 | LabelPtrList labelPtrList_; |
1432 | |
1433 | int getId(const Label& label) const |
1434 | { |
1435 | if (label.id == 0) label.id = labelId_++; |
1436 | return label.id; |
1437 | } |
1438 | template<class DefList, class UndefList, class T> |
1439 | void define_inner(DefList& defList, UndefList& undefList, const T& labelId, size_t addrOffset) |
1440 | { |
1441 | // add label |
1442 | typename DefList::value_type item(labelId, addrOffset); |
1443 | std::pair<typename DefList::iterator, bool> ret = defList.insert(item); |
1444 | if (!ret.second) XBYAK_THROW(ERR_LABEL_IS_REDEFINED) |
1445 | // search undefined label |
1446 | for (;;) { |
1447 | typename UndefList::iterator itr = undefList.find(labelId); |
1448 | if (itr == undefList.end()) break; |
1449 | const JmpLabel *jmp = &itr->second; |
1450 | const size_t offset = jmp->endOfJmp - jmp->jmpSize; |
1451 | size_t disp; |
1452 | if (jmp->mode == inner::LaddTop) { |
1453 | disp = addrOffset; |
1454 | } else if (jmp->mode == inner::Labs) { |
1455 | disp = size_t(base_->getCurr()); |
1456 | } else { |
1457 | disp = addrOffset - jmp->endOfJmp + jmp->disp; |
1458 | #ifdef XBYAK64 |
1459 | if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) |
1460 | #endif |
1461 | if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) |
1462 | } |
1463 | if (base_->isAutoGrow()) { |
1464 | base_->save(offset, disp, jmp->jmpSize, jmp->mode); |
1465 | } else { |
1466 | base_->rewrite(offset, disp, jmp->jmpSize); |
1467 | } |
1468 | undefList.erase(itr); |
1469 | } |
1470 | } |
1471 | template<class DefList, class T> |
1472 | bool getOffset_inner(const DefList& defList, size_t *offset, const T& label) const |
1473 | { |
1474 | typename DefList::const_iterator i = defList.find(label); |
1475 | if (i == defList.end()) return false; |
1476 | *offset = i->second.offset; |
1477 | return true; |
1478 | } |
1479 | friend class Label; |
1480 | void incRefCount(int id, Label *label) |
1481 | { |
1482 | clabelDefList_[id].refCount++; |
1483 | labelPtrList_.insert(label); |
1484 | } |
1485 | void decRefCount(int id, Label *label) |
1486 | { |
1487 | labelPtrList_.erase(label); |
1488 | ClabelDefList::iterator i = clabelDefList_.find(id); |
1489 | if (i == clabelDefList_.end()) return; |
1490 | if (i->second.refCount == 1) { |
1491 | clabelDefList_.erase(id); |
1492 | } else { |
1493 | --i->second.refCount; |
1494 | } |
1495 | } |
1496 | template<class T> |
1497 | bool hasUndefinedLabel_inner(const T& list) const |
1498 | { |
1499 | #ifndef NDEBUG |
1500 | for (typename T::const_iterator i = list.begin(); i != list.end(); ++i) { |
1501 | std::cerr << "undefined label:" << i->first << std::endl; |
1502 | } |
1503 | #endif |
1504 | return !list.empty(); |
1505 | } |
1506 | // detach all labels linked to LabelManager |
1507 | void resetLabelPtrList() |
1508 | { |
1509 | for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) { |
1510 | (*i)->clear(); |
1511 | } |
1512 | labelPtrList_.clear(); |
1513 | } |
1514 | public: |
1515 | LabelManager() |
1516 | { |
1517 | reset(); |
1518 | } |
1519 | ~LabelManager() |
1520 | { |
1521 | resetLabelPtrList(); |
1522 | } |
1523 | void reset() |
1524 | { |
1525 | base_ = 0; |
1526 | labelId_ = 1; |
1527 | stateList_.clear(); |
1528 | stateList_.push_back(SlabelState()); |
1529 | stateList_.push_back(SlabelState()); |
1530 | clabelDefList_.clear(); |
1531 | clabelUndefList_.clear(); |
1532 | resetLabelPtrList(); |
1533 | } |
1534 | void enterLocal() |
1535 | { |
1536 | stateList_.push_back(SlabelState()); |
1537 | } |
1538 | void leaveLocal() |
1539 | { |
1540 | if (stateList_.size() <= 2) XBYAK_THROW(ERR_UNDER_LOCAL_LABEL) |
1541 | if (hasUndefinedLabel_inner(stateList_.back().undefList)) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND) |
1542 | stateList_.pop_back(); |
1543 | } |
1544 | void set(CodeArray *base) { base_ = base; } |
1545 | void defineSlabel(std::string label) |
1546 | { |
1547 | if (label == "@b" || label == "@f" ) XBYAK_THROW(ERR_BAD_LABEL_STR) |
1548 | if (label == "@@" ) { |
1549 | SlabelDefList& defList = stateList_.front().defList; |
1550 | SlabelDefList::iterator i = defList.find("@f" ); |
1551 | if (i != defList.end()) { |
1552 | defList.erase(i); |
1553 | label = "@b" ; |
1554 | } else { |
1555 | i = defList.find("@b" ); |
1556 | if (i != defList.end()) { |
1557 | defList.erase(i); |
1558 | } |
1559 | label = "@f" ; |
1560 | } |
1561 | } |
1562 | SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); |
1563 | define_inner(st.defList, st.undefList, label, base_->getSize()); |
1564 | } |
1565 | void defineClabel(Label& label) |
1566 | { |
1567 | define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize()); |
1568 | label.mgr = this; |
1569 | labelPtrList_.insert(&label); |
1570 | } |
1571 | void assign(Label& dst, const Label& src) |
1572 | { |
1573 | ClabelDefList::const_iterator i = clabelDefList_.find(src.id); |
1574 | if (i == clabelDefList_.end()) XBYAK_THROW(ERR_LABEL_ISNOT_SET_BY_L) |
1575 | define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset); |
1576 | dst.mgr = this; |
1577 | labelPtrList_.insert(&dst); |
1578 | } |
1579 | bool getOffset(size_t *offset, std::string& label) const |
1580 | { |
1581 | const SlabelDefList& defList = stateList_.front().defList; |
1582 | if (label == "@b" ) { |
1583 | if (defList.find("@f" ) != defList.end()) { |
1584 | label = "@f" ; |
1585 | } else if (defList.find("@b" ) == defList.end()) { |
1586 | XBYAK_THROW_RET(ERR_LABEL_IS_NOT_FOUND, false) |
1587 | } |
1588 | } else if (label == "@f" ) { |
1589 | if (defList.find("@f" ) != defList.end()) { |
1590 | label = "@b" ; |
1591 | } |
1592 | } |
1593 | const SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); |
1594 | return getOffset_inner(st.defList, offset, label); |
1595 | } |
1596 | bool getOffset(size_t *offset, const Label& label) const |
1597 | { |
1598 | return getOffset_inner(clabelDefList_, offset, getId(label)); |
1599 | } |
1600 | void addUndefinedLabel(const std::string& label, const JmpLabel& jmp) |
1601 | { |
1602 | SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); |
1603 | st.undefList.insert(SlabelUndefList::value_type(label, jmp)); |
1604 | } |
1605 | void addUndefinedLabel(const Label& label, const JmpLabel& jmp) |
1606 | { |
1607 | clabelUndefList_.insert(ClabelUndefList::value_type(label.id, jmp)); |
1608 | } |
1609 | bool hasUndefSlabel() const |
1610 | { |
1611 | for (StateList::const_iterator i = stateList_.begin(), ie = stateList_.end(); i != ie; ++i) { |
1612 | if (hasUndefinedLabel_inner(i->undefList)) return true; |
1613 | } |
1614 | return false; |
1615 | } |
1616 | bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); } |
1617 | const uint8_t *getCode() const { return base_->getCode(); } |
1618 | bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); } |
1619 | }; |
1620 | |
1621 | inline Label::Label(const Label& rhs) |
1622 | { |
1623 | id = rhs.id; |
1624 | mgr = rhs.mgr; |
1625 | if (mgr) mgr->incRefCount(id, this); |
1626 | } |
1627 | inline Label& Label::operator=(const Label& rhs) |
1628 | { |
1629 | if (id) XBYAK_THROW_RET(ERR_LABEL_IS_ALREADY_SET_BY_L, *this) |
1630 | id = rhs.id; |
1631 | mgr = rhs.mgr; |
1632 | if (mgr) mgr->incRefCount(id, this); |
1633 | return *this; |
1634 | } |
1635 | inline Label::~Label() |
1636 | { |
1637 | if (id && mgr) mgr->decRefCount(id, this); |
1638 | } |
1639 | inline const uint8_t* Label::getAddress() const |
1640 | { |
1641 | if (mgr == 0 || !mgr->isReady()) return 0; |
1642 | size_t offset; |
1643 | if (!mgr->getOffset(&offset, *this)) return 0; |
1644 | return mgr->getCode() + offset; |
1645 | } |
1646 | |
1647 | typedef enum { |
1648 | DefaultEncoding, |
1649 | VexEncoding, |
1650 | EvexEncoding |
1651 | } PreferredEncoding; |
1652 | |
1653 | class CodeGenerator : public CodeArray { |
1654 | public: |
1655 | enum LabelType { |
1656 | T_SHORT, |
1657 | T_NEAR, |
1658 | T_FAR, // far jump |
1659 | T_AUTO // T_SHORT if possible |
1660 | }; |
1661 | private: |
1662 | CodeGenerator operator=(const CodeGenerator&); // don't call |
1663 | #ifdef XBYAK64 |
1664 | enum { i32e = 32 | 64, BIT = 64 }; |
1665 | static const uint64_t dummyAddr = uint64_t(0x1122334455667788ull); |
1666 | typedef Reg64 NativeReg; |
1667 | #else |
1668 | enum { i32e = 32, BIT = 32 }; |
1669 | static const size_t dummyAddr = 0x12345678; |
1670 | typedef Reg32 NativeReg; |
1671 | #endif |
1672 | // (XMM, XMM|MEM) |
1673 | static inline bool isXMM_XMMorMEM(const Operand& op1, const Operand& op2) |
1674 | { |
1675 | return op1.isXMM() && (op2.isXMM() || op2.isMEM()); |
1676 | } |
1677 | // (MMX, MMX|MEM) or (XMM, XMM|MEM) |
1678 | static inline bool isXMMorMMX_MEM(const Operand& op1, const Operand& op2) |
1679 | { |
1680 | return (op1.isMMX() && (op2.isMMX() || op2.isMEM())) || isXMM_XMMorMEM(op1, op2); |
1681 | } |
1682 | // (XMM, MMX|MEM) |
1683 | static inline bool isXMM_MMXorMEM(const Operand& op1, const Operand& op2) |
1684 | { |
1685 | return op1.isXMM() && (op2.isMMX() || op2.isMEM()); |
1686 | } |
1687 | // (MMX, XMM|MEM) |
1688 | static inline bool isMMX_XMMorMEM(const Operand& op1, const Operand& op2) |
1689 | { |
1690 | return op1.isMMX() && (op2.isXMM() || op2.isMEM()); |
1691 | } |
1692 | // (XMM, REG32|MEM) |
1693 | static inline bool isXMM_REG32orMEM(const Operand& op1, const Operand& op2) |
1694 | { |
1695 | return op1.isXMM() && (op2.isREG(i32e) || op2.isMEM()); |
1696 | } |
1697 | // (REG32, XMM|MEM) |
1698 | static inline bool isREG32_XMMorMEM(const Operand& op1, const Operand& op2) |
1699 | { |
1700 | return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM()); |
1701 | } |
1702 | // (REG32, REG32|MEM) |
1703 | static inline bool isREG32_REG32orMEM(const Operand& op1, const Operand& op2) |
1704 | { |
1705 | return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM()); |
1706 | } |
1707 | static inline bool isValidSSE(const Operand& op1) |
1708 | { |
1709 | // SSE instructions do not support XMM16 - XMM31 |
1710 | return !(op1.isXMM() && op1.getIdx() >= 16); |
1711 | } |
1712 | void rex(const Operand& op1, const Operand& op2 = Operand()) |
1713 | { |
1714 | uint8_t rex = 0; |
1715 | const Operand *p1 = &op1, *p2 = &op2; |
1716 | if (p1->isMEM()) std::swap(p1, p2); |
1717 | if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) |
1718 | if (p2->isMEM()) { |
1719 | const Address& addr = p2->getAddress(); |
1720 | if (BIT == 64 && addr.is32bit()) db(0x67); |
1721 | rex = addr.getRex() | p1->getReg().getRex(); |
1722 | } else { |
1723 | // ModRM(reg, base); |
1724 | rex = op2.getReg().getRex(op1.getReg()); |
1725 | } |
1726 | // except movsx(16bit, 32/64bit) |
1727 | if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66); |
1728 | if (rex) db(rex); |
1729 | } |
1730 | enum AVXtype { |
1731 | // low 3 bit |
1732 | T_N1 = 1, |
1733 | T_N2 = 2, |
1734 | T_N4 = 3, |
1735 | T_N8 = 4, |
1736 | T_N16 = 5, |
1737 | T_N32 = 6, |
1738 | T_NX_MASK = 7, |
1739 | // |
1740 | T_N_VL = 1 << 3, // N * (1, 2, 4) for VL |
1741 | T_DUP = 1 << 4, // N = (8, 32, 64) |
1742 | T_66 = 1 << 5, // pp = 1 |
1743 | T_F3 = 1 << 6, // pp = 2 |
1744 | T_F2 = T_66 | T_F3, // pp = 3 |
1745 | T_ER_R = 1 << 7, // reg{er} |
1746 | T_0F = 1 << 8, |
1747 | T_0F38 = 1 << 9, |
1748 | T_0F3A = 1 << 10, |
1749 | T_L0 = 1 << 11, |
1750 | T_L1 = 1 << 12, |
1751 | T_W0 = 1 << 13, |
1752 | T_W1 = 1 << 14, |
1753 | T_EW0 = 1 << 15, |
1754 | T_EW1 = 1 << 16, |
1755 | T_YMM = 1 << 17, // support YMM, ZMM |
1756 | T_EVEX = 1 << 18, |
1757 | T_ER_X = 1 << 19, // xmm{er} |
1758 | T_ER_Y = 1 << 20, // ymm{er} |
1759 | T_ER_Z = 1 << 21, // zmm{er} |
1760 | T_SAE_X = 1 << 22, // xmm{sae} |
1761 | T_SAE_Y = 1 << 23, // ymm{sae} |
1762 | T_SAE_Z = 1 << 24, // zmm{sae} |
1763 | T_MUST_EVEX = 1 << 25, // contains T_EVEX |
1764 | T_B32 = 1 << 26, // m32bcst |
1765 | T_B64 = 1 << 27, // m64bcst |
1766 | T_B16 = T_B32 | T_B64, // m16bcst (Be careful) |
1767 | T_M_K = 1 << 28, // mem{k} |
1768 | T_VSIB = 1 << 29, |
1769 | T_MEM_EVEX = 1 << 30, // use evex if mem |
1770 | T_FP16 = 1 << 31, // avx512-fp16 |
1771 | T_MAP5 = T_FP16 | T_0F, |
1772 | T_MAP6 = T_FP16 | T_0F38, |
1773 | T_XXX |
1774 | }; |
1775 | // T_66 = 1, T_F3 = 2, T_F2 = 3 |
1776 | uint32_t getPP(int type) const { return (type >> 5) & 3; } |
1777 | void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false) |
1778 | { |
1779 | int w = (type & T_W1) ? 1 : 0; |
1780 | bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM(); |
1781 | bool r = reg.isExtIdx(); |
1782 | bool b = base.isExtIdx(); |
1783 | int idx = v ? v->getIdx() : 0; |
1784 | if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION) |
1785 | uint32_t pp = getPP(type); |
1786 | uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; |
1787 | if (!b && !x && !w && (type & T_0F)) { |
1788 | db(0xC5); db((r ? 0 : 0x80) | vvvv); |
1789 | } else { |
1790 | uint32_t mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; |
1791 | db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv); |
1792 | } |
1793 | db(code); |
1794 | } |
1795 | void verifySAE(const Reg& r, int type) const |
1796 | { |
1797 | if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return; |
1798 | XBYAK_THROW(ERR_SAE_IS_INVALID) |
1799 | } |
1800 | void verifyER(const Reg& r, int type) const |
1801 | { |
1802 | if ((type & T_ER_R) && r.isREG(32|64)) return; |
1803 | if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return; |
1804 | XBYAK_THROW(ERR_ER_IS_INVALID) |
1805 | } |
1806 | // (a, b, c) contains non zero two or three values then err |
1807 | int verifyDuplicate(int a, int b, int c, int err) |
1808 | { |
1809 | int v = a | b | c; |
1810 | if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0) |
1811 | return v; |
1812 | } |
1813 | int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false) |
1814 | { |
1815 | if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0) |
1816 | int w = (type & T_EW1) ? 1 : 0; |
1817 | uint32_t mmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; |
1818 | if (type & T_FP16) mmm |= 4; |
1819 | uint32_t pp = getPP(type); |
1820 | int idx = v ? v->getIdx() : 0; |
1821 | uint32_t vvvv = ~idx; |
1822 | |
1823 | bool R = !reg.isExtIdx(); |
1824 | bool X = x ? false : !base.isExtIdx2(); |
1825 | bool B = !base.isExtIdx(); |
1826 | bool Rp = !reg.isExtIdx2(); |
1827 | int LL; |
1828 | int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET); |
1829 | int disp8N = 1; |
1830 | if (rounding) { |
1831 | if (rounding == EvexModifierRounding::T_SAE) { |
1832 | verifySAE(base, type); LL = 0; |
1833 | } else { |
1834 | verifyER(base, type); LL = rounding - 1; |
1835 | } |
1836 | b = true; |
1837 | } else { |
1838 | if (v) VL = (std::max)(VL, v->getBit()); |
1839 | VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL); |
1840 | LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0; |
1841 | if (b) { |
1842 | disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8; |
1843 | } else if (type & T_DUP) { |
1844 | disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64; |
1845 | } else { |
1846 | if ((type & (T_NX_MASK | T_N_VL)) == 0) { |
1847 | type |= T_N16 | T_N_VL; // default |
1848 | } |
1849 | int low = type & T_NX_MASK; |
1850 | if (low > 0) { |
1851 | disp8N = 1 << (low - 1); |
1852 | if (type & T_N_VL) disp8N *= (VL == 512 ? 4 : VL == 256 ? 2 : 1); |
1853 | } |
1854 | } |
1855 | } |
1856 | bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx); |
1857 | bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false); |
1858 | if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET); |
1859 | if (aaa == 0) z = 0; // clear T_z if mask is not set |
1860 | db(0x62); |
1861 | db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | mmm); |
1862 | db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3)); |
1863 | db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7)); |
1864 | db(code); |
1865 | return disp8N; |
1866 | } |
1867 | void setModRM(int mod, int r1, int r2) |
1868 | { |
1869 | db(static_cast<uint8_t>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7))); |
1870 | } |
1871 | void setSIB(const RegExp& e, int reg, int disp8N = 0) |
1872 | { |
1873 | uint64_t disp64 = e.getDisp(); |
1874 | #ifdef XBYAK64 |
1875 | #ifdef XBYAK_OLD_DISP_CHECK |
1876 | // treat 0xffffffff as 0xffffffffffffffff |
1877 | uint64_t high = disp64 >> 32; |
1878 | if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) |
1879 | #else |
1880 | // displacement should be a signed 32-bit value, so also check sign bit |
1881 | uint64_t high = disp64 >> 31; |
1882 | if (high != 0 && high != 0x1FFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) |
1883 | #endif |
1884 | #endif |
1885 | uint32_t disp = static_cast<uint32_t>(disp64); |
1886 | const Reg& base = e.getBase(); |
1887 | const Reg& index = e.getIndex(); |
1888 | const int baseIdx = base.getIdx(); |
1889 | const int baseBit = base.getBit(); |
1890 | const int indexBit = index.getBit(); |
1891 | enum { |
1892 | mod00 = 0, mod01 = 1, mod10 = 2 |
1893 | }; |
1894 | int mod = mod10; // disp32 |
1895 | if (!baseBit || ((baseIdx & 7) != Operand::EBP && disp == 0)) { |
1896 | mod = mod00; |
1897 | } else { |
1898 | if (disp8N == 0) { |
1899 | if (inner::IsInDisp8(disp)) { |
1900 | mod = mod01; |
1901 | } |
1902 | } else { |
1903 | // disp must be casted to signed |
1904 | uint32_t t = static_cast<uint32_t>(static_cast<int>(disp) / disp8N); |
1905 | if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) { |
1906 | disp = t; |
1907 | mod = mod01; |
1908 | } |
1909 | } |
1910 | } |
1911 | const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand::EBP; |
1912 | /* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */ |
1913 | bool hasSIB = indexBit || (baseIdx & 7) == Operand::ESP; |
1914 | #ifdef XBYAK64 |
1915 | if (!baseBit && !indexBit) hasSIB = true; |
1916 | #endif |
1917 | if (hasSIB) { |
1918 | setModRM(mod, reg, Operand::ESP); |
1919 | /* SIB = [2:3:3] = [SS:index:base(=rm)] */ |
1920 | const int idx = indexBit ? (index.getIdx() & 7) : Operand::ESP; |
1921 | const int scale = e.getScale(); |
1922 | const int SS = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 1 : 0; |
1923 | setModRM(SS, idx, newBaseIdx); |
1924 | } else { |
1925 | setModRM(mod, reg, newBaseIdx); |
1926 | } |
1927 | if (mod == mod01) { |
1928 | db(disp); |
1929 | } else if (mod == mod10 || (mod == mod00 && !baseBit)) { |
1930 | dd(disp); |
1931 | } |
1932 | } |
1933 | LabelManager labelMgr_; |
1934 | bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } |
1935 | void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE) |
1936 | { |
1937 | rex(reg2, reg1); |
1938 | db(code0 | (reg1.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); |
1939 | setModRM(3, reg1.getIdx(), reg2.getIdx()); |
1940 | } |
1941 | void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0) |
1942 | { |
1943 | if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) |
1944 | rex(addr, reg); |
1945 | db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); |
1946 | opAddr(addr, reg.getIdx(), immSize); |
1947 | } |
1948 | void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE) |
1949 | { |
1950 | if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) |
1951 | if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) |
1952 | rex(addr, reg); |
1953 | db(code0); if (code1 != NONE) db(code1); |
1954 | opAddr(addr, reg.getIdx()); |
1955 | } |
1956 | void opMIB(const Address& addr, const Reg& reg, int code0, int code1) |
1957 | { |
1958 | if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) |
1959 | if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS) |
1960 | if (BIT == 64 && addr.is32bit()) db(0x67); |
1961 | const RegExp& regExp = addr.getRegExp(false); |
1962 | uint8_t rex = regExp.getRex(); |
1963 | if (rex) db(rex); |
1964 | db(code0); db(code1); |
1965 | setSIB(regExp, reg.getIdx()); |
1966 | } |
1967 | void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) |
1968 | { |
1969 | const int shortJmpSize = 2; |
1970 | const int = longPref ? 2 : 1; |
1971 | const int longJmpSize = longHeaderSize + 4; |
1972 | if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) { |
1973 | db(shortCode); db(disp - shortJmpSize); |
1974 | } else { |
1975 | if (type == T_SHORT) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) |
1976 | if (longPref) db(longPref); |
1977 | db(longCode); dd(disp - longJmpSize); |
1978 | } |
1979 | } |
1980 | bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); } |
1981 | template<class T> |
1982 | void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) |
1983 | { |
1984 | if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED) |
1985 | if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */ |
1986 | size_t offset = 0; |
1987 | if (labelMgr_.getOffset(&offset, label)) { /* label exists */ |
1988 | makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref); |
1989 | } else { |
1990 | int jmpSize = 0; |
1991 | if (isNEAR(type)) { |
1992 | jmpSize = 4; |
1993 | if (longPref) db(longPref); |
1994 | db(longCode); dd(0); |
1995 | } else { |
1996 | jmpSize = 1; |
1997 | db(shortCode); db(0); |
1998 | } |
1999 | JmpLabel jmp(size_, jmpSize, inner::LasIs); |
2000 | labelMgr_.addUndefinedLabel(label, jmp); |
2001 | } |
2002 | } |
2003 | void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0) |
2004 | { |
2005 | if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED) |
2006 | if (isAutoGrow()) { |
2007 | if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW) |
2008 | if (size_ + 16 >= maxSize_) growMemory(); |
2009 | if (longPref) db(longPref); |
2010 | db(longCode); |
2011 | dd(0); |
2012 | save(size_ - 4, size_t(addr) - size_, 4, inner::Labs); |
2013 | } else { |
2014 | makeJmp(inner::VerifyInInt32(reinterpret_cast<const uint8_t*>(addr) - getCurr()), type, shortCode, longCode, longPref); |
2015 | } |
2016 | |
2017 | } |
2018 | void opJmpOp(const Operand& op, LabelType type, int ext) |
2019 | { |
2020 | const int bit = 16|i32e; |
2021 | if (type == T_FAR) { |
2022 | if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED) |
2023 | opR_ModM(op, bit, ext + 1, 0xFF, NONE, NONE, false); |
2024 | } else { |
2025 | opR_ModM(op, bit, ext, 0xFF, NONE, NONE, true); |
2026 | } |
2027 | } |
2028 | // reg is reg field of ModRM |
2029 | // immSize is the size for immediate value |
2030 | // disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement |
2031 | void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false) |
2032 | { |
2033 | if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) |
2034 | if (addr.getMode() == Address::M_ModRM) { |
2035 | setSIB(addr.getRegExp(), reg, disp8N); |
2036 | } else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) { |
2037 | setModRM(0, reg, 5); |
2038 | if (addr.getLabel()) { // [rip + Label] |
2039 | putL_inner(*addr.getLabel(), true, addr.getDisp() - immSize); |
2040 | } else { |
2041 | size_t disp = addr.getDisp(); |
2042 | if (addr.getMode() == Address::M_ripAddr) { |
2043 | if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW) |
2044 | disp -= (size_t)getCurr() + 4 + immSize; |
2045 | } |
2046 | dd(inner::VerifyInInt32(disp)); |
2047 | } |
2048 | } |
2049 | } |
2050 | /* preCode is for SSSE3/SSE4 */ |
2051 | void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE) |
2052 | { |
2053 | if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION) |
2054 | if (!isValidSSE(reg) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED) |
2055 | if (pref != NONE) db(pref); |
2056 | if (op.isMEM()) { |
2057 | opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0); |
2058 | } else { |
2059 | opModR(reg.getReg(), op.getReg(), 0x0F, preCode, code); |
2060 | } |
2061 | if (imm8 != NONE) db(imm8); |
2062 | } |
2063 | void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext) |
2064 | { |
2065 | if (!isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED) |
2066 | if (mmx.isXMM()) db(0x66); |
2067 | opModR(Reg32(ext), mmx, 0x0F, code); |
2068 | db(imm8); |
2069 | } |
2070 | void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE) |
2071 | { |
2072 | opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode); |
2073 | } |
2074 | void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref) |
2075 | { |
2076 | if (!isValidSSE(op1) || !isValidSSE(op2)) XBYAK_THROW(ERR_NOT_SUPPORTED) |
2077 | if (pref != NONE) db(pref); |
2078 | if (op1.isXMM() && op2.isMEM()) { |
2079 | opModM(op2.getAddress(), op1.getReg(), 0x0F, code); |
2080 | } else if (op1.isMEM() && op2.isXMM()) { |
2081 | opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1); |
2082 | } else { |
2083 | XBYAK_THROW(ERR_BAD_COMBINATION) |
2084 | } |
2085 | } |
2086 | void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false) |
2087 | { |
2088 | if (!isValidSSE(op) || !isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED) |
2089 | if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */ |
2090 | if (mmx.isXMM()) db(0x66); |
2091 | opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm); |
2092 | } else { |
2093 | opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, 0x3A); |
2094 | } |
2095 | } |
2096 | void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false, int immSize = 0) |
2097 | { |
2098 | int opBit = op.getBit(); |
2099 | if (disableRex && opBit == 64) opBit = 32; |
2100 | if (op.isREG(bit)) { |
2101 | opModR(Reg(ext, Operand::REG, opBit), op.getReg().changeBit(opBit), code0, code1, code2); |
2102 | } else if (op.isMEM()) { |
2103 | opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize); |
2104 | } else { |
2105 | XBYAK_THROW(ERR_BAD_COMBINATION) |
2106 | } |
2107 | } |
2108 | void opShift(const Operand& op, int imm, int ext) |
2109 | { |
2110 | verifyMemHasSize(op); |
2111 | opR_ModM(op, 0, ext, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), NONE, NONE, false, (imm != 1) ? 1 : 0); |
2112 | if (imm != 1) db(imm); |
2113 | } |
2114 | void opShift(const Operand& op, const Reg8& _cl, int ext) |
2115 | { |
2116 | if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) |
2117 | opR_ModM(op, 0, ext, 0xD2); |
2118 | } |
2119 | void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0) |
2120 | { |
2121 | if (condR) { |
2122 | opModR(op1.getReg(), op2.getReg(), code0, code1, code2); |
2123 | } else if (condM) { |
2124 | opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize); |
2125 | } else { |
2126 | XBYAK_THROW(ERR_BAD_COMBINATION) |
2127 | } |
2128 | } |
2129 | void opShxd(const Operand& op, const Reg& reg, uint8_t imm, int code, const Reg8 *_cl = 0) |
2130 | { |
2131 | if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) |
2132 | opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1); |
2133 | if (!_cl) db(imm); |
2134 | } |
2135 | // (REG, REG|MEM), (MEM, REG) |
2136 | void opRM_RM(const Operand& op1, const Operand& op2, int code) |
2137 | { |
2138 | if (op1.isREG() && op2.isMEM()) { |
2139 | opModM(op2.getAddress(), op1.getReg(), code | 2); |
2140 | } else { |
2141 | opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code); |
2142 | } |
2143 | } |
2144 | // (REG|MEM, IMM) |
2145 | void opRM_I(const Operand& op, uint32_t imm, int code, int ext) |
2146 | { |
2147 | verifyMemHasSize(op); |
2148 | uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32; |
2149 | if (op.isBit(8)) immBit = 8; |
2150 | if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) |
2151 | if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */ |
2152 | if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al |
2153 | rex(op); |
2154 | db(code | 4 | (immBit == 8 ? 0 : 1)); |
2155 | } else { |
2156 | int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0; |
2157 | opR_ModM(op, 0, ext, 0x80 | tmp, NONE, NONE, false, immBit / 8); |
2158 | } |
2159 | db(imm, immBit / 8); |
2160 | } |
2161 | void opIncDec(const Operand& op, int code, int ext) |
2162 | { |
2163 | verifyMemHasSize(op); |
2164 | #ifndef XBYAK64 |
2165 | if (op.isREG() && !op.isBit(8)) { |
2166 | rex(op); db(code | op.getIdx()); |
2167 | return; |
2168 | } |
2169 | #endif |
2170 | code = 0xFE; |
2171 | if (op.isREG()) { |
2172 | opModR(Reg(ext, Operand::REG, op.getBit()), op.getReg(), code); |
2173 | } else { |
2174 | opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code); |
2175 | } |
2176 | } |
2177 | void opPushPop(const Operand& op, int code, int ext, int alt) |
2178 | { |
2179 | int bit = op.getBit(); |
2180 | if (bit == 16 || bit == BIT) { |
2181 | if (bit == 16) db(0x66); |
2182 | if (op.isREG()) { |
2183 | if (op.getReg().getIdx() >= 8) db(0x41); |
2184 | db(alt | (op.getIdx() & 7)); |
2185 | return; |
2186 | } |
2187 | if (op.isMEM()) { |
2188 | opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code); |
2189 | return; |
2190 | } |
2191 | } |
2192 | XBYAK_THROW(ERR_BAD_COMBINATION) |
2193 | } |
2194 | void verifyMemHasSize(const Operand& op) const |
2195 | { |
2196 | if (op.isMEM() && op.getBit() == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED) |
2197 | } |
2198 | /* |
2199 | mov(r, imm) = db(imm, mov_imm(r, imm)) |
2200 | */ |
2201 | int mov_imm(const Reg& reg, uint64_t imm) |
2202 | { |
2203 | int bit = reg.getBit(); |
2204 | const int idx = reg.getIdx(); |
2205 | int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3); |
2206 | if (bit == 64 && (imm & ~uint64_t(0xffffffffu)) == 0) { |
2207 | rex(Reg32(idx)); |
2208 | bit = 32; |
2209 | } else { |
2210 | rex(reg); |
2211 | if (bit == 64 && inner::IsInInt32(imm)) { |
2212 | db(0xC7); |
2213 | code = 0xC0; |
2214 | bit = 32; |
2215 | } |
2216 | } |
2217 | db(code | (idx & 7)); |
2218 | return bit / 8; |
2219 | } |
2220 | template<class T> |
2221 | void putL_inner(T& label, bool relative = false, size_t disp = 0) |
2222 | { |
2223 | const int jmpSize = relative ? 4 : (int)sizeof(size_t); |
2224 | if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); |
2225 | size_t offset = 0; |
2226 | if (labelMgr_.getOffset(&offset, label)) { |
2227 | if (relative) { |
2228 | db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize); |
2229 | } else if (isAutoGrow()) { |
2230 | db(uint64_t(0), jmpSize); |
2231 | save(size_ - jmpSize, offset, jmpSize, inner::LaddTop); |
2232 | } else { |
2233 | db(size_t(top_) + offset, jmpSize); |
2234 | } |
2235 | return; |
2236 | } |
2237 | db(uint64_t(0), jmpSize); |
2238 | JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp); |
2239 | labelMgr_.addUndefinedLabel(label, jmp); |
2240 | } |
2241 | void opMovxx(const Reg& reg, const Operand& op, uint8_t code) |
2242 | { |
2243 | if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) |
2244 | int w = op.isBit(16); |
2245 | bool cond = reg.isREG() && (reg.getBit() > op.getBit()); |
2246 | opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w); |
2247 | } |
2248 | void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext) |
2249 | { |
2250 | if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) |
2251 | uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0; |
2252 | if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE) |
2253 | if (m64ext && addr.isBit(64)) ext = m64ext; |
2254 | |
2255 | rex(addr, st0); |
2256 | db(code); |
2257 | opAddr(addr, ext); |
2258 | } |
2259 | // use code1 if reg1 == st0 |
2260 | // use code2 if reg1 != st0 && reg2 == st0 |
2261 | void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2) |
2262 | { |
2263 | uint32_t code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0; |
2264 | if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION) |
2265 | db(uint8_t(code >> 8)); |
2266 | db(uint8_t(code | (reg1.getIdx() | reg2.getIdx()))); |
2267 | } |
2268 | void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2) |
2269 | { |
2270 | db(code1); db(code2 | reg.getIdx()); |
2271 | } |
2272 | void opVex(const Reg& r, const Operand *p1, const Operand& op2, int type, int code, int imm8 = NONE) |
2273 | { |
2274 | if (op2.isMEM()) { |
2275 | const Address& addr = op2.getAddress(); |
2276 | const RegExp& regExp = addr.getRegExp(); |
2277 | const Reg& base = regExp.getBase(); |
2278 | const Reg& index = regExp.getIndex(); |
2279 | if (BIT == 64 && addr.is32bit()) db(0x67); |
2280 | int disp8N = 0; |
2281 | bool x = index.isExtIdx(); |
2282 | if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) { |
2283 | int aaa = addr.getOpmaskIdx(); |
2284 | if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY) |
2285 | bool b = false; |
2286 | if (addr.isBroadcast()) { |
2287 | if (!(type & (T_B32 | T_B64))) XBYAK_THROW(ERR_INVALID_BROADCAST) |
2288 | b = true; |
2289 | } |
2290 | int VL = regExp.isVsib() ? index.getBit() : 0; |
2291 | disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2()); |
2292 | } else { |
2293 | vex(r, base, p1, type, code, x); |
2294 | } |
2295 | opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0); |
2296 | } else { |
2297 | const Reg& base = op2.getReg(); |
2298 | if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) { |
2299 | evex(r, base, p1, type, code); |
2300 | } else { |
2301 | vex(r, base, p1, type, code); |
2302 | } |
2303 | setModRM(3, r.getIdx(), base.getIdx()); |
2304 | } |
2305 | if (imm8 != NONE) db(imm8); |
2306 | } |
2307 | // (r, r, r/m) if isR_R_RM |
2308 | // (r, r/m, r) |
2309 | void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8_t code, bool isR_R_RM, int imm8 = NONE) |
2310 | { |
2311 | const Operand *p1 = &op1; |
2312 | const Operand *p2 = &op2; |
2313 | if (!isR_R_RM) std::swap(p1, p2); |
2314 | const unsigned int bit = r.getBit(); |
2315 | if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION) |
2316 | type |= (bit == 64) ? T_W1 : T_W0; |
2317 | opVex(r, p1, *p2, type, code, imm8); |
2318 | } |
2319 | void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, int imm8 = NONE) |
2320 | { |
2321 | const Xmm *x2 = static_cast<const Xmm*>(&op1); |
2322 | const Operand *op = &op2; |
2323 | if (op2.isNone()) { // (x1, op1) -> (x1, x1, op1) |
2324 | x2 = &x1; |
2325 | op = &op1; |
2326 | } |
2327 | // (x1, x2, op) |
2328 | if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION) |
2329 | opVex(x1, x2, *op, type, code0, imm8); |
2330 | } |
2331 | void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE) |
2332 | { |
2333 | if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION) |
2334 | opVex(k, &x2, op3, type, code0, imm8); |
2335 | } |
2336 | // (x, x/m), (y, x/m256), (z, y/m) |
2337 | void checkCvt1(const Operand& x, const Operand& op) const |
2338 | { |
2339 | if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) |
2340 | } |
2341 | // (x, x/m), (x, y/m256), (y, z/m) |
2342 | void checkCvt2(const Xmm& x, const Operand& op) const |
2343 | { |
2344 | if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) |
2345 | } |
2346 | void opCvt(const Xmm& x, const Operand& op, int type, int code) |
2347 | { |
2348 | Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM; |
2349 | opVex(x.copyAndSetKind(kind), &xm0, op, type, code); |
2350 | } |
2351 | void opCvt2(const Xmm& x, const Operand& op, int type, int code) |
2352 | { |
2353 | checkCvt2(x, op); |
2354 | opCvt(x, op, type, code); |
2355 | } |
2356 | void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code) |
2357 | { |
2358 | if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) |
2359 | Xmm x(op.getIdx()); |
2360 | const Operand *p = op.isREG() ? &x : &op; |
2361 | opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code); |
2362 | } |
2363 | // (x, x/y/xword/yword), (y, z/m) |
2364 | void checkCvt4(const Xmm& x, const Operand& op) const |
2365 | { |
2366 | if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) |
2367 | } |
2368 | // (x, x/y/z/xword/yword/zword) |
2369 | void opCvt5(const Xmm& x, const Operand& op, int type, int code) |
2370 | { |
2371 | if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION) |
2372 | Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM; |
2373 | opVex(x.copyAndSetKind(kind), &xm0, op, type, code); |
2374 | } |
2375 | const Xmm& cvtIdx0(const Operand& x) const |
2376 | { |
2377 | return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0; |
2378 | } |
2379 | // support (x, x/m, imm), (y, y/m, imm) |
2380 | void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, int imm8 = NONE) |
2381 | { |
2382 | opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8); |
2383 | } |
2384 | // QQQ:need to refactor |
2385 | void opSp1(const Reg& reg, const Operand& op, uint8_t pref, uint8_t code0, uint8_t code1) |
2386 | { |
2387 | if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) |
2388 | bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); |
2389 | if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) |
2390 | if (is16bit) db(0x66); |
2391 | db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1); |
2392 | } |
2393 | void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8_t code, int mode) |
2394 | { |
2395 | const RegExp& regExp = addr.getRegExp(); |
2396 | if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) |
2397 | const int y_vx_y = 0; |
2398 | const int y_vy_y = 1; |
2399 | // const int x_vy_x = 2; |
2400 | const bool isAddrYMM = regExp.getIndex().getBit() == 256; |
2401 | if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) { |
2402 | bool isOK = false; |
2403 | if (mode == y_vx_y) { |
2404 | isOK = x1.isYMM() && !isAddrYMM && x2.isYMM(); |
2405 | } else if (mode == y_vy_y) { |
2406 | isOK = x1.isYMM() && isAddrYMM && x2.isYMM(); |
2407 | } else { // x_vy_x |
2408 | isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM(); |
2409 | } |
2410 | if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) |
2411 | } |
2412 | int i1 = x1.getIdx(); |
2413 | int i2 = regExp.getIndex().getIdx(); |
2414 | int i3 = x2.getIdx(); |
2415 | if (i1 == i2 || i1 == i3 || i2 == i3) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID); |
2416 | opAVX_X_X_XM(isAddrYMM ? Ymm(i1) : x1, isAddrYMM ? Ymm(i3) : x2, addr, type, code); |
2417 | } |
2418 | enum { |
2419 | xx_yy_zz = 0, |
2420 | xx_yx_zy = 1, |
2421 | xx_xy_yz = 2 |
2422 | }; |
2423 | void checkGather2(const Xmm& x1, const Reg& x2, int mode) const |
2424 | { |
2425 | if (x1.isXMM() && x2.isXMM()) return; |
2426 | switch (mode) { |
2427 | case xx_yy_zz: if ((x1.isYMM() && x2.isYMM()) || (x1.isZMM() && x2.isZMM())) return; |
2428 | break; |
2429 | case xx_yx_zy: if ((x1.isYMM() && x2.isXMM()) || (x1.isZMM() && x2.isYMM())) return; |
2430 | break; |
2431 | case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return; |
2432 | break; |
2433 | } |
2434 | XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) |
2435 | } |
2436 | void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode) |
2437 | { |
2438 | if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) |
2439 | const RegExp& regExp = addr.getRegExp(); |
2440 | checkGather2(x, regExp.getIndex(), mode); |
2441 | int maskIdx = x.getOpmaskIdx(); |
2442 | if ((type & T_M_K) && addr.getOpmaskIdx()) maskIdx = addr.getOpmaskIdx(); |
2443 | if (maskIdx == 0) XBYAK_THROW(ERR_K0_IS_INVALID); |
2444 | if (!(type & T_M_K) && x.getIdx() == regExp.getIndex().getIdx()) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID); |
2445 | opVex(x, 0, addr, type, code); |
2446 | } |
2447 | /* |
2448 | xx_xy_yz ; mode = true |
2449 | xx_xy_xz ; mode = false |
2450 | */ |
2451 | void opVmov(const Operand& op, const Xmm& x, int type, uint8_t code, bool mode) |
2452 | { |
2453 | if (mode) { |
2454 | if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION) |
2455 | } else { |
2456 | if (!op.isMEM() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) |
2457 | } |
2458 | opVex(x, 0, op, type, code); |
2459 | } |
2460 | void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8_t code, Operand::Kind kind) |
2461 | { |
2462 | if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) |
2463 | if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) |
2464 | opVex(x, 0, addr, type, code); |
2465 | } |
2466 | void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding) |
2467 | { |
2468 | opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code0); |
2469 | } |
2470 | int orEvexIf(PreferredEncoding encoding) { |
2471 | if (encoding == DefaultEncoding) { |
2472 | encoding = defaultEncoding_; |
2473 | } |
2474 | if (encoding == EvexEncoding) { |
2475 | #ifdef XBYAK_DISABLE_AVX512 |
2476 | XBYAK_THROW(ERR_EVEX_IS_INVALID) |
2477 | #endif |
2478 | return T_MUST_EVEX; |
2479 | } |
2480 | return 0; |
2481 | } |
2482 | void opInOut(const Reg& a, const Reg& d, uint8_t code) |
2483 | { |
2484 | if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) { |
2485 | switch (a.getBit()) { |
2486 | case 8: db(code); return; |
2487 | case 16: db(0x66); db(code + 1); return; |
2488 | case 32: db(code + 1); return; |
2489 | } |
2490 | } |
2491 | XBYAK_THROW(ERR_BAD_COMBINATION) |
2492 | } |
2493 | void opInOut(const Reg& a, uint8_t code, uint8_t v) |
2494 | { |
2495 | if (a.getIdx() == Operand::AL) { |
2496 | switch (a.getBit()) { |
2497 | case 8: db(code); db(v); return; |
2498 | case 16: db(0x66); db(code + 1); db(v); return; |
2499 | case 32: db(code + 1); db(v); return; |
2500 | } |
2501 | } |
2502 | XBYAK_THROW(ERR_BAD_COMBINATION) |
2503 | } |
2504 | #ifdef XBYAK64 |
2505 | void opAMX(const Tmm& t1, const Address& addr, int type, int code0) |
2506 | { |
2507 | // require both base and index |
2508 | const RegExp exp = addr.getRegExp(false); |
2509 | if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED) |
2510 | opVex(t1, &tmm0, addr, type, code0); |
2511 | } |
2512 | #endif |
2513 | public: |
2514 | unsigned int getVersion() const { return VERSION; } |
2515 | using CodeArray::db; |
2516 | const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; |
2517 | const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; |
2518 | const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; |
2519 | const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; |
2520 | const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; |
2521 | const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7; |
2522 | const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; |
2523 | const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi; |
2524 | const Reg16 ax, cx, dx, bx, sp, bp, si, di; |
2525 | const Reg8 al, cl, dl, bl, ah, ch, dh, bh; |
2526 | const AddressFrame ptr, byte, word, dword, qword, xword, yword, zword; // xword is same as oword of NASM |
2527 | const AddressFrame ptr_b, xword_b, yword_b, zword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b} |
2528 | const Fpu st0, st1, st2, st3, st4, st5, st6, st7; |
2529 | const Opmask k0, k1, k2, k3, k4, k5, k6, k7; |
2530 | const BoundsReg bnd0, bnd1, bnd2, bnd3; |
2531 | const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} |
2532 | const EvexModifierZero T_z; // {z} |
2533 | #ifdef XBYAK64 |
2534 | const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15; |
2535 | const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d; |
2536 | const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w; |
2537 | const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b; |
2538 | const Reg8 spl, bpl, sil, dil; |
2539 | const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15; |
2540 | const Xmm xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23; |
2541 | const Xmm xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31; |
2542 | const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15; |
2543 | const Ymm ymm16, ymm17, ymm18, ymm19, ymm20, ymm21, ymm22, ymm23; |
2544 | const Ymm ymm24, ymm25, ymm26, ymm27, ymm28, ymm29, ymm30, ymm31; |
2545 | const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15; |
2546 | const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23; |
2547 | const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31; |
2548 | const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7; |
2549 | const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience |
2550 | const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23; |
2551 | const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31; |
2552 | const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15; |
2553 | const Ymm &ym16, &ym17, &ym18, &ym19, &ym20, &ym21, &ym22, &ym23; |
2554 | const Ymm &ym24, &ym25, &ym26, &ym27, &ym28, &ym29, &ym30, &ym31; |
2555 | const Zmm &zm8, &zm9, &zm10, &zm11, &zm12, &zm13, &zm14, &zm15; |
2556 | const Zmm &zm16, &zm17, &zm18, &zm19, &zm20, &zm21, &zm22, &zm23; |
2557 | const Zmm &zm24, &zm25, &zm26, &zm27, &zm28, &zm29, &zm30, &zm31; |
2558 | const RegRip rip; |
2559 | #endif |
2560 | #ifndef XBYAK_DISABLE_SEGMENT |
2561 | const Segment es, cs, ss, ds, fs, gs; |
2562 | #endif |
2563 | private: |
2564 | bool isDefaultJmpNEAR_; |
2565 | PreferredEncoding defaultEncoding_; |
2566 | public: |
2567 | void L(const std::string& label) { labelMgr_.defineSlabel(label); } |
2568 | void L(Label& label) { labelMgr_.defineClabel(label); } |
2569 | Label L() { Label label; L(label); return label; } |
2570 | void inLocalLabel() { labelMgr_.enterLocal(); } |
2571 | void outLocalLabel() { labelMgr_.leaveLocal(); } |
2572 | /* |
2573 | assign src to dst |
2574 | require |
2575 | dst : does not used by L() |
2576 | src : used by L() |
2577 | */ |
2578 | void assignL(Label& dst, const Label& src) { labelMgr_.assign(dst, src); } |
2579 | /* |
2580 | put address of label to buffer |
2581 | @note the put size is 4(32-bit), 8(64-bit) |
2582 | */ |
2583 | void putL(std::string label) { putL_inner(label); } |
2584 | void putL(const Label& label) { putL_inner(label); } |
2585 | |
2586 | // set default type of `jmp` of undefined label to T_NEAR |
2587 | void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; } |
2588 | void jmp(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 4); } |
2589 | void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); } |
2590 | void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); } |
2591 | void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); } |
2592 | void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); } |
2593 | |
2594 | void call(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 2); } |
2595 | // call(string label), not const std::string& |
2596 | void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); } |
2597 | void call(const char *label) { call(std::string(label)); } |
2598 | void call(const Label& label) { opJmp(label, T_NEAR, 0, 0xE8, 0); } |
2599 | // call(function pointer) |
2600 | #ifdef XBYAK_VARIADIC_TEMPLATE |
2601 | template<class Ret, class... Params> |
2602 | void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); } |
2603 | #endif |
2604 | void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); } |
2605 | |
2606 | void test(const Operand& op, const Reg& reg) |
2607 | { |
2608 | opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84); |
2609 | } |
2610 | void test(const Operand& op, uint32_t imm) |
2611 | { |
2612 | verifyMemHasSize(op); |
2613 | int immSize = (std::min)(op.getBit() / 8, 4U); |
2614 | if (op.isREG() && op.getIdx() == 0) { // al, ax, eax |
2615 | rex(op); |
2616 | db(0xA8 | (op.isBit(8) ? 0 : 1)); |
2617 | } else { |
2618 | opR_ModM(op, 0, 0, 0xF6, NONE, NONE, false, immSize); |
2619 | } |
2620 | db(imm, immSize); |
2621 | } |
2622 | void imul(const Reg& reg, const Operand& op) |
2623 | { |
2624 | opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x0F, 0xAF); |
2625 | } |
2626 | void imul(const Reg& reg, const Operand& op, int imm) |
2627 | { |
2628 | int s = inner::IsInDisp8(imm) ? 1 : 0; |
2629 | int immSize = s ? 1 : reg.isREG(16) ? 2 : 4; |
2630 | opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x69 | (s << 1), NONE, NONE, immSize); |
2631 | db(imm, immSize); |
2632 | } |
2633 | void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); } |
2634 | void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); } |
2635 | void push(const AddressFrame& af, uint32_t imm) |
2636 | { |
2637 | if (af.bit_ == 8) { |
2638 | db(0x6A); db(imm); |
2639 | } else if (af.bit_ == 16) { |
2640 | db(0x66); db(0x68); dw(imm); |
2641 | } else { |
2642 | db(0x68); dd(imm); |
2643 | } |
2644 | } |
2645 | /* use "push(word, 4)" if you want "push word 4" */ |
2646 | void push(uint32_t imm) |
2647 | { |
2648 | if (inner::IsInDisp8(imm)) { |
2649 | push(byte, imm); |
2650 | } else { |
2651 | push(dword, imm); |
2652 | } |
2653 | } |
2654 | void mov(const Operand& reg1, const Operand& reg2) |
2655 | { |
2656 | const Reg *reg = 0; |
2657 | const Address *addr = 0; |
2658 | uint8_t code = 0; |
2659 | if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp] |
2660 | reg = ®1.getReg(); |
2661 | addr= ®2.getAddress(); |
2662 | code = 0xA0; |
2663 | } else |
2664 | if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al |
2665 | reg = ®2.getReg(); |
2666 | addr= ®1.getAddress(); |
2667 | code = 0xA2; |
2668 | } |
2669 | #ifdef XBYAK64 |
2670 | if (addr && addr->is64bitDisp()) { |
2671 | if (code) { |
2672 | rex(*reg); |
2673 | db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3); |
2674 | db(addr->getDisp(), 8); |
2675 | } else { |
2676 | XBYAK_THROW(ERR_BAD_COMBINATION) |
2677 | } |
2678 | } else |
2679 | #else |
2680 | if (code && addr->isOnlyDisp()) { |
2681 | rex(*reg, *addr); |
2682 | db(code | (reg->isBit(8) ? 0 : 1)); |
2683 | dd(static_cast<uint32_t>(addr->getDisp())); |
2684 | } else |
2685 | #endif |
2686 | { |
2687 | opRM_RM(reg1, reg2, 0x88); |
2688 | } |
2689 | } |
2690 | void mov(const Operand& op, uint64_t imm) |
2691 | { |
2692 | if (op.isREG()) { |
2693 | const int size = mov_imm(op.getReg(), imm); |
2694 | db(imm, size); |
2695 | } else if (op.isMEM()) { |
2696 | verifyMemHasSize(op); |
2697 | int immSize = op.getBit() / 8; |
2698 | if (immSize <= 4) { |
2699 | int64_t s = int64_t(imm) >> (immSize * 8); |
2700 | if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) |
2701 | } else { |
2702 | if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) |
2703 | immSize = 4; |
2704 | } |
2705 | opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize); |
2706 | db(static_cast<uint32_t>(imm), immSize); |
2707 | } else { |
2708 | XBYAK_THROW(ERR_BAD_COMBINATION) |
2709 | } |
2710 | } |
2711 | |
2712 | // The template is used to avoid ambiguity when the 2nd argument is 0. |
2713 | // When the 2nd argument is 0 the call goes to |
2714 | // `void mov(const Operand& op, uint64_t imm)`. |
2715 | template <typename T1, typename T2> |
2716 | void mov(const T1&, const T2 *) { T1::unexpected; } |
2717 | void mov(const NativeReg& reg, const Label& label) |
2718 | { |
2719 | mov_imm(reg, dummyAddr); |
2720 | putL(label); |
2721 | } |
2722 | void xchg(const Operand& op1, const Operand& op2) |
2723 | { |
2724 | const Operand *p1 = &op1, *p2 = &op2; |
2725 | if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) { |
2726 | p1 = &op2; p2 = &op1; |
2727 | } |
2728 | if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) |
2729 | if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0) |
2730 | #ifdef XBYAK64 |
2731 | && (p2->getIdx() != 0 || !p1->isREG(32)) |
2732 | #endif |
2733 | ) { |
2734 | rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7)); |
2735 | return; |
2736 | } |
2737 | opModRM(*p1, *p2, (p1->isREG() && p2->isREG() && (p1->getBit() == p2->getBit())), p2->isMEM(), 0x86 | (p1->isBit(8) ? 0 : 1)); |
2738 | } |
2739 | |
2740 | #ifndef XBYAK_DISABLE_SEGMENT |
2741 | void push(const Segment& seg) |
2742 | { |
2743 | switch (seg.getIdx()) { |
2744 | case Segment::es: db(0x06); break; |
2745 | case Segment::cs: db(0x0E); break; |
2746 | case Segment::ss: db(0x16); break; |
2747 | case Segment::ds: db(0x1E); break; |
2748 | case Segment::fs: db(0x0F); db(0xA0); break; |
2749 | case Segment::gs: db(0x0F); db(0xA8); break; |
2750 | default: |
2751 | assert(0); |
2752 | } |
2753 | } |
2754 | void pop(const Segment& seg) |
2755 | { |
2756 | switch (seg.getIdx()) { |
2757 | case Segment::es: db(0x07); break; |
2758 | case Segment::cs: XBYAK_THROW(ERR_BAD_COMBINATION) |
2759 | case Segment::ss: db(0x17); break; |
2760 | case Segment::ds: db(0x1F); break; |
2761 | case Segment::fs: db(0x0F); db(0xA1); break; |
2762 | case Segment::gs: db(0x0F); db(0xA9); break; |
2763 | default: |
2764 | assert(0); |
2765 | } |
2766 | } |
2767 | void putSeg(const Segment& seg) |
2768 | { |
2769 | switch (seg.getIdx()) { |
2770 | case Segment::es: db(0x2E); break; |
2771 | case Segment::cs: db(0x36); break; |
2772 | case Segment::ss: db(0x3E); break; |
2773 | case Segment::ds: db(0x26); break; |
2774 | case Segment::fs: db(0x64); break; |
2775 | case Segment::gs: db(0x65); break; |
2776 | default: |
2777 | assert(0); |
2778 | } |
2779 | } |
2780 | void mov(const Operand& op, const Segment& seg) |
2781 | { |
2782 | opModRM(Reg8(seg.getIdx()), op, op.isREG(16|i32e), op.isMEM(), 0x8C); |
2783 | } |
2784 | void mov(const Segment& seg, const Operand& op) |
2785 | { |
2786 | opModRM(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, op.isREG(16|i32e), op.isMEM(), 0x8E); |
2787 | } |
2788 | #endif |
2789 | |
2790 | enum { NONE = 256 }; |
2791 | // constructor |
2792 | CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0) |
2793 | : CodeArray(maxSize, userPtr, allocator) |
2794 | , mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7) |
2795 | , xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7) |
2796 | , ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7) |
2797 | , zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7) |
2798 | // for my convenience |
2799 | , xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7) |
2800 | , ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7) |
2801 | , zm0(zmm0), zm1(zmm1), zm2(zmm2), zm3(zmm3), zm4(zmm4), zm5(zmm5), zm6(zmm6), zm7(zmm7) |
2802 | |
2803 | , eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI) |
2804 | , ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI) |
2805 | , al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH) |
2806 | , ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512) |
2807 | , ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true) |
2808 | , st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7) |
2809 | , k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7) |
2810 | , bnd0(0), bnd1(1), bnd2(2), bnd3(3) |
2811 | , T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE) |
2812 | , T_z() |
2813 | #ifdef XBYAK64 |
2814 | , rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15) |
2815 | , r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15) |
2816 | , r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15) |
2817 | , r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15) |
2818 | , spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true) |
2819 | , xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15) |
2820 | , xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23) |
2821 | , xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31) |
2822 | , ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15) |
2823 | , ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23) |
2824 | , ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31) |
2825 | , zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15) |
2826 | , zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23) |
2827 | , zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31) |
2828 | , tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7) |
2829 | // for my convenience |
2830 | , xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) |
2831 | , xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23) |
2832 | , xm24(xmm24), xm25(xmm25), xm26(xmm26), xm27(xmm27), xm28(xmm28), xm29(xmm29), xm30(xmm30), xm31(xmm31) |
2833 | , ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15) |
2834 | , ym16(ymm16), ym17(ymm17), ym18(ymm18), ym19(ymm19), ym20(ymm20), ym21(ymm21), ym22(ymm22), ym23(ymm23) |
2835 | , ym24(ymm24), ym25(ymm25), ym26(ymm26), ym27(ymm27), ym28(ymm28), ym29(ymm29), ym30(ymm30), ym31(ymm31) |
2836 | , zm8(zmm8), zm9(zmm9), zm10(zmm10), zm11(zmm11), zm12(zmm12), zm13(zmm13), zm14(zmm14), zm15(zmm15) |
2837 | , zm16(zmm16), zm17(zmm17), zm18(zmm18), zm19(zmm19), zm20(zmm20), zm21(zmm21), zm22(zmm22), zm23(zmm23) |
2838 | , zm24(zmm24), zm25(zmm25), zm26(zmm26), zm27(zmm27), zm28(zmm28), zm29(zmm29), zm30(zmm30), zm31(zmm31) |
2839 | , rip() |
2840 | #endif |
2841 | #ifndef XBYAK_DISABLE_SEGMENT |
2842 | , es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs) |
2843 | #endif |
2844 | , isDefaultJmpNEAR_(false) |
2845 | , defaultEncoding_(EvexEncoding) |
2846 | { |
2847 | ClearError(); |
2848 | labelMgr_.set(this); |
2849 | } |
2850 | void reset() |
2851 | { |
2852 | resetSize(); |
2853 | labelMgr_.reset(); |
2854 | labelMgr_.set(this); |
2855 | } |
2856 | bool hasUndefinedLabel() const { return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel(); } |
2857 | /* |
2858 | MUST call ready() to complete generating code if you use AutoGrow mode. |
2859 | It is not necessary for the other mode if hasUndefinedLabel() is true. |
2860 | */ |
2861 | void ready(ProtectMode mode = PROTECT_RWE) |
2862 | { |
2863 | if (hasUndefinedLabel()) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND) |
2864 | if (isAutoGrow()) { |
2865 | calcJmpAddress(); |
2866 | if (useProtect()) setProtectMode(mode); |
2867 | } |
2868 | } |
2869 | // set read/exec |
2870 | void readyRE() { return ready(PROTECT_RE); } |
2871 | #ifdef XBYAK_TEST |
2872 | void dump(bool doClear = true) |
2873 | { |
2874 | CodeArray::dump(); |
2875 | if (doClear) size_ = 0; |
2876 | } |
2877 | #endif |
2878 | |
2879 | #ifdef XBYAK_UNDEF_JNL |
2880 | #undef jnl |
2881 | #endif |
2882 | |
2883 | // set default encoding to select Vex or Evex |
2884 | void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; } |
2885 | |
2886 | /* |
2887 | use single byte nop if useMultiByteNop = false |
2888 | */ |
2889 | void nop(size_t size = 1, bool useMultiByteNop = true) |
2890 | { |
2891 | if (!useMultiByteNop) { |
2892 | for (size_t i = 0; i < size; i++) { |
2893 | db(0x90); |
2894 | } |
2895 | return; |
2896 | } |
2897 | /* |
2898 | Intel Architectures Software Developer's Manual Volume 2 |
2899 | recommended multi-byte sequence of NOP instruction |
2900 | AMD and Intel seem to agree on the same sequences for up to 9 bytes: |
2901 | https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf |
2902 | */ |
2903 | static const uint8_t nopTbl[9][9] = { |
2904 | {0x90}, |
2905 | {0x66, 0x90}, |
2906 | {0x0F, 0x1F, 0x00}, |
2907 | {0x0F, 0x1F, 0x40, 0x00}, |
2908 | {0x0F, 0x1F, 0x44, 0x00, 0x00}, |
2909 | {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, |
2910 | {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, |
2911 | {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, |
2912 | {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, |
2913 | }; |
2914 | const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]); |
2915 | while (size > 0) { |
2916 | size_t len = (std::min)(n, size); |
2917 | const uint8_t *seq = nopTbl[len - 1]; |
2918 | db(seq, len); |
2919 | size -= len; |
2920 | } |
2921 | } |
2922 | |
2923 | #ifndef XBYAK_DONT_READ_LIST |
2924 | #include "xbyak_mnemonic.h" |
2925 | /* |
2926 | use single byte nop if useMultiByteNop = false |
2927 | */ |
2928 | void align(size_t x = 16, bool useMultiByteNop = true) |
2929 | { |
2930 | if (x == 1) return; |
2931 | if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN) |
2932 | if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n" , (int)x); |
2933 | size_t remain = size_t(getCurr()) % x; |
2934 | if (remain) { |
2935 | nop(x - remain, useMultiByteNop); |
2936 | } |
2937 | } |
2938 | #endif |
2939 | }; |
2940 | |
2941 | template <> |
2942 | inline void CodeGenerator::mov(const NativeReg& reg, const char *label) // can't use std::string |
2943 | { |
2944 | assert(label); |
2945 | mov_imm(reg, dummyAddr); |
2946 | putL(label); |
2947 | } |
2948 | |
2949 | namespace util { |
2950 | static const XBYAK_CONSTEXPR Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); |
2951 | static const XBYAK_CONSTEXPR Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); |
2952 | static const XBYAK_CONSTEXPR Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); |
2953 | static const XBYAK_CONSTEXPR Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7); |
2954 | static const XBYAK_CONSTEXPR Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); |
2955 | static const XBYAK_CONSTEXPR Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); |
2956 | static const XBYAK_CONSTEXPR Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); |
2957 | static const XBYAK_CONSTEXPR AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512); |
2958 | static const XBYAK_CONSTEXPR AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true); |
2959 | static const XBYAK_CONSTEXPR Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); |
2960 | static const XBYAK_CONSTEXPR Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7); |
2961 | static const XBYAK_CONSTEXPR BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3); |
2962 | static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE); |
2963 | static const XBYAK_CONSTEXPR EvexModifierZero T_z; |
2964 | #ifdef XBYAK64 |
2965 | static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); |
2966 | static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15); |
2967 | static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15); |
2968 | static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true); |
2969 | static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); |
2970 | static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23); |
2971 | static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31); |
2972 | static const XBYAK_CONSTEXPR Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); |
2973 | static const XBYAK_CONSTEXPR Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23); |
2974 | static const XBYAK_CONSTEXPR Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31); |
2975 | static const XBYAK_CONSTEXPR Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15); |
2976 | static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23); |
2977 | static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31); |
2978 | static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7); |
2979 | static const XBYAK_CONSTEXPR RegRip rip; |
2980 | #endif |
2981 | #ifndef XBYAK_DISABLE_SEGMENT |
2982 | static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs); |
2983 | #endif |
2984 | } // util |
2985 | |
2986 | #ifdef _MSC_VER |
2987 | #pragma warning(pop) |
2988 | #endif |
2989 | |
2990 | #if defined(__GNUC__) && !defined(__clang__) |
2991 | #pragma GCC diagnostic pop |
2992 | #endif |
2993 | |
2994 | } // end of namespace |
2995 | |
2996 | #endif // XBYAK_XBYAK_H_ |
2997 | |