1/*******************************************************************************
2* Copyright 2016-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17/*******************************************************************************
18* Copyright (c) 2007 MITSUNARI Shigeo
19* All rights reserved.
20*
21* Redistribution and use in source and binary forms, with or without
22* modification, are permitted provided that the following conditions are met:
23*
24* Redistributions of source code must retain the above copyright notice, this
25* list of conditions and the following disclaimer.
26* Redistributions in binary form must reproduce the above copyright notice,
27* this list of conditions and the following disclaimer in the documentation
28* and/or other materials provided with the distribution.
29* Neither the name of the copyright owner nor the names of its contributors may
30* be used to endorse or promote products derived from this software without
31* specific prior written permission.
32*
33* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
43* THE POSSIBILITY OF SUCH DAMAGE.
44*******************************************************************************/
45
46#pragma once
47#ifndef XBYAK_XBYAK_H_
48#define XBYAK_XBYAK_H_
49/*!
50 @file xbyak.h
51 @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++
52 @author herumi
53 @url https://github.com/herumi/xbyak
54 @note modified new BSD license
55 http://opensource.org/licenses/BSD-3-Clause
56*/
57#if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not
58 #define XBYAK_NO_OP_NAMES
59#endif
60
61#include <stdio.h> // for debug print
62#include <assert.h>
63#include <list>
64#include <string>
65#include <algorithm>
66#ifndef NDEBUG
67#include <iostream>
68#endif
69
70// #define XBYAK_DISABLE_AVX512
71
72#if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR)
73 #define XBYAK_USE_MMAP_ALLOCATOR
74#endif
75#if !defined(__GNUC__) || defined(__MINGW32__)
76 #undef XBYAK_USE_MMAP_ALLOCATOR
77#endif
78
79#ifdef __GNUC__
80 #define XBYAK_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor))
81#else
82 #define XBYAK_GNUC_PREREQ(major, minor) 0
83#endif
84
85// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
86#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
87 ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
88 #include <unordered_set>
89 #define XBYAK_STD_UNORDERED_SET std::unordered_set
90 #include <unordered_map>
91 #define XBYAK_STD_UNORDERED_MAP std::unordered_map
92 #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
93
94/*
95 Clang/llvm-gcc and ICC-EDG in 'GCC-mode' always claim to be GCC 4.2, using
96 libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
97*/
98#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
99 #include <tr1/unordered_set>
100 #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
101 #include <tr1/unordered_map>
102 #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
103 #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
104
105#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
106 #include <unordered_set>
107 #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
108 #include <unordered_map>
109 #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
110 #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
111
112#else
113 #include <set>
114 #define XBYAK_STD_UNORDERED_SET std::set
115 #include <map>
116 #define XBYAK_STD_UNORDERED_MAP std::map
117 #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
118#endif
119#ifdef _WIN32
120 #ifndef WIN32_LEAN_AND_MEAN
121 #define WIN32_LEAN_AND_MEAN
122 #endif
123 #include <windows.h>
124 #include <malloc.h>
125 #ifdef _MSC_VER
126 #define XBYAK_TLS __declspec(thread)
127 #else
128 #define XBYAK_TLS __thread
129 #endif
130#elif defined(__GNUC__)
131 #include <unistd.h>
132 #include <sys/mman.h>
133 #include <stdlib.h>
134 #define XBYAK_TLS __thread
135#endif
136#if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT)
137 #define XBYAK_USE_MAP_JIT
138 #include <sys/sysctl.h>
139 #ifndef MAP_JIT
140 #define MAP_JIT 0x800
141 #endif
142#endif
143#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
144 #include <stdint.h>
145#endif
146
// MFD_CLOEXEC is defined only on Linux 3.17 or later.
148// Android wraps the memfd_create syscall from API version 30.
149#if !defined(MFD_CLOEXEC) || (defined(__ANDROID__) && __ANDROID_API__ < 30)
150 #undef XBYAK_USE_MEMFD
151#endif
152
153#if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__))
154 #define XBYAK64_WIN
155#elif defined(__x86_64__)
156 #define XBYAK64_GCC
157#endif
158#if !defined(XBYAK64) && !defined(XBYAK32)
159 #if defined(XBYAK64_GCC) || defined(XBYAK64_WIN)
160 #define XBYAK64
161 #else
162 #define XBYAK32
163 #endif
164#endif
165
166#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1900)
167 #undef XBYAK_TLS
168 #define XBYAK_TLS thread_local
169 #define XBYAK_VARIADIC_TEMPLATE
170 #define XBYAK_NOEXCEPT noexcept
171#else
172 #define XBYAK_NOEXCEPT throw()
173#endif
174
175// require c++14 or later
176// Visual Studio 2017 version 15.0 or later
177// g++-6 or later
178#if ((__cplusplus >= 201402L) && !(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ <= 5))) || (defined(_MSC_VER) && _MSC_VER >= 1910)
179 #define XBYAK_CONSTEXPR constexpr
180#else
181 #define XBYAK_CONSTEXPR
182#endif
183
184#ifdef _MSC_VER
185 #pragma warning(push)
186 #pragma warning(disable : 4514) /* remove inline function */
187 #pragma warning(disable : 4786) /* identifier is too long */
188 #pragma warning(disable : 4503) /* name is too long */
	#pragma warning(disable : 4127) /* constant expression */
190#endif
191
192// disable -Warray-bounds because it may be a bug of gcc. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104603
193#if defined(__GNUC__) && !defined(__clang__)
194 #define XBYAK_DISABLE_WARNING_ARRAY_BOUNDS
195 #pragma GCC diagnostic push
196 #pragma GCC diagnostic ignored "-Warray-bounds"
197#endif
198
199namespace Xbyak {
200
201enum {
202 DEFAULT_MAX_CODE_SIZE = 4096,
203 VERSION = 0x6630 /* 0xABCD = A.BC(.D) */
204};
205
206#ifndef MIE_INTEGER_TYPE_DEFINED
207#define MIE_INTEGER_TYPE_DEFINED
208// for backward compatibility
209typedef uint64_t uint64;
210typedef int64_t sint64;
211typedef uint32_t uint32;
212typedef uint16_t uint16;
213typedef uint8_t uint8;
214#endif
215
216#ifndef MIE_ALIGN
217 #ifdef _MSC_VER
218 #define MIE_ALIGN(x) __declspec(align(x))
219 #else
220 #define MIE_ALIGN(x) __attribute__((aligned(x)))
221 #endif
222#endif
223#ifndef MIE_PACK // for shufps
224 #define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w))
225#endif
226
227enum {
228 ERR_NONE = 0,
229 ERR_BAD_ADDRESSING,
230 ERR_CODE_IS_TOO_BIG,
231 ERR_BAD_SCALE,
232 ERR_ESP_CANT_BE_INDEX,
233 ERR_BAD_COMBINATION,
234 ERR_BAD_SIZE_OF_REGISTER,
235 ERR_IMM_IS_TOO_BIG,
236 ERR_BAD_ALIGN,
237 ERR_LABEL_IS_REDEFINED,
238 ERR_LABEL_IS_TOO_FAR,
239 ERR_LABEL_IS_NOT_FOUND,
240 ERR_CODE_ISNOT_COPYABLE,
241 ERR_BAD_PARAMETER,
242 ERR_CANT_PROTECT,
243 ERR_CANT_USE_64BIT_DISP,
244 ERR_OFFSET_IS_TOO_BIG,
245 ERR_MEM_SIZE_IS_NOT_SPECIFIED,
246 ERR_BAD_MEM_SIZE,
247 ERR_BAD_ST_COMBINATION,
248 ERR_OVER_LOCAL_LABEL, // not used
249 ERR_UNDER_LOCAL_LABEL,
250 ERR_CANT_ALLOC,
251 ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW,
252 ERR_BAD_PROTECT_MODE,
253 ERR_BAD_PNUM,
254 ERR_BAD_TNUM,
255 ERR_BAD_VSIB_ADDRESSING,
256 ERR_CANT_CONVERT,
257 ERR_LABEL_ISNOT_SET_BY_L,
258 ERR_LABEL_IS_ALREADY_SET_BY_L,
259 ERR_BAD_LABEL_STR,
260 ERR_MUNMAP,
261 ERR_OPMASK_IS_ALREADY_SET,
262 ERR_ROUNDING_IS_ALREADY_SET,
263 ERR_K0_IS_INVALID,
264 ERR_EVEX_IS_INVALID,
265 ERR_SAE_IS_INVALID,
266 ERR_ER_IS_INVALID,
267 ERR_INVALID_BROADCAST,
268 ERR_INVALID_OPMASK_WITH_MEMORY,
269 ERR_INVALID_ZERO,
270 ERR_INVALID_RIP_IN_AUTO_GROW,
271 ERR_INVALID_MIB_ADDRESS,
272 ERR_X2APIC_IS_NOT_SUPPORTED,
273 ERR_NOT_SUPPORTED,
274 ERR_SAME_REGS_ARE_INVALID,
275 ERR_INTERNAL // Put it at last.
276};
277
278inline const char *ConvertErrorToString(int err)
279{
280 static const char *errTbl[] = {
281 "none",
282 "bad addressing",
283 "code is too big",
284 "bad scale",
285 "esp can't be index",
286 "bad combination",
287 "bad size of register",
288 "imm is too big",
289 "bad align",
290 "label is redefined",
291 "label is too far",
292 "label is not found",
293 "code is not copyable",
294 "bad parameter",
295 "can't protect",
296 "can't use 64bit disp(use (void*))",
297 "offset is too big",
298 "MEM size is not specified",
299 "bad mem size",
300 "bad st combination",
301 "over local label",
302 "under local label",
303 "can't alloc",
304 "T_SHORT is not supported in AutoGrow",
305 "bad protect mode",
306 "bad pNum",
307 "bad tNum",
308 "bad vsib addressing",
309 "can't convert",
310 "label is not set by L()",
311 "label is already set by L()",
312 "bad label string",
313 "err munmap",
314 "opmask is already set",
315 "rounding is already set",
316 "k0 is invalid",
317 "evex is invalid",
318 "sae(suppress all exceptions) is invalid",
319 "er(embedded rounding) is invalid",
320 "invalid broadcast",
321 "invalid opmask with memory",
322 "invalid zero",
323 "invalid rip in AutoGrow",
324 "invalid mib address",
325 "x2APIC is not supported",
326 "not supported",
327 "same regs are invalid",
328 "internal error"
329 };
330 assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
331 return err <= ERR_INTERNAL ? errTbl[err] : "unknown err";
332}
333
334#ifdef XBYAK_NO_EXCEPTION
namespace local {

// Thread-local storage slot for the pending error code in
// XBYAK_NO_EXCEPTION mode (0 means "no error").
inline int& GetErrorRef() {
	static XBYAK_TLS int err = 0;
	return err;
}

// Record err unless an error is already pending: the first error is
// kept so that later failures do not mask the root cause.
inline void SetError(int err) {
	if (local::GetErrorRef()) return; // keep the first err code
	local::GetErrorRef() = err;
}

} // local
348
// Reset the pending thread-local error code to "no error".
inline void ClearError() {
	local::GetErrorRef() = 0;
}
// Return the first error code recorded since the last ClearError() (0 if none).
inline int GetError() { return Xbyak::local::GetErrorRef(); }

// Error reporting without exceptions: record the code and return (a value).
#define XBYAK_THROW(err) { Xbyak::local::SetError(err); return; }
#define XBYAK_THROW_RET(err, r) { Xbyak::local::SetError(err); return r; }
356
357#else
// Exception type carrying one of the ERR_* codes above.
class Error : public std::exception {
	int err_;
public:
	// Out-of-range codes are clamped to ERR_INTERNAL so that what()
	// always maps to a valid message.
	explicit Error(int err) : err_(err)
	{
		if (err_ < 0 || err_ > ERR_INTERNAL) {
			err_ = ERR_INTERNAL;
		}
	}
	operator int() const { return err_; }
	const char *what() const XBYAK_NOEXCEPT
	{
		return ConvertErrorToString(err_);
	}
};
373
// dummy functions
// (kept so user code can call ClearError()/GetError() uniformly whether
// or not XBYAK_NO_EXCEPTION is defined)
inline void ClearError() { }
inline int GetError() { return 0; }

// overload so ConvertErrorToString() also accepts an Error object
inline const char *ConvertErrorToString(const Error& err)
{
	return err.what();
}

// Error reporting with exceptions; XBYAK_THROW_RET ignores r because throw never returns.
#define XBYAK_THROW(err) { throw Error(err); }
#define XBYAK_THROW_RET(err, r) { throw Error(err); }
385
386#endif
387
// Allocate size bytes aligned to alignment; returns 0 on failure.
// alignment must satisfy the platform allocator's requirements
// (power of two, multiple of sizeof(void*) for posix_memalign).
// Pair with AlignedFree().
inline void *AlignedMalloc(size_t size, size_t alignment)
{
#ifdef __MINGW32__
	return __mingw_aligned_malloc(size, alignment);
#elif defined(_WIN32)
	return _aligned_malloc(size, alignment);
#else
	void *p;
	int ret = posix_memalign(&p, alignment, size);
	return (ret == 0) ? p : 0;
#endif
}
400
// Release memory obtained from AlignedMalloc(); p == 0 is a no-op
// (free/_aligned_free accept null).
// The platform test mirrors AlignedMalloc() exactly: memory from
// _aligned_malloc() must be released with _aligned_free(), so this uses
// _WIN32 (not _MSC_VER) — otherwise a non-MSVC, non-MinGW Windows build
// would allocate with _aligned_malloc() but release with free().
inline void AlignedFree(void *p)
{
#ifdef __MINGW32__
	__mingw_aligned_free(p);
#elif defined(_WIN32)
	_aligned_free(p);
#else
	free(p);
#endif
}
411
// Cast p to To by round-tripping through size_t, e.g. to convert a code
// buffer pointer into a callable function pointer without a direct
// (and often warned-about) pointer-type cast.
template<class To, class From>
inline const To CastTo(From p) XBYAK_NOEXCEPT
{
	return (const To)(size_t)(p);
}
namespace inner {

// page granularity used when sizing/aligning code regions
static const size_t ALIGN_PAGE_SIZE = 4096;

// true if x, reinterpreted as a signed 32-bit value, fits in a signed 8-bit displacement
inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
// true if x, reinterpreted as a signed 64-bit value, fits in a signed 32-bit range
inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; }

// Truncate x to 32 bits; on 64-bit builds, raise ERR_OFFSET_IS_TOO_BIG
// first if x does not fit in a signed 32-bit value.
inline uint32_t VerifyInInt32(uint64_t x)
{
#ifdef XBYAK64
	if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
#endif
	return static_cast<uint32_t>(x);
}

// how a label reference is resolved when the code is emitted/relocated
enum LabelMode {
	LasIs, // as is
	Labs, // absolute
	LaddTop // (addr + top) for mov(reg, label) with AutoGrow
};

} // inner
439
440/*
441 custom allocator
442*/
443struct Allocator {
444 explicit Allocator(const std::string& = "") {} // same interface with MmapAllocator
445 virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
446 virtual void free(uint8_t *p) { AlignedFree(p); }
447 virtual ~Allocator() {}
448 /* override to return false if you call protect() manually */
449 virtual bool useProtect() const { return true; }
450};
451
452#ifdef XBYAK_USE_MMAP_ALLOCATOR
453#ifdef XBYAK_USE_MAP_JIT
454namespace util {
455
// Return the Darwin kernel major version from "kern.osrelease"
// (e.g. 18 corresponds to macOS Mojave), or 0 if it cannot be parsed.
// macOS only (compiled under XBYAK_USE_MAP_JIT).
inline int getMacOsVersionPure()
{
	char buf[64];
	size_t size = sizeof(buf);
	int err = sysctlbyname("kern.osrelease", buf, &size, NULL, 0);
	if (err != 0) return 0;
	char *endp;
	int major = strtol(buf, &endp, 10);
	if (*endp != '.') return 0; // expect "major.minor..." format
	return major;
}

// cached wrapper around getMacOsVersionPure(); the sysctl runs once
inline int getMacOsVersion()
{
	static const int version = getMacOsVersionPure();
	return version;
}
473
474} // util
475#endif
// Allocator backed by mmap(). Optionally adds MAP_JIT on macOS Mojave+
// and, with XBYAK_USE_MEMFD, backs mappings with a memfd. Each mapping
// is tracked in allocList_ so free() can munmap the exact size.
class MmapAllocator : public Allocator {
	struct Allocation {
		size_t size;
#if defined(XBYAK_USE_MEMFD)
		// fd_ is only used with XBYAK_USE_MEMFD. We keep the file open
		// during the lifetime of each allocation in order to support
		// checkpoint/restore by unprivileged users.
		int fd;
#endif
	};
	const std::string name_; // only used with XBYAK_USE_MEMFD
	typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, Allocation> AllocationList;
	AllocationList allocList_;
public:
	explicit MmapAllocator(const std::string& name = "xbyak") : name_(name) {}
	// Map a read/write region of at least size bytes (rounded up to the
	// page size). Raises ERR_CANT_ALLOC (returns 0) on failure.
	uint8_t *alloc(size_t size)
	{
		const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
		size = (size + alignedSizeM1) & ~alignedSizeM1;
#if defined(MAP_ANONYMOUS)
		int mode = MAP_PRIVATE | MAP_ANONYMOUS;
#elif defined(MAP_ANON)
		int mode = MAP_PRIVATE | MAP_ANON;
#else
		#error "not supported"
#endif
#if defined(XBYAK_USE_MAP_JIT)
		// Darwin 18 (Mojave) and later require MAP_JIT for JIT pages
		const int mojaveVersion = 18;
		if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
#endif
		int fd = -1;
#if defined(XBYAK_USE_MEMFD)
		// back the mapping with a memfd when available; silently fall
		// back to an anonymous mapping if memfd_create fails
		fd = memfd_create(name_.c_str(), MFD_CLOEXEC);
		if (fd != -1) {
			mode = MAP_SHARED;
			if (ftruncate(fd, size) != 0) {
				close(fd);
				XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
			}
		}
#endif
		void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, fd, 0);
		if (p == MAP_FAILED) {
			if (fd != -1) close(fd);
			XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
		}
		assert(p);
		Allocation &alloc = allocList_[(uintptr_t)p];
		alloc.size = size;
#if defined(XBYAK_USE_MEMFD)
		alloc.fd = fd;
#endif
		return (uint8_t*)p;
	}
	// Unmap a region previously returned by alloc(); p == 0 is a no-op.
	// Raises ERR_BAD_PARAMETER if p was not allocated here.
	void free(uint8_t *p)
	{
		if (p == 0) return;
		AllocationList::iterator i = allocList_.find((uintptr_t)p);
		if (i == allocList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER)
		if (munmap((void*)i->first, i->second.size) < 0) XBYAK_THROW(ERR_MUNMAP)
#if defined(XBYAK_USE_MEMFD)
		if (i->second.fd != -1) close(i->second.fd);
#endif
		allocList_.erase(i);
	}
};
542#else
543typedef Allocator MmapAllocator;
544#endif
545
546class Address;
547class Reg;
548
// Base class of every assembler operand: general-purpose/SIMD/mask/bound
// registers and (via derived classes) memory operands. Register index,
// kind and bit width are packed into bitfields together with the EVEX
// attributes (zeroing, opmask index, rounding mode).
class Operand {
	static const uint8_t EXT8BIT = 0x20;
	unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
	unsigned int kind_:10; // bitwise-or of Kind flags
	unsigned int bit_:14; // operand size in bits (8..8192)
protected:
	unsigned int zero_:1; // EVEX zeroing-masking flag
	unsigned int mask_:3; // EVEX opmask register index (0 = none)
	unsigned int rounding_:3; // EVEX rounding/SAE mode (0 = none)
	void setIdx(int idx) { idx_ = idx; }
public:
	enum Kind {
		NONE = 0,
		MEM = 1 << 0,
		REG = 1 << 1,
		MMX = 1 << 2,
		FPU = 1 << 3,
		XMM = 1 << 4,
		YMM = 1 << 5,
		ZMM = 1 << 6,
		OPMASK = 1 << 7,
		BNDREG = 1 << 8,
		TMM = 1 << 9
	};
	// register encoding numbers; aliases of the same value (e.g. R8D = 8)
	// share the encoding of the same physical register at another width
	enum Code {
#ifdef XBYAK64
		RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
		R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D,
		R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W,
		R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B,
		SPL = 4, BPL, SIL, DIL,
#endif
		EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
		AX = 0, CX, DX, BX, SP, BP, SI, DI,
		AL = 0, CL, DL, BL, AH, CH, DH, BH
	};
	XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { }
	XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0)
		: idx_(static_cast<uint8_t>(idx | (ext8bit ? EXT8BIT : 0)))
		, kind_(kind)
		, bit_(bit)
		, zero_(0), mask_(0), rounding_(0)
	{
		assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two
	}
	XBYAK_CONSTEXPR Kind getKind() const { return static_cast<Kind>(kind_); }
	// register index with the EXT8BIT flag masked off
	XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); }
	XBYAK_CONSTEXPR bool isNone() const { return kind_ == 0; }
	XBYAK_CONSTEXPR bool isMMX() const { return is(MMX); }
	XBYAK_CONSTEXPR bool isXMM() const { return is(XMM); }
	XBYAK_CONSTEXPR bool isYMM() const { return is(YMM); }
	XBYAK_CONSTEXPR bool isZMM() const { return is(ZMM); }
	XBYAK_CONSTEXPR bool isTMM() const { return is(TMM); }
	// *MEM variants also accept a memory operand of any size
	XBYAK_CONSTEXPR bool isXMEM() const { return is(XMM | MEM); }
	XBYAK_CONSTEXPR bool isYMEM() const { return is(YMM | MEM); }
	XBYAK_CONSTEXPR bool isZMEM() const { return is(ZMM | MEM); }
	XBYAK_CONSTEXPR bool isOPMASK() const { return is(OPMASK); }
	XBYAK_CONSTEXPR bool isBNDREG() const { return is(BNDREG); }
	XBYAK_CONSTEXPR bool isREG(int bit = 0) const { return is(REG, bit); }
	XBYAK_CONSTEXPR bool isMEM(int bit = 0) const { return is(MEM, bit); }
	XBYAK_CONSTEXPR bool isFPU() const { return is(FPU); }
	// true for spl/bpl/sil/dil (8-bit regs that need a REX prefix)
	XBYAK_CONSTEXPR bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
	XBYAK_CONSTEXPR bool isExtIdx() const { return (getIdx() & 8) != 0; }
	XBYAK_CONSTEXPR bool isExtIdx2() const { return (getIdx() & 16) != 0; }
	// true if this operand can only be expressed with an EVEX prefix
	XBYAK_CONSTEXPR bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); }
	// true if this operand requires a REX prefix
	XBYAK_CONSTEXPR bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); }
	XBYAK_CONSTEXPR bool hasZero() const { return zero_; }
	XBYAK_CONSTEXPR int getOpmaskIdx() const { return mask_; }
	XBYAK_CONSTEXPR int getRounding() const { return rounding_; }
	// change the vector kind (XMM/YMM/ZMM/TMM only; no-op otherwise),
	// updating bit_ to the width implied by the new kind
	void setKind(Kind kind)
	{
		if ((kind & (XMM|YMM|ZMM|TMM)) == 0) return;
		kind_ = kind;
		bit_ = kind == XMM ? 128 : kind == YMM ? 256 : kind == ZMM ? 512 : 8192;
	}
	// err if MMX/FPU/OPMASK/BNDREG
	void setBit(int bit);
	void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
	{
		if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
		mask_ = idx;
	}
	void setRounding(int idx)
	{
		if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
		rounding_ = idx;
	}
	void setZero() { zero_ = true; }
	// ah, ch, dh, bh?
	bool isHigh8bit() const
	{
		if (!isBit(8)) return false;
		if (isExt8bit()) return false;
		const int idx = getIdx();
		return AH <= idx && idx <= BH;
	}
	// any bit is acceptable if bit == 0
	XBYAK_CONSTEXPR bool is(int kind, uint32_t bit = 0) const
	{
		return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16)
	}
	XBYAK_CONSTEXPR bool isBit(uint32_t bit) const { return (bit_ & bit) != 0; }
	XBYAK_CONSTEXPR uint32_t getBit() const { return bit_; }
	// printable register name; raises ERR_INTERNAL for unnamed kinds
	const char *toString() const
	{
		const int idx = getIdx();
		if (kind_ == REG) {
			if (isExt8bit()) {
				static const char *tbl[4] = { "spl", "bpl", "sil", "dil" };
				return tbl[idx - 4];
			}
			static const char *tbl[4][16] = {
				{ "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
				{ "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
				{ "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
				{ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
			};
			return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx];
		} else if (isOPMASK()) {
			static const char *tbl[8] = { "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" };
			return tbl[idx];
		} else if (isTMM()) {
			static const char *tbl[8] = {
				"tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7"
			};
			return tbl[idx];
		} else if (isZMM()) {
			static const char *tbl[32] = {
				"zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
				"zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31"
			};
			return tbl[idx];
		} else if (isYMM()) {
			static const char *tbl[32] = {
				"ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
				"ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31"
			};
			return tbl[idx];
		} else if (isXMM()) {
			static const char *tbl[32] = {
				"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
				"xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31"
			};
			return tbl[idx];
		} else if (isMMX()) {
			static const char *tbl[8] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" };
			return tbl[idx];
		} else if (isFPU()) {
			static const char *tbl[8] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" };
			return tbl[idx];
		} else if (isBNDREG()) {
			static const char *tbl[4] = { "bnd0", "bnd1", "bnd2", "bnd3" };
			return tbl[idx];
		}
		XBYAK_THROW_RET(ERR_INTERNAL, 0);
	}
	// field-wise equality on Operand's own fields only (ignores any
	// state added by derived classes such as Address)
	bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; }
	bool operator==(const Operand& rhs) const;
	bool operator!=(const Operand& rhs) const { return !operator==(rhs); }
	const Address& getAddress() const;
	const Reg& getReg() const;
};
711
// Convert this operand in place to the size/kind implied by bit:
// 8/16/32/64 -> REG, 128 -> XMM, 256 -> YMM, 512 -> ZMM, 8192 -> TMM.
// MEM and OPMASK operands just take the new size. Raises
// ERR_CANT_CONVERT when the conversion is impossible (invalid bit,
// ah/bh/ch/dh to another size, or index out of range for the target).
inline void Operand::setBit(int bit)
{
	if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512 && bit != 8192) goto ERR;
	if (isBit(bit)) return; // already the requested size
	if (is(MEM | OPMASK)) {
		bit_ = bit;
		return;
	}
	if (is(REG | XMM | YMM | ZMM | TMM)) {
		int idx = getIdx();
		// err if converting ah, bh, ch, dh
		if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR;
		Kind kind = REG;
		switch (bit) {
		case 8:
			if (idx >= 16) goto ERR;
#ifdef XBYAK32
			if (idx >= 4) goto ERR;
#else
			// indices 4..7 as 8-bit regs become spl/bpl/sil/dil (REX form)
			if (4 <= idx && idx < 8) idx |= EXT8BIT;
#endif
			break;
		case 16:
		case 32:
		case 64:
			if (idx >= 16) goto ERR;
			break;
		case 128: kind = XMM; break;
		case 256: kind = YMM; break;
		case 512: kind = ZMM; break;
		case 8192: kind = TMM; break;
		}
		idx_ = idx;
		kind_ = kind;
		bit_ = bit;
		if (bit >= 128) return; // keep mask_ and rounding_
		mask_ = 0;
		rounding_ = 0;
		return;
	}
ERR:
	XBYAK_THROW(ERR_CANT_CONVERT)
}
755
class Label;

struct Reg8;
struct Reg16;
struct Reg32;
#ifdef XBYAK64
struct Reg64;
#endif
// A register operand of any kind/size; base of the concrete register
// classes. Provides REX-prefix helpers and width-conversion methods.
class Reg : public Operand {
public:
	XBYAK_CONSTEXPR Reg() { }
	XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { }
	// convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM
	Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; }
	// individual REX bit values for this register (0 when not needed)
	uint8_t getRexW() const { return isREG(64) ? 8 : 0; }
	uint8_t getRexR() const { return isExtIdx() ? 4 : 0; }
	uint8_t getRexX() const { return isExtIdx() ? 2 : 0; }
	uint8_t getRexB() const { return isExtIdx() ? 1 : 0; }
	// combined REX prefix byte for this register and base; 0 when no
	// prefix is required (0x40 is forced for spl/bpl/sil/dil)
	uint8_t getRex(const Reg& base = Reg()) const
	{
		uint8_t rex = getRexW() | getRexR() | base.getRexW() | base.getRexB();
		if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40;
		return rex;
	}
	Reg8 cvt8() const;
	Reg16 cvt16() const;
	Reg32 cvt32() const;
#ifdef XBYAK64
	Reg64 cvt64() const;
#endif
};
787
// Downcast to Reg; only valid when this operand is not a memory operand.
inline const Reg& Operand::getReg() const
{
	assert(!isMEM());
	return static_cast<const Reg&>(*this);
}
793
// 8-bit GP register; ext8bit selects the spl/bpl/sil/dil (REX) forms
struct Reg8 : public Reg {
	explicit XBYAK_CONSTEXPR Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { }
};

// 16-bit GP register
struct Reg16 : public Reg {
	explicit XBYAK_CONSTEXPR Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { }
};

// MMX register; also serves as the base class of Xmm
struct Mmx : public Reg {
	explicit XBYAK_CONSTEXPR Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { }
};
805
// EVEX embedded-rounding / suppress-all-exceptions modifier,
// attached to a register with operator| (e.g. zmm0 | T_rn_sae)
struct EvexModifierRounding {
	enum {
		T_RN_SAE = 1,
		T_RD_SAE = 2,
		T_RU_SAE = 3,
		T_RZ_SAE = 4,
		T_SAE = 5
	};
	explicit XBYAK_CONSTEXPR EvexModifierRounding(int rounding) : rounding(rounding) {}
	int rounding;
};
// EVEX zeroing-masking modifier (T_z), attached with operator|
struct EvexModifierZero{ XBYAK_CONSTEXPR EvexModifierZero() {}};
818
// XMM register; also the base class of Ymm/Zmm (kind/bit vary)
struct Xmm : public Mmx {
	explicit XBYAK_CONSTEXPR Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { }
	XBYAK_CONSTEXPR Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { }
	Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; }
	// copies of this register with a different index / kind
	Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; }
	Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; }
};

// YMM register
struct Ymm : public Xmm {
	explicit XBYAK_CONSTEXPR Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { }
	Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; }
};

// ZMM register
struct Zmm : public Ymm {
	explicit XBYAK_CONSTEXPR Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { }
	Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; }
};
836
#ifdef XBYAK64
// AMX tile register (tmm0..tmm7), 64-bit mode only
struct Tmm : public Reg {
	explicit XBYAK_CONSTEXPR Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { }
};
#endif

// AVX-512 opmask register (k0..k7)
struct Opmask : public Reg {
	explicit XBYAK_CONSTEXPR Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {}
};

// MPX bounds register (bnd0..bnd3)
struct BoundsReg : public Reg {
	explicit XBYAK_CONSTEXPR BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {}
};

// attach an opmask / zeroing / rounding modifier to a copy of x
template<class T>T operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; }
template<class T>T operator|(const T& x, const EvexModifierZero&) { T r(x); r.setZero(); return r; }
template<class T>T operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; }
854
// x87 FPU stack register (st0..st7)
struct Fpu : public Reg {
	explicit XBYAK_CONSTEXPR Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { }
};

// GP register of either 32- or 64-bit width (common base of Reg32/Reg64)
struct Reg32e : public Reg {
	explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {}
};
struct Reg32 : public Reg32e {
	explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {}
};
#ifdef XBYAK64
struct Reg64 : public Reg32e {
	explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {}
};
// RIP-relative addressing expression: rip + disp, optionally plus either
// a Label or an absolute address (at most one of the two may be attached).
struct RegRip {
	int64_t disp_;
	const Label* label_;
	bool isAddr_; // true when disp_ carries an absolute address
	explicit XBYAK_CONSTEXPR RegRip(int64_t disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
	friend const RegRip operator+(const RegRip& r, int disp) {
		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator-(const RegRip& r, int disp) {
		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator+(const RegRip& r, int64_t disp) {
		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator-(const RegRip& r, int64_t disp) {
		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
	}
	friend const RegRip operator+(const RegRip& r, const Label& label) {
		// a label or address may be attached only once
		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
		return RegRip(r.disp_, &label);
	}
	friend const RegRip operator+(const RegRip& r, const void *addr) {
		if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
		return RegRip(r.disp_ + (int64_t)addr, 0, true);
	}
};
#endif
896
// 8-bit form of this register (keeps the REX-only flag for
// spl/bpl/sil/dil); errors via changeBit() if not convertible
inline Reg8 Reg::cvt8() const
{
	Reg r = changeBit(8); return Reg8(r.getIdx(), r.isExt8bit());
}

// 16-bit form of this register
inline Reg16 Reg::cvt16() const
{
	return Reg16(changeBit(16).getIdx());
}

// 32-bit form of this register
inline Reg32 Reg::cvt32() const
{
	return Reg32(changeBit(32).getIdx());
}

#ifdef XBYAK64
// 64-bit form of this register
inline Reg64 Reg::cvt64() const
{
	return Reg64(changeBit(64).getIdx());
}
#endif
918
#ifndef XBYAK_DISABLE_SEGMENT
// not derived from Reg
// Segment register (es/cs/ss/ds/fs/gs); idx must be one of the enum values
class Segment {
	int idx_;
public:
	enum {
		es, cs, ss, ds, fs, gs
	};
	explicit XBYAK_CONSTEXPR Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); }
	int getIdx() const { return idx_; }
	// printable name of the segment register
	const char *toString() const
	{
		static const char tbl[][3] = {
			"es", "cs", "ss", "ds", "fs", "gs"
		};
		return tbl[idx_];
	}
};
#endif
938
// Addressing expression [base_ + index_ * scale_ + disp_], built up by
// the operator+/operator* overloads on registers.
class RegExp {
public:
#ifdef XBYAK64
	enum { i32e = 32 | 64 }; // GP register widths accepted as base/index
#else
	enum { i32e = 32 };
#endif
	XBYAK_CONSTEXPR RegExp(size_t disp = 0) : scale_(0), disp_(disp) { }
	// A vector register or any scale != 1 makes r the index register;
	// otherwise r becomes the base.
	XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1)
		: scale_(scale)
		, disp_(0)
	{
		if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		if (scale == 0) return;
		if (scale != 1 && scale != 2 && scale != 4 && scale != 8) XBYAK_THROW(ERR_BAD_SCALE)
		if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index
			index_ = r;
		} else {
			base_ = r;
		}
	}
	// true if the index is a vector register (VSIB addressing)
	bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); }
	RegExp optimize() const
	{
		RegExp exp = *this;
		// [reg * 2] => [reg + reg]
		if (index_.isBit(i32e) && !base_.getBit() && scale_ == 2) {
			exp.base_ = index_;
			exp.scale_ = 1;
		}
		return exp;
	}
	bool operator==(const RegExp& rhs) const
	{
		return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_ && scale_ == rhs.scale_;
	}
	const Reg& getBase() const { return base_; }
	const Reg& getIndex() const { return index_; }
	int getScale() const { return scale_; }
	size_t getDisp() const { return disp_; }
	// validate encodability: no vector base, esp can't be an index,
	// and base/index GP registers must have the same width
	XBYAK_CONSTEXPR void verify() const
	{
		if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		if (index_.getBit() && index_.getBit() <= 64) {
			if (index_.getIdx() == Operand::ESP) XBYAK_THROW(ERR_ESP_CANT_BE_INDEX)
			if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		}
	}
	friend RegExp operator+(const RegExp& a, const RegExp& b);
	friend RegExp operator-(const RegExp& e, size_t disp);
	// REX prefix contribution of the index (X bit) and base (B bit)
	uint8_t getRex() const
	{
		uint8_t rex = index_.getRexX() | base_.getRexB();
		return rex ? uint8_t(rex | 0x40) : 0;
	}
private:
	/*
		[base_ + index_ * scale_ + disp_]
		base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm
	*/
	Reg base_;
	Reg index_;
	int scale_;
	size_t disp_;
};
1004
// Combine two address sub-expressions into one [base + index*scale + disp].
inline RegExp operator+(const RegExp& a, const RegExp& b)
{
	// at most one side may already carry an index register
	if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
	RegExp ret = a;
	if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; }
	if (b.base_.getBit()) {
		if (ret.base_.getBit()) {
			// both sides have a base: the second becomes the index, which
			// is only possible if no index is present yet
			if (ret.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
			// base + base => base + index * 1
			ret.index_ = b.base_;
			// [reg + esp] => [esp + reg]
			if (ret.index_.getIdx() == Operand::ESP) std::swap(ret.base_, ret.index_);
			ret.scale_ = 1;
		} else {
			ret.base_ = b.base_;
		}
	}
	ret.disp_ += b.disp_;
	return ret;
}
1025inline RegExp operator*(const Reg& r, int scale)
1026{
1027 return RegExp(r, scale);
1028}
1029inline RegExp operator*(int scale, const Reg& r)
1030{
1031 return r * scale;
1032}
1033inline RegExp operator-(const RegExp& e, size_t disp)
1034{
1035 RegExp ret = e;
1036 ret.disp_ -= disp;
1037 return ret;
1038}
1039
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
// Sentinel values: AutoGrow selects the automatically growing buffer mode,
// DontSetProtectRWE selects allocator-owned memory without the initial
// read/write/exec protection change.
void *const AutoGrow = (void*)1; //-V566
void *const DontSetProtectRWE = (void*)2; //-V566
1043
// Buffer that generated machine code is written into. Backing storage is
// either caller-supplied (USER_BUF), allocator-owned (ALLOC_BUF), or
// allocator-owned and grown on demand (AUTO_GROW).
class CodeArray {
	enum Type {
		USER_BUF = 1, // use userPtr(non alignment, non protect)
		ALLOC_BUF, // use new(alignment, protect)
		AUTO_GROW // automatically move and grow memory if necessary
	};
	// non-copyable: declared but not defined
	CodeArray(const CodeArray& rhs);
	void operator=(const CodeArray&);
	bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; }
	// deferred address fixup recorded while in AUTO_GROW mode (the buffer
	// address may still change, so patching happens in calcJmpAddress())
	struct AddrInfo {
		size_t codeOffset; // position to write
		size_t jmpAddr; // value to write
		int jmpSize; // size of jmpAddr
		inner::LabelMode mode;
		AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode)
			: codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {}
		// final value to store at codeOffset given the buffer's start address
		uint64_t getVal(const uint8_t *top) const
		{
			uint64_t disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top);
			if (jmpSize == 4) disp = inner::VerifyInInt32(disp);
			return disp;
		}
	};
	typedef std::list<AddrInfo> AddrInfoList;
	AddrInfoList addrInfoList_;
	const Type type_;
#ifdef XBYAK_USE_MMAP_ALLOCATOR
	MmapAllocator defaultAllocator_;
#else
	Allocator defaultAllocator_;
#endif
	Allocator *alloc_;
protected:
	size_t maxSize_; // capacity of top_ in bytes
	uint8_t *top_; // start of the code buffer
	size_t size_; // bytes written so far
	bool isCalledCalcJmpAddress_;

	bool useProtect() const { return alloc_->useProtect(); }
	/*
		allocate new memory and copy old data to the new area
	*/
	void growMemory()
	{
		const size_t newSize = (std::max<size_t>)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2);
		uint8_t *newTop = alloc_->alloc(newSize);
		if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC)
		for (size_t i = 0; i < size_; i++) newTop[i] = top_[i];
		alloc_->free(top_);
		top_ = newTop;
		maxSize_ = newSize;
	}
	/*
		calc jmp address for AutoGrow mode
	*/
	void calcJmpAddress()
	{
		if (isCalledCalcJmpAddress_) return; // idempotent
		for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) {
			uint64_t disp = i->getVal(top_);
			rewrite(i->codeOffset, disp, i->jmpSize);
		}
		isCalledCalcJmpAddress_ = true;
	}
public:
	enum ProtectMode {
		PROTECT_RW = 0, // read/write
		PROTECT_RWE = 1, // read/write/exec
		PROTECT_RE = 2 // read/exec
	};
	explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
		: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
		, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
		, maxSize_(maxSize)
		, top_(type_ == USER_BUF ? reinterpret_cast<uint8_t*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
		, size_(0)
		, isCalledCalcJmpAddress_(false)
	{
		if (maxSize_ > 0 && top_ == 0) XBYAK_THROW(ERR_CANT_ALLOC)
		// free the just-allocated buffer before throwing; the destructor
		// will not run when the constructor throws
		if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
			alloc_->free(top_);
			XBYAK_THROW(ERR_CANT_PROTECT)
		}
	}
	virtual ~CodeArray()
	{
		if (isAllocType()) {
			// restore RW before freeing so the allocator can reuse the pages
			if (useProtect()) setProtectModeRW(false);
			alloc_->free(top_);
		}
	}
	bool setProtectMode(ProtectMode mode, bool throwException = true)
	{
		bool isOK = protect(top_, maxSize_, mode);
		if (isOK) return true;
		if (throwException) XBYAK_THROW_RET(ERR_CANT_PROTECT, false)
		return false;
	}
	bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
	bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
	// rewind to an empty buffer without releasing memory
	void resetSize()
	{
		size_ = 0;
		addrInfoList_.clear();
		isCalledCalcJmpAddress_ = false;
	}
	// append one byte, growing the buffer first in AUTO_GROW mode
	void db(int code)
	{
		if (size_ >= maxSize_) {
			if (type_ == AUTO_GROW) {
				growMemory();
			} else {
				XBYAK_THROW(ERR_CODE_IS_TOO_BIG)
			}
		}
		top_[size_++] = static_cast<uint8_t>(code);
	}
	// append a byte sequence
	void db(const uint8_t *code, size_t codeSize)
	{
		for (size_t i = 0; i < codeSize; i++) db(code[i]);
	}
	// append the low codeSize bytes of code, little-endian
	void db(uint64_t code, size_t codeSize)
	{
		if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER)
		for (size_t i = 0; i < codeSize; i++) db(static_cast<uint8_t>(code >> (i * 8)));
	}
	void dw(uint32_t code) { db(code, 2); } // append 2 bytes
	void dd(uint32_t code) { db(code, 4); } // append 4 bytes
	void dq(uint64_t code) { db(code, 8); } // append 8 bytes
	const uint8_t *getCode() const { return top_; }
	template<class F>
	const F getCode() const { return reinterpret_cast<F>(top_); }
	const uint8_t *getCurr() const { return &top_[size_]; }
	template<class F>
	const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
	size_t getSize() const { return size_; }
	// move the write position (must stay within the allocated buffer)
	void setSize(size_t size)
	{
		if (size > maxSize_) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
		size_ = size;
	}
	// print up to the first 64 bytes of code as hex, 16 bytes per row
	void dump() const
	{
		const uint8_t *p = getCode();
		size_t bufSize = getSize();
		size_t remain = bufSize;
		for (int i = 0; i < 4; i++) {
			size_t disp = 16;
			if (remain < 16) {
				disp = remain;
			}
			for (size_t j = 0; j < 16; j++) {
				if (j < disp) {
					printf("%02X", p[i * 16 + j]);
				}
			}
			putchar('\n');
			remain -= disp;
			if (remain == 0) {
				break;
			}
		}
	}
	/*
		@param offset [in] offset from top
		@param disp [in] offset from the next of jmp
		@param size [in] write size(1, 2, 4, 8)
	*/
	void rewrite(size_t offset, uint64_t disp, size_t size)
	{
		assert(offset < maxSize_);
		if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER)
		uint8_t *const data = top_ + offset;
		// little-endian store of the low `size` bytes of disp
		for (size_t i = 0; i < size; i++) {
			data[i] = static_cast<uint8_t>(disp >> (i * 8));
		}
	}
	// record a fixup to be applied later by calcJmpAddress()
	void save(size_t offset, size_t val, int size, inner::LabelMode mode)
	{
		addrInfoList_.push_back(AddrInfo(offset, val, size, mode));
	}
	bool isAutoGrow() const { return type_ == AUTO_GROW; }
	bool isCalledCalcJmpAddress() const { return isCalledCalcJmpAddress_; }
	/**
		change exec permission of memory
		@param addr [in] buffer address
		@param size [in] buffer size
		@param protectMode [in] mode(RW/RWE/RE)
		@return true(success), false(failure)
	*/
	static inline bool protect(const void *addr, size_t size, int protectMode)
	{
#if defined(_WIN32)
		const DWORD c_rw = PAGE_READWRITE;
		const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
		const DWORD c_re = PAGE_EXECUTE_READ;
		DWORD mode;
#else
		const int c_rw = PROT_READ | PROT_WRITE;
		const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
		const int c_re = PROT_READ | PROT_EXEC;
		int mode;
#endif
		switch (protectMode) {
		case PROTECT_RW: mode = c_rw; break;
		case PROTECT_RWE: mode = c_rwe; break;
		case PROTECT_RE: mode = c_re; break;
		default:
			return false;
		}
#if defined(_WIN32)
		DWORD oldProtect;
		return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
#elif defined(__GNUC__)
		// mprotect requires a page-aligned address; round down and widen
		// the length so the whole [addr, addr+size) range is covered
		size_t pageSize = sysconf(_SC_PAGESIZE);
		size_t iaddr = reinterpret_cast<size_t>(addr);
		size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
#ifndef NDEBUG
		if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
#endif
		return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
#else
		return true;
#endif
	}
	/**
		get aligned memory pointer
		@param addr [in] address
		@param alignedSize [in] power of two
		@return aligned addr by alignedSize
	*/
	static inline uint8_t *getAlignedAddress(uint8_t *addr, size_t alignedSize = 16)
	{
		return reinterpret_cast<uint8_t*>((reinterpret_cast<size_t>(addr) + alignedSize - 1) & ~(alignedSize - static_cast<size_t>(1)));
	}
};
1280
// Memory operand: wraps a RegExp, a RIP-relative reference, or (on 64-bit)
// an absolute 64-bit displacement (moffset).
class Address : public Operand {
public:
	enum Mode {
		M_ModRM, // ordinary [base + index*scale + disp]
		M_64bitDisp, // 64-bit absolute displacement (moffset)
		M_rip, // [rip + disp] / [rip + label]
		M_ripAddr // [rip + absolute address]
	};
	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e)
		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
	{
		e_.verify();
	}
#ifdef XBYAK64
	explicit XBYAK_CONSTEXPR Address(size_t disp)
		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
	XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr)
		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
#endif
	// expression with [reg*2] folded to [reg+reg] unless optimize=false
	RegExp getRegExp(bool optimize = true) const
	{
		return optimize ? e_.optimize() : e_;
	}
	Mode getMode() const { return mode_; }
	bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
	bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
	size_t getDisp() const { return e_.getDisp(); }
	// REX bits are only relevant for ModRM-form addresses
	uint8_t getRex() const
	{
		if (mode_ != M_ModRM) return 0;
		return getRegExp().getRex();
	}
	bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
	bool isBroadcast() const { return broadcast_; }
	const Label* getLabel() const { return label_; }
	bool operator==(const Address& rhs) const
	{
		return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
	}
	bool operator!=(const Address& rhs) const { return !operator==(rhs); }
	bool isVsib() const { return e_.isVsib(); }
private:
	RegExp e_;
	const Label* label_; // target label for M_rip mode (0 if none)
	Mode mode_;
	bool broadcast_; // EVEX {1toN} broadcast memory operand
};
1328
1329inline const Address& Operand::getAddress() const
1330{
1331 assert(isMEM());
1332 return static_cast<const Address&>(*this);
1333}
1334
1335inline bool Operand::operator==(const Operand& rhs) const
1336{
1337 if (isMEM() && rhs.isMEM()) return this->getAddress() == rhs.getAddress();
1338 return isEqualIfNotInherited(rhs);
1339}
1340
// Factory for Address objects via operator[] (the ptr/byte/word/... objects).
// bit_ is the operand size in bits; broadcast_ marks broadcast frames.
class AddressFrame {
	// non-copyable: declared but not defined
	void operator=(const AddressFrame&);
	AddressFrame(const AddressFrame&);
public:
	const uint32_t bit_;
	const bool broadcast_;
	explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { }
	Address operator[](const RegExp& e) const
	{
		return Address(bit_, broadcast_, e);
	}
	// absolute address given as a pointer
	Address operator[](const void *disp) const
	{
		return Address(bit_, broadcast_, RegExp(reinterpret_cast<size_t>(disp)));
	}
#ifdef XBYAK64
	Address operator[](uint64_t disp) const { return Address(disp); } // moffset form
	Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); }
#endif
};
1361
// Record of a jmp/call site whose target label is not yet defined;
// resolved later by LabelManager::define_inner.
struct JmpLabel {
	size_t endOfJmp; /* offset from top to the end address of jmp */
	int jmpSize; // byte width of the displacement field
	inner::LabelMode mode;
	size_t disp; // disp for [rip + disp]
	explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0)
		: endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp)
	{
	}
};
1372
1373class LabelManager;
1374
// Jump/call target handle. A Label is attached to a LabelManager when
// defined or copied; the manager reference-counts attached labels.
class Label {
	mutable LabelManager *mgr; // owning manager, 0 when detached
	mutable int id; // id assigned by the manager, 0 when unassigned
	friend class LabelManager;
public:
	Label() : mgr(0), id(0) {}
	Label(const Label& rhs);
	Label& operator=(const Label& rhs);
	~Label();
	// detach without notifying the manager (used by LabelManager itself)
	void clear() { mgr = 0; id = 0; }
	int getId() const { return id; }
	const uint8_t *getAddress() const;

	// backward compatibility
	// convert an old-style numeric label to its string form ".%08x"
	static inline std::string toStr(int num)
	{
		char buf[16];
#if defined(_MSC_VER) && (_MSC_VER < 1900)
		_snprintf_s
#else
		snprintf
#endif
		(buf, sizeof(buf), ".%08x", num);
		return buf;
	}
};
1401
// Resolves string labels ("@@"/"@b"/"@f", ".local", global) and Label-class
// labels to offsets in the owning CodeArray, patching forward references
// once a label becomes defined.
class LabelManager {
	// for string label
	struct SlabelVal {
		size_t offset;
		SlabelVal(size_t offset) : offset(offset) {}
	};
	typedef XBYAK_STD_UNORDERED_MAP<std::string, SlabelVal> SlabelDefList;
	typedef XBYAK_STD_UNORDERED_MULTIMAP<std::string, const JmpLabel> SlabelUndefList;
	struct SlabelState {
		SlabelDefList defList;
		SlabelUndefList undefList;
	};
	typedef std::list<SlabelState> StateList;
	// for Label class
	struct ClabelVal {
		ClabelVal(size_t offset = 0) : offset(offset), refCount(1) {}
		size_t offset;
		int refCount; // number of Label objects sharing this id
	};
	typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
	typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
	typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;

	CodeArray *base_;
	// global : stateList_.front(), local : stateList_.back()
	StateList stateList_;
	mutable int labelId_; // next id handed out by getId()
	ClabelDefList clabelDefList_;
	ClabelUndefList clabelUndefList_;
	LabelPtrList labelPtrList_; // all Label objects attached to this manager

	// assign a fresh id to the label on first use
	int getId(const Label& label) const
	{
		if (label.id == 0) label.id = labelId_++;
		return label.id;
	}
	// register a definition, then patch every pending reference to it
	template<class DefList, class UndefList, class T>
	void define_inner(DefList& defList, UndefList& undefList, const T& labelId, size_t addrOffset)
	{
		// add label
		typename DefList::value_type item(labelId, addrOffset);
		std::pair<typename DefList::iterator, bool> ret = defList.insert(item);
		if (!ret.second) XBYAK_THROW(ERR_LABEL_IS_REDEFINED)
		// search undefined label
		for (;;) {
			typename UndefList::iterator itr = undefList.find(labelId);
			if (itr == undefList.end()) break;
			const JmpLabel *jmp = &itr->second;
			const size_t offset = jmp->endOfJmp - jmp->jmpSize;
			size_t disp;
			if (jmp->mode == inner::LaddTop) {
				disp = addrOffset;
			} else if (jmp->mode == inner::Labs) {
				disp = size_t(base_->getCurr());
			} else {
				// relative displacement from the end of the jmp instruction
				disp = addrOffset - jmp->endOfJmp + jmp->disp;
#ifdef XBYAK64
				if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
#endif
				if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
			}
			if (base_->isAutoGrow()) {
				// buffer may still move; defer the patch
				base_->save(offset, disp, jmp->jmpSize, jmp->mode);
			} else {
				base_->rewrite(offset, disp, jmp->jmpSize);
			}
			undefList.erase(itr);
		}
	}
	template<class DefList, class T>
	bool getOffset_inner(const DefList& defList, size_t *offset, const T& label) const
	{
		typename DefList::const_iterator i = defList.find(label);
		if (i == defList.end()) return false;
		*offset = i->second.offset;
		return true;
	}
	friend class Label;
	void incRefCount(int id, Label *label)
	{
		clabelDefList_[id].refCount++;
		labelPtrList_.insert(label);
	}
	void decRefCount(int id, Label *label)
	{
		labelPtrList_.erase(label);
		ClabelDefList::iterator i = clabelDefList_.find(id);
		if (i == clabelDefList_.end()) return;
		if (i->second.refCount == 1) {
			clabelDefList_.erase(id);
		} else {
			--i->second.refCount;
		}
	}
	template<class T>
	bool hasUndefinedLabel_inner(const T& list) const
	{
#ifndef NDEBUG
		// in debug builds, report every unresolved label on stderr
		for (typename T::const_iterator i = list.begin(); i != list.end(); ++i) {
			std::cerr << "undefined label:" << i->first << std::endl;
		}
#endif
		return !list.empty();
	}
	// detach all labels linked to LabelManager
	void resetLabelPtrList()
	{
		for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
			(*i)->clear();
		}
		labelPtrList_.clear();
	}
public:
	LabelManager()
	{
		reset();
	}
	~LabelManager()
	{
		resetLabelPtrList();
	}
	void reset()
	{
		base_ = 0;
		labelId_ = 1;
		stateList_.clear();
		// two initial states: global scope (front) and one local scope (back)
		stateList_.push_back(SlabelState());
		stateList_.push_back(SlabelState());
		clabelDefList_.clear();
		clabelUndefList_.clear();
		resetLabelPtrList();
	}
	void enterLocal()
	{
		stateList_.push_back(SlabelState());
	}
	void leaveLocal()
	{
		if (stateList_.size() <= 2) XBYAK_THROW(ERR_UNDER_LOCAL_LABEL)
		if (hasUndefinedLabel_inner(stateList_.back().undefList)) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND)
		stateList_.pop_back();
	}
	void set(CodeArray *base) { base_ = base; }
	// define a string label; "@@" alternates between the "@b"/"@f" anchors
	void defineSlabel(std::string label)
	{
		if (label == "@b" || label == "@f") XBYAK_THROW(ERR_BAD_LABEL_STR)
		if (label == "@@") {
			SlabelDefList& defList = stateList_.front().defList;
			SlabelDefList::iterator i = defList.find("@f");
			if (i != defList.end()) {
				defList.erase(i);
				label = "@b";
			} else {
				i = defList.find("@b");
				if (i != defList.end()) {
					defList.erase(i);
				}
				label = "@f";
			}
		}
		// labels starting with '.' are local to the current scope
		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		define_inner(st.defList, st.undefList, label, base_->getSize());
	}
	void defineClabel(Label& label)
	{
		define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
		label.mgr = this;
		labelPtrList_.insert(&label);
	}
	// bind dst to the already-defined offset of src
	void assign(Label& dst, const Label& src)
	{
		ClabelDefList::const_iterator i = clabelDefList_.find(src.id);
		if (i == clabelDefList_.end()) XBYAK_THROW(ERR_LABEL_ISNOT_SET_BY_L)
		define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
		dst.mgr = this;
		labelPtrList_.insert(&dst);
	}
	// note: label may be rewritten to the "@b"/"@f" form actually looked up
	bool getOffset(size_t *offset, std::string& label) const
	{
		const SlabelDefList& defList = stateList_.front().defList;
		if (label == "@b") {
			if (defList.find("@f") != defList.end()) {
				label = "@f";
			} else if (defList.find("@b") == defList.end()) {
				XBYAK_THROW_RET(ERR_LABEL_IS_NOT_FOUND, false)
			}
		} else if (label == "@f") {
			if (defList.find("@f") != defList.end()) {
				label = "@b";
			}
		}
		const SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		return getOffset_inner(st.defList, offset, label);
	}
	bool getOffset(size_t *offset, const Label& label) const
	{
		return getOffset_inner(clabelDefList_, offset, getId(label));
	}
	void addUndefinedLabel(const std::string& label, const JmpLabel& jmp)
	{
		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
		st.undefList.insert(SlabelUndefList::value_type(label, jmp));
	}
	void addUndefinedLabel(const Label& label, const JmpLabel& jmp)
	{
		clabelUndefList_.insert(ClabelUndefList::value_type(label.id, jmp));
	}
	bool hasUndefSlabel() const
	{
		for (StateList::const_iterator i = stateList_.begin(), ie = stateList_.end(); i != ie; ++i) {
			if (hasUndefinedLabel_inner(i->undefList)) return true;
		}
		return false;
	}
	bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); }
	const uint8_t *getCode() const { return base_->getCode(); }
	bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); }
};
1620
1621inline Label::Label(const Label& rhs)
1622{
1623 id = rhs.id;
1624 mgr = rhs.mgr;
1625 if (mgr) mgr->incRefCount(id, this);
1626}
1627inline Label& Label::operator=(const Label& rhs)
1628{
1629 if (id) XBYAK_THROW_RET(ERR_LABEL_IS_ALREADY_SET_BY_L, *this)
1630 id = rhs.id;
1631 mgr = rhs.mgr;
1632 if (mgr) mgr->incRefCount(id, this);
1633 return *this;
1634}
1635inline Label::~Label()
1636{
1637 if (id && mgr) mgr->decRefCount(id, this);
1638}
1639inline const uint8_t* Label::getAddress() const
1640{
1641 if (mgr == 0 || !mgr->isReady()) return 0;
1642 size_t offset;
1643 if (!mgr->getOffset(&offset, *this)) return 0;
1644 return mgr->getCode() + offset;
1645}
1646
// Encoding preference when an instruction has both VEX and EVEX forms.
typedef enum {
	DefaultEncoding,
	VexEncoding,
	EvexEncoding
} PreferredEncoding;
1652
class CodeGenerator : public CodeArray {
public:
	enum LabelType {
		T_SHORT,
		T_NEAR,
		T_FAR, // far jump
		T_AUTO // T_SHORT if possible
	};
private:
	CodeGenerator operator=(const CodeGenerator&); // don't call
#ifdef XBYAK64
	// i32e : bitmask of general-register widths usable for addressing
	enum { i32e = 32 | 64, BIT = 64 };
	// NOTE(review): looks like a recognizable placeholder displacement — confirm usage at call sites
	static const uint64_t dummyAddr = uint64_t(0x1122334455667788ull);
	typedef Reg64 NativeReg;
#else
	enum { i32e = 32, BIT = 32 };
	static const size_t dummyAddr = 0x12345678;
	typedef Reg32 NativeReg;
#endif
1672 // (XMM, XMM|MEM)
1673 static inline bool isXMM_XMMorMEM(const Operand& op1, const Operand& op2)
1674 {
1675 return op1.isXMM() && (op2.isXMM() || op2.isMEM());
1676 }
1677 // (MMX, MMX|MEM) or (XMM, XMM|MEM)
1678 static inline bool isXMMorMMX_MEM(const Operand& op1, const Operand& op2)
1679 {
1680 return (op1.isMMX() && (op2.isMMX() || op2.isMEM())) || isXMM_XMMorMEM(op1, op2);
1681 }
1682 // (XMM, MMX|MEM)
1683 static inline bool isXMM_MMXorMEM(const Operand& op1, const Operand& op2)
1684 {
1685 return op1.isXMM() && (op2.isMMX() || op2.isMEM());
1686 }
1687 // (MMX, XMM|MEM)
1688 static inline bool isMMX_XMMorMEM(const Operand& op1, const Operand& op2)
1689 {
1690 return op1.isMMX() && (op2.isXMM() || op2.isMEM());
1691 }
1692 // (XMM, REG32|MEM)
1693 static inline bool isXMM_REG32orMEM(const Operand& op1, const Operand& op2)
1694 {
1695 return op1.isXMM() && (op2.isREG(i32e) || op2.isMEM());
1696 }
1697 // (REG32, XMM|MEM)
1698 static inline bool isREG32_XMMorMEM(const Operand& op1, const Operand& op2)
1699 {
1700 return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM());
1701 }
1702 // (REG32, REG32|MEM)
1703 static inline bool isREG32_REG32orMEM(const Operand& op1, const Operand& op2)
1704 {
1705 return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
1706 }
1707 static inline bool isValidSSE(const Operand& op1)
1708 {
1709 // SSE instructions do not support XMM16 - XMM31
1710 return !(op1.isXMM() && op1.getIdx() >= 16);
1711 }
	// Emit legacy prefixes for an operand pair: 0x67 (address-size), 0x66
	// (operand-size), and a REX byte when required. The memory operand, if
	// any, is normalized into p2 first.
	void rex(const Operand& op1, const Operand& op2 = Operand())
	{
		uint8_t rex = 0;
		const Operand *p1 = &op1, *p2 = &op2;
		if (p1->isMEM()) std::swap(p1, p2);
		if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) // mem,mem is not encodable
		if (p2->isMEM()) {
			const Address& addr = p2->getAddress();
			if (BIT == 64 && addr.is32bit()) db(0x67); // address-size override
			rex = addr.getRex() | p1->getReg().getRex();
		} else {
			// ModRM(reg, base);
			rex = op2.getReg().getRex(op1.getReg());
		}
		// except movsx(16bit, 32/64bit)
		if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66);
		if (rex) db(rex);
	}
	// Encoding attribute flags for AVX/AVX-512 instructions; OR-combined
	// into the 'type' argument of vex()/evex().
	enum AVXtype {
		// low 3 bit
		T_N1 = 1,
		T_N2 = 2,
		T_N4 = 3,
		T_N8 = 4,
		T_N16 = 5,
		T_N32 = 6,
		T_NX_MASK = 7,
		//
		T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
		T_DUP = 1 << 4, // N = (8, 32, 64)
		T_66 = 1 << 5, // pp = 1
		T_F3 = 1 << 6, // pp = 2
		T_F2 = T_66 | T_F3, // pp = 3
		T_ER_R = 1 << 7, // reg{er}
		T_0F = 1 << 8,
		T_0F38 = 1 << 9,
		T_0F3A = 1 << 10,
		T_L0 = 1 << 11,
		T_L1 = 1 << 12,
		T_W0 = 1 << 13,
		T_W1 = 1 << 14,
		T_EW0 = 1 << 15,
		T_EW1 = 1 << 16,
		T_YMM = 1 << 17, // support YMM, ZMM
		T_EVEX = 1 << 18,
		T_ER_X = 1 << 19, // xmm{er}
		T_ER_Y = 1 << 20, // ymm{er}
		T_ER_Z = 1 << 21, // zmm{er}
		T_SAE_X = 1 << 22, // xmm{sae}
		T_SAE_Y = 1 << 23, // ymm{sae}
		T_SAE_Z = 1 << 24, // zmm{sae}
		T_MUST_EVEX = 1 << 25, // contains T_EVEX
		T_B32 = 1 << 26, // m32bcst
		T_B64 = 1 << 27, // m64bcst
		T_B16 = T_B32 | T_B64, // m16bcst (Be careful)
		T_M_K = 1 << 28, // mem{k}
		T_VSIB = 1 << 29,
		T_MEM_EVEX = 1 << 30, // use evex if mem
		T_FP16 = 1 << 31, // avx512-fp16
		T_MAP5 = T_FP16 | T_0F,
		T_MAP6 = T_FP16 | T_0F38,
		T_XXX
	};
	// T_66 = 1, T_F3 = 2, T_F2 = 3
	// extract the pp prefix field; the shift depends on T_66/T_F3 being bits 5/6
	uint32_t getPP(int type) const { return (type >> 5) & 3; }
	// Emit a 2- or 3-byte VEX prefix followed by the opcode byte.
	// reg: ModRM.reg register, base: ModRM.rm/base register, v: vvvv operand.
	void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
	{
		int w = (type & T_W1) ? 1 : 0;
		bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
		bool r = reg.isExtIdx();
		bool b = base.isExtIdx();
		int idx = v ? v->getIdx() : 0;
		if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION) // VEX can only address xmm0-15
		uint32_t pp = getPP(type);
		uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
		if (!b && !x && !w && (type & T_0F)) {
			// compact 2-byte form (0xC5)
			db(0xC5); db((r ? 0 : 0x80) | vvvv);
		} else {
			// full 3-byte form (0xC4)
			uint32_t mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
			db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv);
		}
		db(code);
	}
1795 void verifySAE(const Reg& r, int type) const
1796 {
1797 if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
1798 XBYAK_THROW(ERR_SAE_IS_INVALID)
1799 }
1800 void verifyER(const Reg& r, int type) const
1801 {
1802 if ((type & T_ER_R) && r.isREG(32|64)) return;
1803 if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
1804 XBYAK_THROW(ERR_ER_IS_INVALID)
1805 }
	// (a, b, c) contains non zero two or three values then err
	// Returns the single non-zero value among (a, b, c), or 0 if all zero;
	// throws err when two or more distinct non-zero values are present
	// (checked via: any non-zero value that differs from the OR of all three).
	int verifyDuplicate(int a, int b, int c, int err)
	{
		int v = a | b | c;
		if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0)
		return v;
	}
	// Emit the 4-byte EVEX prefix followed by the opcode byte.
	// Returns disp8N, the scale used for compressed 8-bit displacements.
	// aaa: opmask register field; VL: vector length hint in bits;
	// b: broadcast/rounding bit; Hi16Vidx: V' for index registers 16-31.
	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false)
	{
		if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
		int w = (type & T_EW1) ? 1 : 0;
		uint32_t mmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
		if (type & T_FP16) mmm |= 4; // map5/map6
		uint32_t pp = getPP(type);
		int idx = v ? v->getIdx() : 0;
		uint32_t vvvv = ~idx;

		// EVEX stores these bits inverted
		bool R = !reg.isExtIdx();
		bool X = x ? false : !base.isExtIdx2();
		bool B = !base.isExtIdx();
		bool Rp = !reg.isExtIdx2();
		int LL;
		int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
		int disp8N = 1;
		if (rounding) {
			// {sae}/{er} reuse the LL field and force the b bit
			if (rounding == EvexModifierRounding::T_SAE) {
				verifySAE(base, type); LL = 0;
			} else {
				verifyER(base, type); LL = rounding - 1;
			}
			b = true;
		} else {
			// vector length = max of operand widths and the explicit hint
			if (v) VL = (std::max)(VL, v->getBit());
			VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
			LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
			if (b) {
				// broadcast: element size decides the disp8 scale
				disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
			} else if (type & T_DUP) {
				disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
			} else {
				if ((type & (T_NX_MASK | T_N_VL)) == 0) {
					type |= T_N16 | T_N_VL; // default
				}
				int low = type & T_NX_MASK;
				if (low > 0) {
					disp8N = 1 << (low - 1);
					if (type & T_N_VL) disp8N *= (VL == 512 ? 4 : VL == 256 ? 2 : 1);
				}
			}
		}
		bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
		bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
		if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
		if (aaa == 0) z = 0; // clear T_z if mask is not set
		db(0x62);
		db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | mmm);
		db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
		db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
		db(code);
		return disp8N;
	}
1867 void setModRM(int mod, int r1, int r2)
1868 {
1869 db(static_cast<uint8_t>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
1870 }
	// Emit ModRM (+ optional SIB) and displacement bytes for the address
	// expression e. reg goes into ModRM.reg; disp8N > 0 enables EVEX
	// compressed disp8 encoding (displacement divided by disp8N).
	void setSIB(const RegExp& e, int reg, int disp8N = 0)
	{
		uint64_t disp64 = e.getDisp();
#ifdef XBYAK64
#ifdef XBYAK_OLD_DISP_CHECK
		// treat 0xffffffff as 0xffffffffffffffff
		uint64_t high = disp64 >> 32;
		if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
#else
		// displacement should be a signed 32-bit value, so also check sign bit
		uint64_t high = disp64 >> 31;
		if (high != 0 && high != 0x1FFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
#endif
#endif
		uint32_t disp = static_cast<uint32_t>(disp64);
		const Reg& base = e.getBase();
		const Reg& index = e.getIndex();
		const int baseIdx = base.getIdx();
		const int baseBit = base.getBit();
		const int indexBit = index.getBit();
		enum {
			mod00 = 0, mod01 = 1, mod10 = 2
		};
		int mod = mod10; // disp32
		// mod00: no displacement (but rbp/r13 as base always needs one)
		if (!baseBit || ((baseIdx & 7) != Operand::EBP && disp == 0)) {
			mod = mod00;
		} else {
			if (disp8N == 0) {
				if (inner::IsInDisp8(disp)) {
					mod = mod01; // plain disp8
				}
			} else {
				// compressed disp8: encodable only if divisible by disp8N
				// disp must be casted to signed
				uint32_t t = static_cast<uint32_t>(static_cast<int>(disp) / disp8N);
				if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) {
					disp = t;
					mod = mod01;
				}
			}
		}
		// no base register is encoded as rm=101 (EBP) with mod00 + disp32
		const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand::EBP;
		/* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */
		bool hasSIB = indexBit || (baseIdx & 7) == Operand::ESP;
#ifdef XBYAK64
		if (!baseBit && !indexBit) hasSIB = true; // absolute address needs SIB in 64-bit mode
#endif
		if (hasSIB) {
			setModRM(mod, reg, Operand::ESP);
			/* SIB = [2:3:3] = [SS:index:base(=rm)] */
			const int idx = indexBit ? (index.getIdx() & 7) : Operand::ESP;
			const int scale = e.getScale();
			const int SS = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 1 : 0;
			setModRM(SS, idx, newBaseIdx);
		} else {
			setModRM(mod, reg, newBaseIdx);
		}
		if (mod == mod01) {
			db(disp);
		} else if (mod == mod10 || (mod == mod00 && !baseBit)) {
			dd(disp);
		}
	}
	LabelManager labelMgr_; // tracks defined labels and forward references
	// true if x, viewed as a 32-bit pattern, fits in a signed 16-bit value
	bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
	// reg-reg form: REX prefix, 1-3 opcode bytes, then ModR/M with mod=3.
	// Bit0 of code0 is set unless reg1 is an 8-bit register (byte/word-dword-qword opcode split).
	void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE)
	{
		rex(reg2, reg1);
		db(code0 | (reg1.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
		setModRM(3, reg1.getIdx(), reg2.getIdx());
	}
	// reg-mem form: REX prefix, opcode bytes, then addressing bytes for `addr`.
	// immSize is the byte count of a trailing immediate the caller will emit (affects RIP-relative disp).
	void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
	{
		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
		rex(addr, reg);
		db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
		opAddr(addr, reg.getIdx(), immSize);
	}
	// Load-from-memory form that rejects 8-bit destination registers (e.g. lss/lfs/lgs-style).
	// Unlike opModM, the opcode's low bit is not modified by the register width.
	void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE)
	{
		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		rex(addr, reg);
		db(code0); if (code1 != NONE) db(code1);
		opAddr(addr, reg.getIdx());
	}
	// MIB addressing (e.g. bndldx/bndstx): requires plain ModRM mode, emits the raw
	// REX byte from the RegExp itself and encodes the SIB without compressed disp.
	void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
	{
		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
		if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS)
		if (BIT == 64 && addr.is32bit()) db(0x67); // address-size override
		const RegExp& regExp = addr.getRegExp(false);
		uint8_t rex = regExp.getRex();
		if (rex) db(rex);
		db(code0); db(code1);
		setSIB(regExp, reg.getIdx());
	}
	// Emit a jump whose target is `disp` bytes from the start of this instruction.
	// Uses the 2-byte short form when it fits in disp8 and type allows; otherwise the
	// long form (optional prefix + opcode + rel32). T_SHORT that does not fit throws.
	void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
	{
		const int shortJmpSize = 2;
		const int longHeaderSize = longPref ? 2 : 1;
		const int longJmpSize = longHeaderSize + 4;
		// displacement is relative to the end of the instruction, hence the size subtraction
		if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) {
			db(shortCode); db(disp - shortJmpSize);
		} else {
			if (type == T_SHORT) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
			if (longPref) db(longPref);
			db(longCode); dd(disp - longJmpSize);
		}
	}
1980 bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); }
	// Jump to a label. If the label is already defined, emit the resolved jump now;
	// otherwise emit a zero placeholder (short or near depending on isNEAR) and
	// register an undefined-label fixup to be patched when the label is defined.
	template<class T>
	void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
	{
		if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
		size_t offset = 0;
		if (labelMgr_.getOffset(&offset, label)) { /* label exists */
			makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref);
		} else {
			int jmpSize = 0;
			if (isNEAR(type)) {
				jmpSize = 4;
				if (longPref) db(longPref);
				db(longCode); dd(0); // rel32 placeholder
			} else {
				jmpSize = 1;
				db(shortCode); db(0); // rel8 placeholder
			}
			JmpLabel jmp(size_, jmpSize, inner::LasIs);
			labelMgr_.addUndefinedLabel(label, jmp);
		}
	}
	// Jump to an absolute code address. In auto-grow mode only T_NEAR is possible
	// (the buffer may move, so the rel32 is recorded via save() and patched later);
	// otherwise the displacement is computed against the current position.
	void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0)
	{
		if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
		if (isAutoGrow()) {
			if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
			if (size_ + 16 >= maxSize_) growMemory();
			if (longPref) db(longPref);
			db(longCode);
			dd(0); // placeholder, fixed up via save()
			save(size_ - 4, size_t(addr) - size_, 4, inner::Labs);
		} else {
			makeJmp(inner::VerifyInInt32(reinterpret_cast<const uint8_t*>(addr) - getCurr()), type, shortCode, longCode, longPref);
		}

	}
	// jmp/call through a register or memory operand (opcode 0xFF /ext).
	// T_FAR uses /ext+1 and requires a memory operand; otherwise REX.W is suppressed
	// (disableRex=true) since near indirect jumps default to 64-bit operands.
	void opJmpOp(const Operand& op, LabelType type, int ext)
	{
		const int bit = 16|i32e;
		if (type == T_FAR) {
			if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED)
			opR_ModM(op, bit, ext + 1, 0xFF, NONE, NONE, false);
		} else {
			opR_ModM(op, bit, ext, 0xFF, NONE, NONE, true);
		}
	}
	// reg is reg field of ModRM
	// immSize is the size for immediate value
	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
	// Dispatch on the address mode: plain ModRM/SIB, or RIP-relative (label or absolute).
	void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
	{
		if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
		if (addr.getMode() == Address::M_ModRM) {
			setSIB(addr.getRegExp(), reg, disp8N);
		} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
			setModRM(0, reg, 5); // mod=00, rm=101 -> RIP-relative disp32
			if (addr.getLabel()) { // [rip + Label]
				putL_inner(*addr.getLabel(), true, addr.getDisp() - immSize);
			} else {
				size_t disp = addr.getDisp();
				if (addr.getMode() == Address::M_ripAddr) {
					if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW)
					// rel32 is measured from the end of the instruction (4-byte disp + immediate)
					disp -= (size_t)getCurr() + 4 + immSize;
				}
				dd(inner::VerifyInInt32(disp));
			}
		}
	}
	/* preCode is for SSSE3/SSE4 */
	// Generic SSE-family encoder: [pref] 0F [preCode] code, reg-reg or reg-mem,
	// with optional trailing imm8. isValid (may be null) screens the operand pair.
	void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
	{
		if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
		if (!isValidSSE(reg) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
		if (pref != NONE) db(pref);
		if (op.isMEM()) {
			opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
		} else {
			opModR(reg.getReg(), op.getReg(), 0x0F, preCode, code);
		}
		if (imm8 != NONE) db(imm8);
	}
	// MMX/SSE shift-by-immediate group: 0F code /ext, imm8; 0x66 prefix for the XMM form.
	void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext)
	{
		if (!isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED)
		if (mmx.isXMM()) db(0x66);
		opModR(Reg32(ext), mmx, 0x0F, code);
		db(imm8);
	}
	// Shared MMX/SSE2 encoder: the prefix applies only to the XMM form.
	void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
	{
		opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode);
	}
	// XMM<->memory moves: (xmm, mem) uses `code`, (mem, xmm) uses `code|1` (store direction).
	void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
	{
		if (!isValidSSE(op1) || !isValidSSE(op2)) XBYAK_THROW(ERR_NOT_SUPPORTED)
		if (pref != NONE) db(pref);
		if (op1.isXMM() && op2.isMEM()) {
			opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
		} else if (op1.isMEM() && op2.isXMM()) {
			opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1);
		} else {
			XBYAK_THROW(ERR_BAD_COMBINATION)
		}
	}
	// Extract-to-register/memory (pextrw etc.). The MMX2 register form uses the
	// legacy 0F C5 encoding; everything else goes through the 0F 3A path.
	void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
	{
		if (!isValidSSE(op) || !isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED)
		if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
			if (mmx.isXMM()) db(0x66);
			opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
		} else {
			opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, 0x3A);
		}
	}
	// /ext-style group instruction: the ModR/M reg field holds `ext`, op is r/m.
	// disableRex drops REX.W by treating 64-bit operands as 32-bit (for default-64 ops).
	void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false, int immSize = 0)
	{
		int opBit = op.getBit();
		if (disableRex && opBit == 64) opBit = 32;
		if (op.isREG(bit)) {
			opModR(Reg(ext, Operand::REG, opBit), op.getReg().changeBit(opBit), code0, code1, code2);
		} else if (op.isMEM()) {
			opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize);
		} else {
			XBYAK_THROW(ERR_BAD_COMBINATION)
		}
	}
	// Shift/rotate by immediate: 0xD0-form when imm==1 (no imm byte), 0xC0-form otherwise.
	void opShift(const Operand& op, int imm, int ext)
	{
		verifyMemHasSize(op);
		opR_ModM(op, 0, ext, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), NONE, NONE, false, (imm != 1) ? 1 : 0);
		if (imm != 1) db(imm);
	}
	// Shift/rotate by CL: only the CL register is accepted as the count.
	void opShift(const Operand& op, const Reg8& _cl, int ext)
	{
		if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
		opR_ModM(op, 0, ext, 0xD2);
	}
	// Dispatcher: the caller precomputes which form is legal (condR = reg-reg,
	// condM = reg-mem); anything else is a bad combination.
	void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
	{
		if (condR) {
			opModR(op1.getReg(), op2.getReg(), code0, code1, code2);
		} else if (condM) {
			opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize);
		} else {
			XBYAK_THROW(ERR_BAD_COMBINATION)
		}
	}
	// shld/shrd: imm8 count when _cl is null, CL count otherwise (code|1 selects the CL form).
	void opShxd(const Operand& op, const Reg& reg, uint8_t imm, int code, const Reg8 *_cl = 0)
	{
		if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION)
		opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1);
		if (!_cl) db(imm);
	}
	// (REG, REG|MEM), (MEM, REG)
	// The (reg, mem) load direction sets bit1 of the opcode (code|2).
	void opRM_RM(const Operand& op1, const Operand& op2, int code)
	{
		if (op1.isREG() && op2.isMEM()) {
			opModM(op2.getAddress(), op1.getReg(), code | 2);
		} else {
			opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code);
		}
	}
	// (REG|MEM, IMM)
	// Arithmetic-group with immediate: picks the smallest immediate encoding
	// (imm8 sign-extended via 0x82-form tmp bit) and the short AL/AX/EAX/RAX form
	// when the destination is register index 0.
	void opRM_I(const Operand& op, uint32_t imm, int code, int ext)
	{
		verifyMemHasSize(op);
		uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
		if (op.isBit(8)) immBit = 8;
		if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
		if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
		if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al
			rex(op);
			db(code | 4 | (immBit == 8 ? 0 : 1));
		} else {
			// tmp=2 selects the sign-extended imm8 form for wider operands
			int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0;
			opR_ModM(op, 0, ext, 0x80 | tmp, NONE, NONE, false, immBit / 8);
		}
		db(imm, immBit / 8);
	}
	// inc/dec. On 32-bit mode the one-byte 0x40+r / 0x48+r forms are used for
	// non-8-bit registers; otherwise falls through to the 0xFE/0xFF /ext group form.
	void opIncDec(const Operand& op, int code, int ext)
	{
		verifyMemHasSize(op);
#ifndef XBYAK64
		if (op.isREG() && !op.isBit(8)) {
			rex(op); db(code | op.getIdx());
			return;
		}
#endif
		code = 0xFE; // group form (opModR/opModM set the low bit for non-8-bit operands)
		if (op.isREG()) {
			opModR(Reg(ext, Operand::REG, op.getBit()), op.getReg(), code);
		} else {
			opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
		}
	}
	// push/pop. Registers use the short alt+r form (0x41 REX.B for r8-r15);
	// memory uses the code /ext group form. Only 16-bit and native-width operands are legal.
	void opPushPop(const Operand& op, int code, int ext, int alt)
	{
		int bit = op.getBit();
		if (bit == 16 || bit == BIT) {
			if (bit == 16) db(0x66); // operand-size override
			if (op.isREG()) {
				if (op.getReg().getIdx() >= 8) db(0x41);
				db(alt | (op.getIdx() & 7));
				return;
			}
			if (op.isMEM()) {
				opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
				return;
			}
		}
		XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// A memory operand must carry an explicit size (byte/word/dword/...) for size-ambiguous instructions.
	void verifyMemHasSize(const Operand& op) const
	{
		if (op.isMEM() && op.getBit() == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED)
	}
2198 /*
2199 mov(r, imm) = db(imm, mov_imm(r, imm))
2200 */
2201 int mov_imm(const Reg& reg, uint64_t imm)
2202 {
2203 int bit = reg.getBit();
2204 const int idx = reg.getIdx();
2205 int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3);
2206 if (bit == 64 && (imm & ~uint64_t(0xffffffffu)) == 0) {
2207 rex(Reg32(idx));
2208 bit = 32;
2209 } else {
2210 rex(reg);
2211 if (bit == 64 && inner::IsInInt32(imm)) {
2212 db(0xC7);
2213 code = 0xC0;
2214 bit = 32;
2215 }
2216 }
2217 db(code | (idx & 7));
2218 return bit / 8;
2219 }
	// Write the address of (or offset to) a label into the code buffer:
	// rel32 when relative, otherwise a pointer-sized absolute address.
	// Undefined labels get a zero placeholder plus a fixup record.
	template<class T>
	void putL_inner(T& label, bool relative = false, size_t disp = 0)
	{
		const int jmpSize = relative ? 4 : (int)sizeof(size_t);
		if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory();
		size_t offset = 0;
		if (labelMgr_.getOffset(&offset, label)) {
			if (relative) {
				db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize);
			} else if (isAutoGrow()) {
				// absolute address unknown until buffer is finalized; patch via save()
				db(uint64_t(0), jmpSize);
				save(size_ - jmpSize, offset, jmpSize, inner::LaddTop);
			} else {
				db(size_t(top_) + offset, jmpSize);
			}
			return;
		}
		db(uint64_t(0), jmpSize);
		JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp);
		labelMgr_.addUndefinedLabel(label, jmp);
	}
	// movzx/movsx: 0F code(|1 for 16-bit source); destination must be wider than the source.
	void opMovxx(const Reg& reg, const Operand& op, uint8_t code)
	{
		if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
		int w = op.isBit(16);
		bool cond = reg.isREG() && (reg.getBit() > op.getBit());
		opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w);
	}
	// x87 memory operation: opcode chosen by operand size (m16/m32/m64, 0 = size not
	// supported); the 64-bit form may use a different /ext (m64ext).
	void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext)
	{
		if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
		uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
		if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE)
		if (m64ext && addr.isBit(64)) ext = m64ext;

		rex(addr, st0);
		db(code);
		opAddr(addr, ext);
	}
	// use code1 if reg1 == st0
	// use code2 if reg1 != st0 && reg2 == st0
	// code is a 16-bit pair: high byte first, low byte ORed with the non-zero stack index.
	void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2)
	{
		uint32_t code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0;
		if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION)
		db(uint8_t(code >> 8));
		db(uint8_t(code | (reg1.getIdx() | reg2.getIdx()))); // one of the two indices is 0
	}
	// x87 single-register form: code1, code2+sti.
	void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2)
	{
		db(code1); db(code2 | reg.getIdx());
	}
	// Core VEX/EVEX emitter: chooses EVEX when required by the instruction type,
	// any operand's EVEX features (opmask/{z}/rounding/ext regs), or broadcast;
	// otherwise emits VEX. Then emits ModRM(+SIB/disp) or mod=3 reg form, then imm8.
	void opVex(const Reg& r, const Operand *p1, const Operand& op2, int type, int code, int imm8 = NONE)
	{
		if (op2.isMEM()) {
			const Address& addr = op2.getAddress();
			const RegExp& regExp = addr.getRegExp();
			const Reg& base = regExp.getBase();
			const Reg& index = regExp.getIndex();
			if (BIT == 64 && addr.is32bit()) db(0x67); // address-size override
			int disp8N = 0;
			bool x = index.isExtIdx();
			if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
				int aaa = addr.getOpmaskIdx();
				if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY)
				bool b = false;
				if (addr.isBroadcast()) {
					if (!(type & (T_B32 | T_B64))) XBYAK_THROW(ERR_INVALID_BROADCAST)
					b = true;
				}
				int VL = regExp.isVsib() ? index.getBit() : 0;
				// evex() returns the compressed-disp8 scale used by opAddr below
				disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
			} else {
				vex(r, base, p1, type, code, x);
			}
			opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
		} else {
			const Reg& base = op2.getReg();
			if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
				evex(r, base, p1, type, code);
			} else {
				vex(r, base, p1, type, code);
			}
			setModRM(3, r.getIdx(), base.getIdx());
		}
		if (imm8 != NONE) db(imm8);
	}
	// (r, r, r/m) if isR_R_RM
	// (r, r/m, r)
	// VEX-encoded GPR instruction (BMI-style); all register operands must share r's width,
	// which also selects W0/W1.
	void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8_t code, bool isR_R_RM, int imm8 = NONE)
	{
		const Operand *p1 = &op1;
		const Operand *p2 = &op2;
		if (!isR_R_RM) std::swap(p1, p2);
		const unsigned int bit = r.getBit();
		if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION)
		type |= (bit == 64) ? T_W1 : T_W0;
		opVex(r, p1, *p2, type, code, imm8);
	}
	// Three-operand AVX form (x1, x2, op). A two-operand call (op2 == none) is
	// expanded to (x1, x1, op1). x1 and x2 must be the same vector kind, with
	// YMM/ZMM allowed only when the type carries T_YMM.
	void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, int imm8 = NONE)
	{
		const Xmm *x2 = static_cast<const Xmm*>(&op1);
		const Operand *op = &op2;
		if (op2.isNone()) { // (x1, op1) -> (x1, x1, op1)
			x2 = &x1;
			op = &op1;
		}
		// (x1, x2, op)
		if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION)
		opVex(x1, x2, *op, type, code0, imm8);
	}
	// Opmask-destination AVX-512 form (k, x, x/m); x2 and op3 must match kinds unless op3 is memory.
	void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE)
	{
		if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION)
		opVex(k, &x2, op3, type, code0, imm8);
	}
	// (x, x/m), (y, x/m256), (z, y/m)
	void checkCvt1(const Operand& x, const Operand& op) const
	{
		if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// (x, x/m), (x, y/m256), (y, z/m)
	void checkCvt2(const Xmm& x, const Operand& op) const
	{
		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// Conversion where the destination kind is widened to match the source width.
	void opCvt(const Xmm& x, const Operand& op, int type, int code)
	{
		Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
	}
	// opCvt with the checkCvt2 shape constraint applied first.
	void opCvt2(const Xmm& x, const Operand& op, int type, int code)
	{
		checkCvt2(x, op);
		opCvt(x, op, type, code);
	}
	// Conversion with a GPR operand: type32/type64 chosen by the GPR width;
	// a register operand is re-wrapped as an Xmm index for encoding.
	void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
	{
		if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		Xmm x(op.getIdx());
		const Operand *p = op.isREG() ? &x : &op;
		opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
	}
	// (x, x/y/xword/yword), (y, z/m)
	void checkCvt4(const Xmm& x, const Operand& op) const
	{
		if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// (x, x/y/z/xword/yword/zword)
	// Destination kind follows the source operand's width.
	void opCvt5(const Xmm& x, const Operand& op, int type, int code)
	{
		if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION)
		Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
		opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
	}
	// Register 0 of the same vector kind as x (used as the unused vvvv operand).
	const Xmm& cvtIdx0(const Operand& x) const
	{
		return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
	}
	// support (x, x/m, imm), (y, y/m, imm)
	void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, int imm8 = NONE)
	{
		opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8);
	}
	// QQQ:need to refactor
	// 0F-prefixed reg/mem op (popcnt/lzcnt-style): 16-bit form adds 0x66; 8-bit regs rejected.
	void opSp1(const Reg& reg, const Operand& op, uint8_t pref, uint8_t code0, uint8_t code1)
	{
		if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
		bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
		if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
		if (is16bit) db(0x66);
		db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1);
	}
	// AVX2 gather (vgatherdps etc.): validates the dest/index/mask width triple per
	// `mode`, and requires dest, VSIB index, and mask to be three distinct registers.
	void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8_t code, int mode)
	{
		const RegExp& regExp = addr.getRegExp();
		if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
		const int y_vx_y = 0;
		const int y_vy_y = 1;
//		const int x_vy_x = 2;
		const bool isAddrYMM = regExp.getIndex().getBit() == 256;
		if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) { // the all-XMM case is always valid
			bool isOK = false;
			if (mode == y_vx_y) {
				isOK = x1.isYMM() && !isAddrYMM && x2.isYMM();
			} else if (mode == y_vy_y) {
				isOK = x1.isYMM() && isAddrYMM && x2.isYMM();
			} else { // x_vy_x
				isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM();
			}
			if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
		}
		int i1 = x1.getIdx();
		int i2 = regExp.getIndex().getIdx();
		int i3 = x2.getIdx();
		if (i1 == i2 || i1 == i3 || i2 == i3) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
		opAVX_X_X_XM(isAddrYMM ? Ymm(i1) : x1, isAddrYMM ? Ymm(i3) : x2, addr, type, code);
	}
	// dest/index width relationships for AVX-512 gather/scatter validation
	enum {
		xx_yy_zz = 0, // equal widths
		xx_yx_zy = 1, // index one step narrower
		xx_xy_yz = 2  // index one step wider
	};
	// Throw unless (x1, x2) widths match the relationship selected by `mode`.
	void checkGather2(const Xmm& x1, const Reg& x2, int mode) const
	{
		if (x1.isXMM() && x2.isXMM()) return;
		switch (mode) {
		case xx_yy_zz: if ((x1.isYMM() && x2.isYMM()) || (x1.isZMM() && x2.isZMM())) return;
			break;
		case xx_yx_zy: if ((x1.isYMM() && x2.isXMM()) || (x1.isZMM() && x2.isYMM())) return;
			break;
		case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return;
			break;
		}
		XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
	}
	// AVX-512 gather/scatter: requires a non-k0 opmask (on the register or, for
	// scatter (T_M_K), on the address), forbids {z}, and for gathers forbids
	// dest == VSIB index.
	void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode)
	{
		if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
		const RegExp& regExp = addr.getRegExp();
		checkGather2(x, regExp.getIndex(), mode);
		int maskIdx = x.getOpmaskIdx();
		if ((type & T_M_K) && addr.getOpmaskIdx()) maskIdx = addr.getOpmaskIdx();
		if (maskIdx == 0) XBYAK_THROW(ERR_K0_IS_INVALID);
		if (!(type & T_M_K) && x.getIdx() == regExp.getIndex().getIdx()) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
		opVex(x, 0, addr, type, code);
	}
2447 /*
2448 xx_xy_yz ; mode = true
2449 xx_xy_xz ; mode = false
2450 */
2451 void opVmov(const Operand& op, const Xmm& x, int type, uint8_t code, bool mode)
2452 {
2453 if (mode) {
2454 if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
2455 } else {
2456 if (!op.isMEM() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION)
2457 }
2458 opVex(x, 0, op, type, code);
2459 }
	// Gather-prefetch style op: VSIB index must be of `kind`; {z} is invalid on the address.
	void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8_t code, Operand::Kind kind)
	{
		if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
		if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
		opVex(x, 0, addr, type, code);
	}
	// AVX form whose encoding (VEX vs EVEX) can be forced by a PreferredEncoding.
	void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
	{
		opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code0);
	}
	// Map a PreferredEncoding to a type flag: T_MUST_EVEX when EVEX is requested
	// (resolving DefaultEncoding via defaultEncoding_), 0 otherwise.
	int orEvexIf(PreferredEncoding encoding) {
		if (encoding == DefaultEncoding) {
			encoding = defaultEncoding_;
		}
		if (encoding == EvexEncoding) {
#ifdef XBYAK_DISABLE_AVX512
			XBYAK_THROW(ERR_EVEX_IS_INVALID)
#endif
			return T_MUST_EVEX;
		}
		return 0;
	}
	// in/out with DX port: accumulator must be index 0 (AL/AX/EAX) and port must be DX.
	void opInOut(const Reg& a, const Reg& d, uint8_t code)
	{
		if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) {
			switch (a.getBit()) {
			case 8: db(code); return;
			case 16: db(0x66); db(code + 1); return;
			case 32: db(code + 1); return;
			}
		}
		XBYAK_THROW(ERR_BAD_COMBINATION)
	}
	// in/out with imm8 port: accumulator must be index 0 (AL/AX/EAX).
	void opInOut(const Reg& a, uint8_t code, uint8_t v)
	{
		if (a.getIdx() == Operand::AL) {
			switch (a.getBit()) {
			case 8: db(code); db(v); return;
			case 16: db(0x66); db(code + 1); db(v); return;
			case 32: db(code + 1); db(v); return;
			}
		}
		XBYAK_THROW(ERR_BAD_COMBINATION)
	}
#ifdef XBYAK64
	// AMX tile load/store: the SIB form with both base and index registers is mandatory.
	void opAMX(const Tmm& t1, const Address& addr, int type, int code0)
	{
		// require both base and index
		const RegExp exp = addr.getRegExp(false);
		if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED)
		opVex(t1, &tmm0, addr, type, code0);
	}
#endif
public:
	unsigned int getVersion() const { return VERSION; }
	using CodeArray::db;
	// predefined operand objects, directly usable in user code (e.g. mov(eax, 1))
	const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
	const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
	const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
	const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
	// short aliases for the vector registers
	const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7;
	const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7;
	const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7;
	const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
	const Reg16 ax, cx, dx, bx, sp, bp, si, di;
	const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
	const AddressFrame ptr, byte, word, dword, qword, xword, yword, zword; // xword is same as oword of NASM
	const AddressFrame ptr_b, xword_b, yword_b, zword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
	const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
	const Opmask k0, k1, k2, k3, k4, k5, k6, k7;
	const BoundsReg bnd0, bnd1, bnd2, bnd3;
	const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}
	const EvexModifierZero T_z; // {z}
#ifdef XBYAK64
	const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
	const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
	const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w;
	const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
	const Reg8 spl, bpl, sil, dil;
	const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
	const Xmm xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23;
	const Xmm xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31;
	const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
	const Ymm ymm16, ymm17, ymm18, ymm19, ymm20, ymm21, ymm22, ymm23;
	const Ymm ymm24, ymm25, ymm26, ymm27, ymm28, ymm29, ymm30, ymm31;
	const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15;
	const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23;
	const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31;
	const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7;
	const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience
	const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23;
	const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31;
	const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15;
	const Ymm &ym16, &ym17, &ym18, &ym19, &ym20, &ym21, &ym22, &ym23;
	const Ymm &ym24, &ym25, &ym26, &ym27, &ym28, &ym29, &ym30, &ym31;
	const Zmm &zm8, &zm9, &zm10, &zm11, &zm12, &zm13, &zm14, &zm15;
	const Zmm &zm16, &zm17, &zm18, &zm19, &zm20, &zm21, &zm22, &zm23;
	const Zmm &zm24, &zm25, &zm26, &zm27, &zm28, &zm29, &zm30, &zm31;
	const RegRip rip;
#endif
#ifndef XBYAK_DISABLE_SEGMENT
	const Segment es, cs, ss, ds, fs, gs;
#endif
private:
	bool isDefaultJmpNEAR_; // if true, jmp/jcc to an undefined label defaults to T_NEAR (see setDefaultJmpNEAR)
	PreferredEncoding defaultEncoding_; // encoding used when the caller passes DefaultEncoding (see orEvexIf)
public:
	// Define a label at the current position (string form, Label-object form, or anonymous).
	void L(const std::string& label) { labelMgr_.defineSlabel(label); }
	void L(Label& label) { labelMgr_.defineClabel(label); }
	Label L() { Label label; L(label); return label; }
	void inLocalLabel() { labelMgr_.enterLocal(); }
	void outLocalLabel() { labelMgr_.leaveLocal(); }
	/*
		assign src to dst
		require
		dst : is not used by L()
		src : is used by L()
	*/
	void assignL(Label& dst, const Label& src) { labelMgr_.assign(dst, src); }
	/*
		put address of label to buffer
		@note the put size is 4(32-bit), 8(64-bit)
	*/
	void putL(std::string label) { putL_inner(label); }
	void putL(const Label& label) { putL_inner(label); }
2585
	// set default type of `jmp` of undefined label to T_NEAR
	void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
	// jmp: indirect (/4), to label (EB rel8 / E9 rel32), or to absolute address
	void jmp(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 4); }
	void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
	void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
	void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
	void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }
2593
	// call: indirect (/2) or to label/address (E8 rel32 only; no short form)
	void call(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 2); }
	// call(string label), not const std::string&
	void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
	void call(const char *label) { call(std::string(label)); }
	void call(const Label& label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
	// call(function pointer)
#ifdef XBYAK_VARIADIC_TEMPLATE
	template<class Ret, class... Params>
	void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
#endif
	void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
2605
	// test r/m, reg (opcode 0x84/0x85)
	void test(const Operand& op, const Reg& reg)
	{
		opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84);
	}
	// test r/m, imm: short 0xA8 form when the destination is AL/AX/EAX, else 0xF6 /0
	void test(const Operand& op, uint32_t imm)
	{
		verifyMemHasSize(op);
		int immSize = (std::min)(op.getBit() / 8, 4U);
		if (op.isREG() && op.getIdx() == 0) { // al, ax, eax
			rex(op);
			db(0xA8 | (op.isBit(8) ? 0 : 1));
		} else {
			opR_ModM(op, 0, 0, 0xF6, NONE, NONE, false, immSize);
		}
		db(imm, immSize);
	}
	// imul reg, r/m (0F AF)
	void imul(const Reg& reg, const Operand& op)
	{
		opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x0F, 0xAF);
	}
	// imul reg, r/m, imm: 0x6B with sign-extended imm8 when it fits, else 0x69 imm16/32
	void imul(const Reg& reg, const Operand& op, int imm)
	{
		int s = inner::IsInDisp8(imm) ? 1 : 0;
		int immSize = s ? 1 : reg.isREG(16) ? 2 : 4;
		opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x69 | (s << 1), NONE, NONE, immSize);
		db(imm, immSize);
	}
	void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); }
	void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); }
	// push imm with an explicit size frame: 6A imm8 / 66 68 imm16 / 68 imm32
	void push(const AddressFrame& af, uint32_t imm)
	{
		if (af.bit_ == 8) {
			db(0x6A); db(imm);
		} else if (af.bit_ == 16) {
			db(0x66); db(0x68); dw(imm);
		} else {
			db(0x68); dd(imm);
		}
	}
	/* use "push(word, 4)" if you want "push word 4" */
	void push(uint32_t imm)
	{
		if (inner::IsInDisp8(imm)) {
			push(byte, imm);
		} else {
			push(dword, imm);
		}
	}
	// mov between two operands. Special-cases the accumulator<->[disp] moves
	// (0xA0-0xA3): 64-bit displacement form on XBYAK64, disp-only form on 32-bit;
	// everything else goes through the generic 0x88/0x89/0x8A/0x8B path.
	void mov(const Operand& reg1, const Operand& reg2)
	{
		const Reg *reg = 0;
		const Address *addr = 0;
		uint8_t code = 0;
		if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp]
			reg = &reg1.getReg();
			addr= &reg2.getAddress();
			code = 0xA0;
		} else
		if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al
			reg = &reg2.getReg();
			addr= &reg1.getAddress();
			code = 0xA2;
		}
#ifdef XBYAK64
		if (addr && addr->is64bitDisp()) {
			if (code) {
				rex(*reg);
				db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3);
				db(addr->getDisp(), 8); // full 64-bit moffs
			} else {
				XBYAK_THROW(ERR_BAD_COMBINATION)
			}
		} else
#else
		if (code && addr->isOnlyDisp()) {
			rex(*reg, *addr);
			db(code | (reg->isBit(8) ? 0 : 1));
			dd(static_cast<uint32_t>(addr->getDisp()));
		} else
#endif
		{
			opRM_RM(reg1, reg2, 0x88);
		}
	}
	// mov r/m, imm. Register form delegates to mov_imm (which may shrink the
	// immediate); memory form is C6/C7 with an immediate sized by the operand
	// (capped at imm32, sign-extended for 64-bit destinations).
	void mov(const Operand& op, uint64_t imm)
	{
		if (op.isREG()) {
			const int size = mov_imm(op.getReg(), imm);
			db(imm, size);
		} else if (op.isMEM()) {
			verifyMemHasSize(op);
			int immSize = op.getBit() / 8;
			if (immSize <= 4) {
				// imm must fit (zero- or sign-extended) in the operand size
				int64_t s = int64_t(imm) >> (immSize * 8);
				if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
			} else {
				if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG)
				immSize = 4;
			}
			opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
			db(static_cast<uint32_t>(imm), immSize);
		} else {
			XBYAK_THROW(ERR_BAD_COMBINATION)
		}
	}
2711
	// The template is used to avoid ambiguity when the 2nd argument is 0.
	// When the 2nd argument is 0 the call goes to
	// `void mov(const Operand& op, uint64_t imm)`.
	// Instantiating this overload with any other pointer type is a compile error
	// (T1::unexpected does not exist), so stray pointer arguments are rejected.
	template <typename T1, typename T2>
	void mov(const T1&, const T2 *) { T1::unexpected; }
	// mov reg, label-address: emit mov_imm with a placeholder address (dummyAddr),
	// then record the label via putL() so the real address is filled in once known.
	void mov(const NativeReg& reg, const Label& label)
	{
		mov_imm(reg, dummyAddr);
		putL(label);
	}
	// xchg op1, op2.
	// Uses the one-byte form 0x90|r when one operand is the 16/32/64-bit
	// accumulator; otherwise the generic 0x86/0x87 r/m form via opModRM.
	void xchg(const Operand& op1, const Operand& op2)
	{
		const Operand *p1 = &op1, *p2 = &op2;
		// canonicalize: move the accumulator (if any) into p1 and a memory operand into p2
		if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) {
			p1 = &op2; p2 = &op1;
		}
		if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) // mem,mem is not encodable
		if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0)
#ifdef XBYAK64
			// exclude "xchg eax, eax" in 64-bit mode: 0x90 there is NOP, not xchg
			&& (p2->getIdx() != 0 || !p1->isREG(32))
#endif
		) {
			rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7));
			return;
		}
		opModRM(*p1, *p2, (p1->isREG() && p2->isREG() && (p1->getBit() == p2->getBit())), p2->isMEM(), 0x86 | (p1->isBit(8) ? 0 : 1));
	}
2739
2740#ifndef XBYAK_DISABLE_SEGMENT
2741 void push(const Segment& seg)
2742 {
2743 switch (seg.getIdx()) {
2744 case Segment::es: db(0x06); break;
2745 case Segment::cs: db(0x0E); break;
2746 case Segment::ss: db(0x16); break;
2747 case Segment::ds: db(0x1E); break;
2748 case Segment::fs: db(0x0F); db(0xA0); break;
2749 case Segment::gs: db(0x0F); db(0xA8); break;
2750 default:
2751 assert(0);
2752 }
2753 }
2754 void pop(const Segment& seg)
2755 {
2756 switch (seg.getIdx()) {
2757 case Segment::es: db(0x07); break;
2758 case Segment::cs: XBYAK_THROW(ERR_BAD_COMBINATION)
2759 case Segment::ss: db(0x17); break;
2760 case Segment::ds: db(0x1F); break;
2761 case Segment::fs: db(0x0F); db(0xA1); break;
2762 case Segment::gs: db(0x0F); db(0xA9); break;
2763 default:
2764 assert(0);
2765 }
2766 }
2767 void putSeg(const Segment& seg)
2768 {
2769 switch (seg.getIdx()) {
2770 case Segment::es: db(0x2E); break;
2771 case Segment::cs: db(0x36); break;
2772 case Segment::ss: db(0x3E); break;
2773 case Segment::ds: db(0x26); break;
2774 case Segment::fs: db(0x64); break;
2775 case Segment::gs: db(0x65); break;
2776 default:
2777 assert(0);
2778 }
2779 }
	// mov r/m, Sreg (opcode 0x8C); destination may be a 16/32/64-bit register or memory
	void mov(const Operand& op, const Segment& seg)
	{
		opModRM(Reg8(seg.getIdx()), op, op.isREG(16|i32e), op.isMEM(), 0x8C);
	}
	// mov Sreg, r/m (opcode 0x8E); a 16/32/64-bit register source is normalized to
	// its 32-bit form (cvt32) before encoding
	void mov(const Segment& seg, const Operand& op)
	{
		opModRM(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, op.isREG(16|i32e), op.isMEM(), 0x8E);
	}
2788#endif
2789
	// NONE is out of byte range (0..255); used as an "absent opcode byte" sentinel (see opModM calls)
	enum { NONE = 256 };
	// constructor
	// maxSize: capacity of the code buffer; userPtr and allocator are forwarded to
	// CodeArray. The initializer list binds every public register/operand member
	// (mm/xmm/ymm/zmm, general-purpose registers, AddressFrames, st/k/bnd, EVEX
	// modifiers, and their 64-bit-only counterparts) to its hardware index.
	CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0)
		: CodeArray(maxSize, userPtr, allocator)
		, mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7)
		, xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7)
		, ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7)
		, zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7)
		// for my convenience
		, xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7)
		, ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7)
		, zm0(zmm0), zm1(zmm1), zm2(zmm2), zm3(zmm3), zm4(zmm4), zm5(zmm5), zm6(zmm6), zm7(zmm7)

		, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
		, ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI)
		, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
		, ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512)
		, ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true)
		, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
		, k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7)
		, bnd0(0), bnd1(1), bnd2(2), bnd3(3)
		, T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE)
		, T_z()
#ifdef XBYAK64
		, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
		, r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15)
		, r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15)
		, r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15)
		, spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true)
		, xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15)
		, xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23)
		, xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31)
		, ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15)
		, ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23)
		, ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31)
		, zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15)
		, zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23)
		, zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31)
		, tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7)
		// for my convenience
		, xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15)
		, xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23)
		, xm24(xmm24), xm25(xmm25), xm26(xmm26), xm27(xmm27), xm28(xmm28), xm29(xmm29), xm30(xmm30), xm31(xmm31)
		, ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15)
		, ym16(ymm16), ym17(ymm17), ym18(ymm18), ym19(ymm19), ym20(ymm20), ym21(ymm21), ym22(ymm22), ym23(ymm23)
		, ym24(ymm24), ym25(ymm25), ym26(ymm26), ym27(ymm27), ym28(ymm28), ym29(ymm29), ym30(ymm30), ym31(ymm31)
		, zm8(zmm8), zm9(zmm9), zm10(zmm10), zm11(zmm11), zm12(zmm12), zm13(zmm13), zm14(zmm14), zm15(zmm15)
		, zm16(zmm16), zm17(zmm17), zm18(zmm18), zm19(zmm19), zm20(zmm20), zm21(zmm21), zm22(zmm22), zm23(zmm23)
		, zm24(zmm24), zm25(zmm25), zm26(zmm26), zm27(zmm27), zm28(zmm28), zm29(zmm29), zm30(zmm30), zm31(zmm31)
		, rip()
#endif
#ifndef XBYAK_DISABLE_SEGMENT
		, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
#endif
		, isDefaultJmpNEAR_(false)
		, defaultEncoding_(EvexEncoding)
	{
		ClearError();           // start with a clean (sticky) error state
		labelMgr_.set(this);    // attach the label manager to this generator
	}
	// Discard all generated code and label state so this generator can be reused.
	void reset()
	{
		resetSize();
		labelMgr_.reset();
		labelMgr_.set(this); // re-attach the label manager after the reset
	}
	// true if any referenced label (string label or Label object) is still undefined
	bool hasUndefinedLabel() const { return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel(); }
2857 /*
2858 MUST call ready() to complete generating code if you use AutoGrow mode.
2859 It is not necessary for the other mode if hasUndefinedLabel() is true.
2860 */
2861 void ready(ProtectMode mode = PROTECT_RWE)
2862 {
2863 if (hasUndefinedLabel()) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND)
2864 if (isAutoGrow()) {
2865 calcJmpAddress();
2866 if (useProtect()) setProtectMode(mode);
2867 }
2868 }
2869 // set read/exec
2870 void readyRE() { return ready(PROTECT_RE); }
2871#ifdef XBYAK_TEST
	// Dump the generated bytes (CodeArray::dump); when doClear is set, reset
	// size_ so the buffer can be reused by the next test.
	void dump(bool doClear = true)
	{
		CodeArray::dump();
		if (doClear) size_ = 0;
	}
2877#endif
2878
2879#ifdef XBYAK_UNDEF_JNL
2880 #undef jnl
2881#endif
2882
	// set default encoding to select Vex or Evex (stored in defaultEncoding_,
	// consulted when a mnemonic can be encoded either way)
	void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
2885
2886 /*
2887 use single byte nop if useMultiByteNop = false
2888 */
2889 void nop(size_t size = 1, bool useMultiByteNop = true)
2890 {
2891 if (!useMultiByteNop) {
2892 for (size_t i = 0; i < size; i++) {
2893 db(0x90);
2894 }
2895 return;
2896 }
2897 /*
2898 Intel Architectures Software Developer's Manual Volume 2
2899 recommended multi-byte sequence of NOP instruction
2900 AMD and Intel seem to agree on the same sequences for up to 9 bytes:
2901 https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf
2902 */
2903 static const uint8_t nopTbl[9][9] = {
2904 {0x90},
2905 {0x66, 0x90},
2906 {0x0F, 0x1F, 0x00},
2907 {0x0F, 0x1F, 0x40, 0x00},
2908 {0x0F, 0x1F, 0x44, 0x00, 0x00},
2909 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
2910 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
2911 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
2912 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
2913 };
2914 const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]);
2915 while (size > 0) {
2916 size_t len = (std::min)(n, size);
2917 const uint8_t *seq = nopTbl[len - 1];
2918 db(seq, len);
2919 size -= len;
2920 }
2921 }
2922
2923#ifndef XBYAK_DONT_READ_LIST
2924#include "xbyak_mnemonic.h"
2925 /*
2926 use single byte nop if useMultiByteNop = false
2927 */
2928 void align(size_t x = 16, bool useMultiByteNop = true)
2929 {
2930 if (x == 1) return;
2931 if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN)
2932 if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x);
2933 size_t remain = size_t(getCurr()) % x;
2934 if (remain) {
2935 nop(x - remain, useMultiByteNop);
2936 }
2937 }
2938#endif
2939};
2940
// mov reg, "label" — string-label variant of mov(reg, Label). Specializes the
// pointer-catching template so const char* remains usable; emits a placeholder
// address and records the label via putL() for later resolution.
template <>
inline void CodeGenerator::mov(const NativeReg& reg, const char *label) // can't use std::string
{
	assert(label);
	mov_imm(reg, dummyAddr);
	putL(label);
}
2948
2949namespace util {
2950static const XBYAK_CONSTEXPR Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7);
2951static const XBYAK_CONSTEXPR Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7);
2952static const XBYAK_CONSTEXPR Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7);
2953static const XBYAK_CONSTEXPR Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7);
2954static const XBYAK_CONSTEXPR Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI);
2955static const XBYAK_CONSTEXPR Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI);
2956static const XBYAK_CONSTEXPR Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH);
2957static const XBYAK_CONSTEXPR AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512);
2958static const XBYAK_CONSTEXPR AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true);
2959static const XBYAK_CONSTEXPR Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7);
2960static const XBYAK_CONSTEXPR Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7);
2961static const XBYAK_CONSTEXPR BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3);
2962static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE);
2963static const XBYAK_CONSTEXPR EvexModifierZero T_z;
2964#ifdef XBYAK64
2965static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15);
2966static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15);
2967static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15);
2968static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true);
2969static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15);
2970static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23);
2971static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31);
2972static const XBYAK_CONSTEXPR Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15);
2973static const XBYAK_CONSTEXPR Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23);
2974static const XBYAK_CONSTEXPR Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31);
2975static const XBYAK_CONSTEXPR Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15);
2976static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23);
2977static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31);
2978static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7);
2979static const XBYAK_CONSTEXPR RegRip rip;
2980#endif
2981#ifndef XBYAK_DISABLE_SEGMENT
2982static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs);
2983#endif
2984} // util
2985
2986#ifdef _MSC_VER
2987 #pragma warning(pop)
2988#endif
2989
2990#if defined(__GNUC__) && !defined(__clang__)
2991 #pragma GCC diagnostic pop
2992#endif
2993
2994} // end of namespace
2995
2996#endif // XBYAK_XBYAK_H_
2997